Skip to content

Commit 5a855b3

Browse files
committed
support sciencedirect suppl
1 parent 6646591 commit 5a855b3

File tree

6 files changed

+160
-33
lines changed

6 files changed

+160
-33
lines changed

README.md

+27-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,30 @@ For website spider (optional):
2323

2424
- Headless Chrome is required for some websites whose pages are rendered by JavaScript. Windows users may need to create an alias for Chrome to make [chromedp](https://github.com/chromedp/chromedp) work.
2525

26+
```bash
27+
# To resolve `[FATA] exec: "google-chrome": executable file not found in $PATH` error:
28+
# option 1: install Chrome in your OS
29+
## centos
30+
sudo yum install liberation-fonts
31+
sudo yum -y install libXss*
32+
sudo yum install libappindicator*
33+
wget https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
34+
sudo rpm -ivh google-chrome-stable_current_x86_64.rpm
35+
36+
## ubuntu
37+
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
38+
sudo dpkg -i google-chrome-stable_current_amd64.deb
39+
sudo apt install -f
40+
41+
# option 2: run bget in the headless-shell docker container
42+
docker run -d -p 9222:9222 --rm --name headless-shell -v /path_contains_bget/:/tmp/bget chromedp/headless-shell
43+
docker exec -it headless-shell /bin/bash
44+
45+
# set a longer timeout for slow network connections
46+
bget doi 10.1016/j.devcel.2017.03.001 --suppl --timeout 100
47+
```
48+
49+
2650
For raw sequencing data query (optional):
2751

2852
- [sra-tools](https://github.com/ncbi/sra-tools) for SRA and dbGAP database: `bget i sratools`;
@@ -33,15 +57,15 @@ For raw sequencing data query (optional):
3357

3458
```bash
3559
# windows
36-
wget https://github.com/openanno/bget/releases/download/v0.3.0/bget.exe
60+
wget https://github.com/openanno/bget/releases/download/v0.3.1/bget.exe
3761

3862
# osx
39-
wget https://github.com/openanno/bget/releases/download/v0.3.0/bget_osx
63+
wget https://github.com/openanno/bget/releases/download/v0.3.1/bget_osx
4064
mv bget_osx bget
4165
chmod a+x bget
4266

4367
# linux
44-
wget https://github.com/openanno/bget/releases/download/v0.3.0/bget_linux64
68+
wget https://github.com/openanno/bget/releases/download/v0.3.1/bget_linux64
4569
mv bget_linux64 bget
4670
chmod a+x bget
4771

chrome/doi.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ func DoiSupplURLs(url string, timeout time.Duration, proxy string) []string {
3232
//err := cdp.Run(ctx, visibleNejm("https://www.nejm.org/doi/full/10.1056/NEJMoa1902226", &attbs))
3333
if strings.Contains(url, "www.nejm.org") {
3434
err = cdp.Run(ctx, visibleNejm(url, &attbs))
35-
} else if stringo.StrDetect(url, "sciencedirect.com|/10.1016/") {
35+
} else if stringo.StrDetect(url, "sciencedirect.com|/10.1016/|www.cell.com") {
3636
err = cdp.Run(ctx, visibleScienceDirect(url, &attbs))
3737
} else if strings.Contains(url, "www.ncbi.nlm.nih.gov/Traces/study") {
3838
err = cdp.Run(ctx, visibleSraRunSelect(url, &attbs, ctx))
@@ -177,5 +177,5 @@ func visibleDownloadTask(url string, ctx context.Context) cdp.Tasks {
177177
}
178178

179179
//func main() {
180-
//GetURLFile("https://linkinghub.elsevier.com/retrieve/pii/S2215036619303943", 145*time.Second, "http://lee_jianfeng:[email protected]:8000")
180+
//DoiSupplURLs("https://www.sciencedirect.com/science/article/pii/S1934590919303078?via=ihub", 145*time.Second)
181181
//}

go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ require (
2121
github.com/openbiox/ligo v0.0.0-20200607024921-dd2356ca56a1
2222
github.com/sirupsen/logrus v1.6.0
2323
github.com/spf13/cobra v1.0.0
24+
github.com/tebeka/selenium v0.9.9 // indirect
2425
github.com/tidwall/pretty v1.0.1
2526
github.com/vbauerster/mpb/v5 v5.2.2
2627
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9 // indirect

0 commit comments

Comments
 (0)