From 6d648a3f2d8f1a727b2081999f191d8596749b84 Mon Sep 17 00:00:00 2001 From: E Date: Fri, 10 May 2024 11:59:44 +0100 Subject: [PATCH] Add Dockerfile and add WIP scraper to serverless --- Dockerfile | 26 ++++++++++++++++ Makefile | 4 ++- go.mod | 37 +++++------------------ go.sum | 79 ++++--------------------------------------------- scraper/main.go | 34 ++++++++++++++++----- serverless.yml | 7 +++++ 6 files changed, 76 insertions(+), 111 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ea6349f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +FROM public.ecr.aws/lambda/go:1 as lambda + +# Copy over go source code +COPY scraper/main.go /src/main.go +COPY go.mod /src/go.mod +COPY go.sum /src/go.sum + +WORKDIR /src + +# Install packages +RUN yum install go xz atk cups-libs gtk3 libXcomposite alsa-lib tar \ + libXcursor libXdamage libXext libXi libXrandr libXScrnSaver \ + libXtst pango at-spi2-atk libXt xorg-x11-server-Xvfb \ + xorg-x11-xauth dbus-glib dbus-glib-devel unzip bzip2 -y -q + +# Build go lambda function +RUN GOARCH=amd64 GOOS=linux go build -ldflags="-s -w" -o ${LAMBDA_TASK_ROOT} /src/main.go + +# Install chrome +RUN mkdir -p /opt/chrome/ +RUN curl -Lo "/opt/chrome/chrome-linux.zip" "https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Linux_x64%2F1299153%2Fchrome-linux.zip?generation=1715336417866122&alt=media" +RUN unzip -q "/opt/chrome/chrome-linux.zip" -d "/opt/chrome/" +RUN mv /opt/chrome/chrome-linux/* /opt/chrome/ +RUN rm -rf /opt/chrome/chrome-linux "/opt/chrome/chrome-linux.zip" + +CMD [ "app" ] diff --git a/Makefile b/Makefile index 9b0f474..e047823 100644 --- a/Makefile +++ b/Makefile @@ -6,13 +6,15 @@ install: build: env GOARCH=amd64 GOOS=linux go build -ldflags="-s -w" -o bin/hello hello/main.go env GOARCH=amd64 GOOS=linux go build -ldflags="-s -w" -o bin/world world/main.go + env GOARCH=amd64 GOOS=linux go build -ldflags="-s -w" -o bin/scraper scraper/main.go debugBuild: env GOARCH=amd64 GOOS=linux go build -v -gcflags='all=-N -l' -ldflags="-s -w" -o bin/hello hello/main.go env GOARCH=amd64 GOOS=linux go build -v -gcflags='all=-N -l' -ldflags="-s -w" -o bin/world world/main.go + env GOARCH=amd64 GOOS=linux go build -v -gcflags='all=-N -l' -ldflags="-s -w" -o bin/scraper scraper/main.go debugDeploy: clean install debugBuild - npx sls offline --useDocker + env SLS_DEBUG=* npx sls offline --useDocker clean: rm -rf ./node_modules diff --git a/go.mod b/go.mod index bdf0cb6..7b29c98 100644 --- a/go.mod +++ b/go.mod @@ -1,39 +1,18 @@ module github.com/aurora-dot/watcher-lambda -go 1.22.2 +go 1.22 + +require github.com/aws/aws-lambda-go v1.47.0 require ( - github.com/aws/aws-lambda-go v1.47.0 // indirect - github.com/aws/aws-sdk-go v1.51.23 // indirect - github.com/blmayer/awslambdarpc v1.1.0 // indirect - github.com/cilium/ebpf v0.11.0 // indirect - github.com/cosiner/argv v0.1.0 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect - github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d // indirect - github.com/go-delve/delve v1.22.1 // indirect - github.com/go-delve/liner v1.2.3-0.20231231155935-4726ab1d7f62 // indirect - github.com/go-rod/rod v0.115.0 // indirect - github.com/google/go-dap v0.11.0 // indirect - github.com/hashicorp/golang-lru v1.0.2 // indirect - github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/jmespath/go-jmespath v0.4.0 // indirect - github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect - github.com/mattn/go-runewidth v0.0.13 // indirect - github.com/rivo/uniseg v0.2.0 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect - github.com/sirupsen/logrus v1.9.3 // indirect - github.com/spf13/cobra v1.7.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect github.com/ysmood/fetchup v0.2.3 // indirect github.com/ysmood/goob v0.4.0 // indirect github.com/ysmood/got v0.34.1 // indirect github.com/ysmood/gson v0.7.3 // indirect github.com/ysmood/leakless v0.8.0 // indirect - go.starlark.net v0.0.0-20231101134539-556fd59b42f6 // indirect - golang.org/x/arch v0.6.0 // indirect - golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect - golang.org/x/sys v0.13.0 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect +) + +require ( + github.com/go-rod/rod v0.116.0 + github.com/stretchr/testify v1.8.4 // indirect ) diff --git a/go.sum b/go.sum index e1320f6..99975ed 100644 --- a/go.sum +++ b/go.sum @@ -1,62 +1,13 @@ -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/aws/aws-lambda-go v1.18.0/go.mod h1:FEwgPLE6+8wcGBTe5cJN3JWurd1Ztm9zN4jsXsjzKKw= github.com/aws/aws-lambda-go v1.47.0 h1:0H8s0vumYx/YKs4sE7YM0ktwL2eWse+kfopsRI1sXVI= github.com/aws/aws-lambda-go v1.47.0/go.mod h1:dpMpZgvWx5vuQJfBt0zqBha60q7Dd7RfgJv23DymV8A= -github.com/aws/aws-sdk-go v1.51.23 h1:/3TEdsEE/aHmdKGw2NrOp7Sdea76zfffGkTTSXTsDxY= -github.com/aws/aws-sdk-go v1.51.23/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= -github.com/blmayer/awslambdarpc v1.1.0 h1:qOdKCCZaeM8fziN6Pv8P3LU9N5jMSmCQxGGAwNBB+RM= -github.com/blmayer/awslambdarpc v1.1.0/go.mod h1:QUete86yJ+PFtuRV7j/c12AWRNqtwZVBiffDxNPF2Lk= -github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= -github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= -github.com/cosiner/argv v0.1.0 h1:BVDiEL32lwHukgJKP87btEPenzrrHUjajs/8yzaqcXg= -github.com/cosiner/argv v0.1.0/go.mod h1:EusR6TucWKX+zFgtdUsKT2Cvg45K5rtpCcWz4hK06d8= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d h1:hUWoLdw5kvo2xCsqlsIBMvWUc1QCSsCYD2J2+Fg6YoU= -github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d/go.mod h1:C7Es+DLenIpPc9J6IYw4jrK0h7S9bKj4DNl8+KxGEXU= -github.com/go-delve/delve v1.22.1 h1:LQSF2sv+lP3mmOzMkadl5HGQGgSS2bFg2tbyALqHu8Y= -github.com/go-delve/delve v1.22.1/go.mod h1:TfOb+G5H6YYKheZYAmA59ojoHbOimGfs5trbghHdLbM= -github.com/go-delve/liner v1.2.3-0.20231231155935-4726ab1d7f62 h1:IGtvsNyIuRjl04XAOFGACozgUD7A82UffYxZt4DWbvA= -github.com/go-delve/liner v1.2.3-0.20231231155935-4726ab1d7f62/go.mod h1:biJCRbqp51wS+I92HMqn5H8/A0PAhxn2vyOT+JqhiGI= -github.com/go-rod/rod v0.115.0 h1:xL+4BOr4sEGVphDPqpkSYWHwDOVmoCbZUmVZhEEUK+4= -github.com/go-rod/rod v0.115.0/go.mod h1:aiedSEFg5DwG/fnNbUOTPMTTWX3MRj6vIs/a684Mthw= -github.com/google/go-dap v0.11.0 h1:SpAZJL41rOOvd85PuLCCLE1dteTQOyKNnn0H3DBHywo= -github.com/google/go-dap v0.11.0/go.mod h1:HAeyoSd2WIfTfg+0GRXcFrb+RnojAtGNh+k+XTIxJDE= -github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c= -github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= -github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= -github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= -github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= -github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= -github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= -github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= -github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= -github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/go-rod/rod v0.116.0 h1:ypRryjTys3EnqHskJ/TdgodFMvXV0EHvmy4bSkKZgHM= +github.com/go-rod/rod v0.116.0/go.mod h1:aiedSEFg5DwG/fnNbUOTPMTTWX3MRj6vIs/a684Mthw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= -github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/urfave/cli/v2 v2.1.1/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/ysmood/fetchup v0.2.3 h1:ulX+SonA0Vma5zUFXtv52Kzip/xe7aj4vqT5AJwQ+ZQ= github.com/ysmood/fetchup v0.2.3/go.mod h1:xhibcRKziSvol0H1/pj33dnKrYyI2ebIvz5cOOkYGns= github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ= @@ -69,23 +20,5 @@ github.com/ysmood/gson v0.7.3 h1:QFkWbTH8MxyUTKPkVWAENJhxqdBa4lYTQWqZCiLG6kE= github.com/ysmood/gson v0.7.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= github.com/ysmood/leakless v0.8.0 h1:BzLrVoiwxikpgEQR0Lk8NyBN5Cit2b1z+u0mgL4ZJak= github.com/ysmood/leakless v0.8.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ= -go.starlark.net v0.0.0-20231101134539-556fd59b42f6 h1:+eC0F/k4aBLC4szgOcjd7bDTEnpxADJyWJE0yowgM3E= -go.starlark.net v0.0.0-20231101134539-556fd59b42f6/go.mod h1:LcLNIzVOMp4oV+uusnpk+VU+SzXaJakUuBjoCSWH5dM= -golang.org/x/arch v0.6.0 h1:S0JTfE48HbRj80+4tbvZDYsJ3tGv6BUU3XxyZ7CirAc= -golang.org/x/arch v0.6.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= -golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI= -golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= -golang.org/x/sys v0.0.0-20211117180635-dee7805ff2e1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= -golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/scraper/main.go b/scraper/main.go index a3547d5..d35a72f 100644 --- a/scraper/main.go +++ b/scraper/main.go @@ -1,13 +1,31 @@ package main -// import ( -// "github.com/aws/aws-lambda-go/lambda" -// ) +import ( + "context" -// func scrape(websiteUrl string, xpathString string) (string, string) { + "github.com/aws/aws-lambda-go/lambda" + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/launcher" +) -// } +type MyEvent struct { + URL string `json:"URL"` + XPATH string `json:"XPATH"` +} -// func main() { -// lambda.Start(scrape) -// } +type MyResponse struct { + Content string `json:"content"` + Hash string `json:"hash"` +} + +func scrape(ctx context.Context, event *MyEvent) (*MyResponse, error) { + u := launcher.New().Bin("/opt/chrome/chrome").MustLaunch() + page := rod.New().ControlURL(u).MustConnect().MustPage("https://www.wikipedia.org/") + page.MustWaitStable().MustScreenshot("a.png") + + return &MyResponse{Content: "Test", Hash: "Test"}, nil +} + +func main() { + lambda.Start(scrape) +} diff --git a/serverless.yml b/serverless.yml index 30544e0..31ce72c 100644 --- a/serverless.yml +++ b/serverless.yml @@ -71,6 +71,13 @@ functions: - httpApi: path: /world method: get + scrape: + timeout: 30 + handler: bin/scraper + events: + - httpApi: + path: /scrape + method: post # The following are a few example events you can configure # NOTE: Please make sure to change your handler code to work with those events # Check the event documentation for details