diff --git a/.gitignore b/.gitignore index 0711f8a..d7b6f58 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ cache/ vendor bin +webdata/ diff --git a/Makefile b/Makefile index d81cd62..dcb13ec 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ APP_NAME := google_maps_scraper -VERSION := 1.3.6 +VERSION := 1.4.0 default: help diff --git a/README.md b/README.md index 40b9468..c246e9d 100644 --- a/README.md +++ b/README.md @@ -39,17 +39,31 @@ ## 🚀 Please [vote](https://github.com/gosom/google-maps-scraper/discussions/61) for the next features -A command line google maps scraper build using +A command line and web-based google maps scraper built using [scrapemate](https://github.com/gosom/scrapemate) web crawling framework. You can use this repository either as is, or you can use it's code as a base and customize it to your needs -**Update** Added email extraction from business website support + ## Try it +### Web UI: + +![Example GIF](img/example.gif) + + +``` +mkdir -p gmapsdata && docker run -v $PWD/gmapsdata:/gmapsdata -p 8080:8080 gosom/google-maps-scraper -web -data-folder /gmapsdata +``` + +Or download the [binary](https://github.com/gosom/google-maps-scraper/releases) for your platform and run it with the `-web` command line argument. 
+ + +### Command line: + ``` touch results.csv && docker run -v $PWD/example-queries.txt:/example-queries -v $PWD/results.csv:/results.csv gosom/google-maps-scraper -depth 1 -input /example-queries -results /results.csv -exit-on-inactivity 3m ``` diff --git a/go.mod b/go.mod index 9706ffa..244f79c 100644 --- a/go.mod +++ b/go.mod @@ -8,12 +8,16 @@ require ( github.com/google/uuid v1.6.0 github.com/gosom/scrapemate v0.6.0 github.com/jackc/pgx/v5 v5.7.1 + github.com/mattn/go-runewidth v0.0.16 github.com/mcnijman/go-emailaddress v1.1.1 github.com/playwright-community/playwright-go v0.4701.0 github.com/posthog/posthog-go v1.2.24 github.com/shirou/gopsutil/v4 v4.24.9 github.com/stretchr/testify v1.9.0 golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 + golang.org/x/sync v0.8.0 + golang.org/x/term v0.24.0 + modernc.org/sqlite v1.33.1 ) require ( @@ -56,6 +60,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/deckarep/golang-set/v2 v2.6.0 // indirect github.com/denis-tingaikin/go-header v0.5.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/ebitengine/purego v0.8.0 // indirect github.com/ettle/strcase v0.2.0 // indirect github.com/fatih/color v1.17.0 // indirect @@ -100,6 +105,7 @@ require ( github.com/gostaticanalysis/forcetypeassert v0.1.0 // indirect github.com/gostaticanalysis/nilerr v0.1.1 // indirect github.com/hashicorp/go-version v1.7.0 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/hexops/gotextdiff v1.0.3 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -131,13 +137,13 @@ require ( github.com/matoous/godox v0.0.0-20230222163458-006bad1f9d26 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect - github.com/mattn/go-runewidth v0.0.9 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/mgechev/revive v1.3.9 // indirect 
github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/moricho/tparallel v0.3.2 // indirect github.com/nakabonne/nestif v0.3.1 // indirect + github.com/ncruces/go-strftime v0.1.9 // indirect github.com/nishanths/exhaustive v0.12.0 // indirect github.com/nishanths/predeclared v0.2.2 // indirect github.com/nunnatsa/ginkgolinter v0.16.2 // indirect @@ -156,6 +162,8 @@ require ( github.com/quasilyte/gogrep v0.5.0 // indirect github.com/quasilyte/regex/syntax v0.0.0-20210819130434-b3f0c404a727 // indirect github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + github.com/rivo/uniseg v0.2.0 // indirect github.com/rs/zerolog v1.33.0 // indirect github.com/ryancurrah/gomodguard v1.3.5 // indirect github.com/ryanrolds/sqlclosecheck v0.5.1 // indirect @@ -208,7 +216,6 @@ require ( golang.org/x/exp/typeparams v0.0.0-20240314144324-c7f7c6466f7f // indirect golang.org/x/mod v0.21.0 // indirect golang.org/x/net v0.29.0 // indirect - golang.org/x/sync v0.8.0 // indirect golang.org/x/sys v0.25.0 // indirect golang.org/x/text v0.18.0 // indirect golang.org/x/tools v0.25.0 // indirect @@ -217,6 +224,12 @@ require ( gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect honnef.co/go/tools v0.5.1 // indirect + modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect + modernc.org/libc v1.55.3 // indirect + modernc.org/mathutil v1.6.0 // indirect + modernc.org/memory v1.8.0 // indirect + modernc.org/strutil v1.2.0 // indirect + modernc.org/token v1.1.0 // indirect mvdan.cc/gofumpt v0.7.0 // indirect mvdan.cc/unparam v0.0.0-20240528143540-8a5130ca722f // indirect ) diff --git a/go.sum b/go.sum index db95f20..f7e13a3 100644 --- a/go.sum +++ b/go.sum @@ -136,6 +136,8 @@ github.com/deckarep/golang-set/v2 v2.6.0 h1:XfcQbWM1LlMB8BsJ8N9vW5ehnnPVIw0je80N github.com/deckarep/golang-set/v2 v2.6.0/go.mod 
h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/denis-tingaikin/go-header v0.5.0 h1:SRdnP5ZKvcO9KKRP1KJrhFR3RrlGuD+42t4429eC9k8= github.com/denis-tingaikin/go-header v0.5.0/go.mod h1:mMenU5bWrok6Wl2UsZjy+1okegmwQ3UgWl4V1D8gjlY= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+GvvE= github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -329,6 +331,8 @@ github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKe github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= @@ -420,8 +424,9 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= github.com/mattn/go-runewidth 
v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mcnijman/go-emailaddress v1.1.1 h1:AGhgVDG3tCDaL0/Vc6erlPQjDuDN3dAT7rRdgFtetr0= @@ -445,6 +450,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nakabonne/nestif v0.3.1 h1:wm28nZjhQY5HyYPx+weN3Q65k6ilSBxDb8v5S81B81U= github.com/nakabonne/nestif v0.3.1/go.mod h1:9EtoZochLn5iUprVDmDjqGKPofoUEBL8U4Ngq6aY7OE= +github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= +github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/nishanths/exhaustive v0.12.0 h1:vIY9sALmw6T/yxiASewa4TQcFsVYZQQRUQJhKRf3Swg= github.com/nishanths/exhaustive v0.12.0/go.mod h1:mEZ95wPIZW+x8kC4TgC+9YCUgiST7ecevsVDTgc2obs= github.com/nishanths/predeclared v0.2.2 h1:V2EPdZPliZymNAn79T8RkNApBjMmVKh5XRpLm/w98Vk= @@ -520,6 +527,10 @@ github.com/quasilyte/regex/syntax v0.0.0-20210819130434-b3f0c404a727 h1:TCg2WBOl github.com/quasilyte/regex/syntax v0.0.0-20210819130434-b3f0c404a727/go.mod h1:rlzQ04UMyJXu/aOvhd8qT+hvDrFpiwqp8MRXDY9szc0= github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567 h1:M8mH9eK4OUR4lu7Gd+PU1fV2/qnDNfzT635KRSObncs= github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567/go.mod h1:DWNGW8A4Y+GyBgPuaQJuWiy0XYftx4Xm/y5Jqk9I6VQ= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= 
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= @@ -856,6 +867,8 @@ golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1048,6 +1061,32 @@ honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9 honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.5.1 h1:4bH5o3b5ZULQ4UrBmP+63W9r7qIkqJClEA9ko5YKx+I= honnef.co/go/tools v0.5.1/go.mod h1:e9irvo83WDG9/irijV44wr3tbhcFeRnfpVlRqVwpzMs= +modernc.org/cc/v4 v4.21.4 h1:3Be/Rdo1fpr8GrQ7IVw9OHtplU4gWbb+wNgeoBMmGLQ= +modernc.org/cc/v4 v4.21.4/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ= +modernc.org/ccgo/v4 v4.19.2 h1:lwQZgvboKD0jBwdaeVCTouxhxAyN6iawF3STraAal8Y= +modernc.org/ccgo/v4 v4.19.2/go.mod 
h1:ysS3mxiMV38XGRTTcgo0DQTeTmAO4oCmJl1nX9VFI3s= +modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE= +modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ= +modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw= +modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU= +modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI= +modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4= +modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U= +modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w= +modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4= +modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo= +modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E= +modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU= +modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4= +modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc= +modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss= +modernc.org/sqlite v1.33.1 h1:trb6Z3YYoeM9eDL1O8do81kP+0ejv+YzgyFo+Gwy0nM= +modernc.org/sqlite v1.33.1/go.mod h1:pXV2xHxhzXZsgT/RtTFAPY6JJDEvOTcTdwADQCCWD4k= +modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA= +modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= mvdan.cc/gofumpt v0.7.0 h1:bg91ttqXmi9y2xawvkuMXyvAA/1ZGJqYAEGjXuP0JXU= mvdan.cc/gofumpt v0.7.0/go.mod h1:txVFJy/Sc/mvaycET54pV8SW8gWxTlUuGHVEcncmNUo= 
mvdan.cc/unparam v0.0.0-20240528143540-8a5130ca722f h1:lMpcwN6GxNbWtbpI1+xzFLSW8XzX0u72NttUGVFjO3U= diff --git a/img/example.gif b/img/example.gif new file mode 100644 index 0000000..57304dc Binary files /dev/null and b/img/example.gif differ diff --git a/main.go b/main.go index 35760c6..c7698ef 100644 --- a/main.go +++ b/main.go @@ -3,20 +3,39 @@ package main import ( "context" "fmt" + "log" "os" + "os/signal" + "syscall" "github.com/gosom/google-maps-scraper/runner" "github.com/gosom/google-maps-scraper/runner/databaserunner" "github.com/gosom/google-maps-scraper/runner/filerunner" "github.com/gosom/google-maps-scraper/runner/installplaywright" + "github.com/gosom/google-maps-scraper/runner/webrunner" ) func main() { - ctx := context.Background() + ctx, cancel := context.WithCancel(context.Background()) + + runner.Banner() + + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) + + go func() { + <-sigChan + + log.Println("Received signal, shutting down...") + + cancel() + }() + cfg := runner.ParseConfig() runnerInstance, err := runnerFactory(cfg) if err != nil { + cancel() os.Stderr.WriteString(err.Error() + "\n") runner.Telemetry().Close() @@ -30,12 +49,16 @@ func main() { _ = runnerInstance.Close(ctx) runner.Telemetry().Close() + cancel() + os.Exit(1) } _ = runnerInstance.Close(ctx) runner.Telemetry().Close() + cancel() + os.Exit(0) } @@ -47,6 +70,8 @@ func runnerFactory(cfg *runner.Config) (runner.Runner, error) { return databaserunner.New(cfg) case runner.RunModeInstallPlaywright: return installplaywright.New(cfg) + case runner.RunModeWeb: + return webrunner.New(cfg) default: return nil, fmt.Errorf("%w: %d", runner.ErrInvalidRunMode, cfg.RunMode) } diff --git a/runner/runner.go b/runner/runner.go index b25d87f..3b48f1f 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -4,11 +4,16 @@ import ( "context" "errors" "flag" + "fmt" "os" "runtime" + "strings" "sync" "time" + "github.com/mattn/go-runewidth" + 
"golang.org/x/term" + "github.com/gosom/google-maps-scraper/tlmt" "github.com/gosom/google-maps-scraper/tlmt/gonoop" "github.com/gosom/google-maps-scraper/tlmt/goposthog" @@ -19,6 +24,7 @@ const ( RunModeDatabase RunModeDatabaseProduce RunModeInstallPlaywright + RunModeWeb ) var ( @@ -48,6 +54,8 @@ type Config struct { Zoom int RunMode int DisableTelemetry bool + WebRunner bool + DataFolder string } func ParseConfig() *Config { @@ -74,6 +82,8 @@ func ParseConfig() *Config { flag.StringVar(&cfg.CustomWriter, "writer", "", "use custom writer plugin (format: 'dir:pluginName')") flag.StringVar(&cfg.GeoCoordinates, "geo", "", "set geo coordinates for search (e.g., '37.7749,-122.4194')") flag.IntVar(&cfg.Zoom, "zoom", 0, "set zoom level (0-21) for search") + flag.BoolVar(&cfg.WebRunner, "web", false, "run web server instead of crawling") + flag.StringVar(&cfg.DataFolder, "data-folder", "webdata", "data folder for web runner") flag.Parse() @@ -94,6 +104,8 @@ func ParseConfig() *Config { } switch { + case cfg.WebRunner: + cfg.RunMode = RunModeWeb case cfg.Dsn == "": cfg.RunMode = RunModeFile case cfg.ProduceOnly: @@ -136,3 +148,77 @@ func Telemetry() tlmt.Telemetry { return telemetry } + +func wrapText(text string, width int) []string { + var lines []string + + currentLine := "" + currentWidth := 0 + + for _, r := range text { + runeWidth := runewidth.RuneWidth(r) + if currentWidth+runeWidth > width { + lines = append(lines, currentLine) + currentLine = string(r) + currentWidth = runeWidth + } else { + currentLine += string(r) + currentWidth += runeWidth + } + } + + if currentLine != "" { + lines = append(lines, currentLine) + } + + return lines +} + +func banner(messages []string, width int) string { + if width <= 0 { + var err error + + width, _, err = term.GetSize(0) + if err != nil { + width = 80 + } + } + + if width < 20 { + width = 20 + } + + contentWidth := width - 4 + + var wrappedLines []string + for _, message := range messages { + wrappedLines = 
append(wrappedLines, wrapText(message, contentWidth)...) + } + + var builder strings.Builder + + builder.WriteString("╔" + strings.Repeat("═", width-2) + "╗\n") + + for _, line := range wrappedLines { + lineWidth := runewidth.StringWidth(line) + paddingRight := contentWidth - lineWidth + + if paddingRight < 0 { + paddingRight = 0 + } + + builder.WriteString(fmt.Sprintf("║ %s%s ║\n", line, strings.Repeat(" ", paddingRight))) + } + + builder.WriteString("╚" + strings.Repeat("═", width-2) + "╝\n") + + return builder.String() +} + +func Banner() { + message1 := "🌍 Google Maps Scraper" + message2 := "⭐ If you find this project useful, please star it on GitHub: https://github.com/gosom/google-maps-scraper" + message3 := "💖 Consider sponsoring to support development: https://github.com/sponsors/gosom" + + fmt.Fprintln(os.Stderr, banner([]string{message1, message2, message3}, 0)) +} diff --git a/runner/webrunner/webrunner.go b/runner/webrunner/webrunner.go new file mode 100644 index 0000000..0b319ec --- /dev/null +++ b/runner/webrunner/webrunner.go @@ -0,0 +1,231 @@ +package webrunner + +import ( + "context" + "encoding/csv" + "errors" + "fmt" + "io" + "log" + "os" + "path/filepath" + "strings" + "time" + + "github.com/gosom/google-maps-scraper/runner" + "github.com/gosom/google-maps-scraper/web" + "github.com/gosom/google-maps-scraper/web/sqlite" + "github.com/gosom/scrapemate" + "github.com/gosom/scrapemate/adapters/writers/csvwriter" + "github.com/gosom/scrapemate/scrapemateapp" + "golang.org/x/sync/errgroup" +) + +type webrunner struct { + srv *web.Server + svc *web.Service + cfg *runner.Config +} + +func New(cfg *runner.Config) (runner.Runner, error) { + if cfg.DataFolder == "" { + return nil, fmt.Errorf("data folder is required") + } + + if err := os.MkdirAll(cfg.DataFolder, os.ModePerm); err != nil { + return nil, err + } + + const dbfname = "jobs.db" + + dbpath := filepath.Join(cfg.DataFolder, dbfname) + + repo, err := sqlite.New(dbpath) + if err != nil { + return 
nil, err + } + + svc := web.NewService(repo, cfg.DataFolder) + + srv, err := web.New(svc) + if err != nil { + return nil, err + } + + ans := webrunner{ + srv: srv, + svc: svc, + cfg: cfg, + } + + return &ans, nil +} + +func (w *webrunner) Run(ctx context.Context) error { + egroup, ctx := errgroup.WithContext(ctx) + + egroup.Go(func() error { + return w.work(ctx) + }) + + egroup.Go(func() error { + return w.srv.Start(ctx) + }) + + return egroup.Wait() +} + +func (w *webrunner) Close(context.Context) error { + return nil +} + +func (w *webrunner) work(ctx context.Context) error { + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return nil + case <-ticker.C: + jobs, err := w.svc.SelectPending(ctx) + if err != nil { + return err + } + + for i := range jobs { + select { + case <-ctx.Done(): + return nil + default: + if err := w.scrapeJob(ctx, &jobs[i]); err != nil { + log.Printf("error scraping job %s: %v", jobs[i].ID, err) + } else { + log.Printf("job %s scraped successfully", jobs[i].ID) + } + } + } + } + } +} + +func (w *webrunner) scrapeJob(ctx context.Context, job *web.Job) error { + job.Status = web.StatusWorking + + err := w.svc.Update(ctx, job) + if err != nil { + return err + } + + if len(job.Data.Keywords) == 0 { + job.Status = web.StatusFailed + + return w.svc.Update(ctx, job) + } + + outpath := filepath.Join(w.cfg.DataFolder, job.ID+".csv") + + outfile, err := os.Create(outpath) + if err != nil { + return err + } + + defer func() { + _ = outfile.Close() + }() + + mate, err := w.setupMate(ctx, outfile) + if err != nil { + job.Status = web.StatusFailed + + err2 := w.svc.Update(ctx, job) + if err2 != nil { + log.Printf("failed to update job status: %v", err2) + } + + return err + } + + defer mate.Close() + + var coords string + if job.Data.Lat != "" && job.Data.Lon != "" { + coords = job.Data.Lat + "," + job.Data.Lon + } + + seedJobs, err := runner.CreateSeedJobs( + job.Data.Lang, + 
strings.NewReader(strings.Join(job.Data.Keywords, "\n")), + job.Data.Depth, + job.Data.Email, + coords, + job.Data.Zoom, + ) + if err != nil { + err2 := w.svc.Update(ctx, job) + if err2 != nil { + log.Printf("failed to update job status: %v", err2) + } + + return err + } + + if len(seedJobs) > 0 { + allowedSeconds := max(60, len(seedJobs)*10*job.Data.Depth/50+120) + + if job.Data.MaxTime > 0 { + if job.Data.MaxTime.Seconds() < 60 { + allowedSeconds = 60 + } else { + allowedSeconds = int(job.Data.MaxTime.Seconds()) + } + } + + log.Printf("running job %s with %d seed jobs and %d allowed seconds", job.ID, len(seedJobs), allowedSeconds) + + mateCtx, cancel := context.WithTimeout(ctx, time.Duration(allowedSeconds)*time.Second) + defer cancel() + + err = mate.Start(mateCtx, seedJobs...) + if err != nil && !errors.Is(err, context.DeadlineExceeded) { + cancel() + + err2 := w.svc.Update(ctx, job) + if err2 != nil { + log.Printf("failed to update job status: %v", err2) + } + + return err + } + + cancel() + } + + mate.Close() + + job.Status = web.StatusOK + + return w.svc.Update(ctx, job) +} + +func (w *webrunner) setupMate(_ context.Context, writer io.Writer) (*scrapemateapp.ScrapemateApp, error) { + opts := []func(*scrapemateapp.Config) error{ + scrapemateapp.WithConcurrency(w.cfg.Concurrency), + scrapemateapp.WithJS(scrapemateapp.DisableImages()), + scrapemateapp.WithExitOnInactivity(time.Second * 20), + } + + csvWriter := csvwriter.NewCsvWriter(csv.NewWriter(writer)) + + writers := []scrapemate.ResultWriter{csvWriter} + + matecfg, err := scrapemateapp.NewConfig( + writers, + opts..., + ) + if err != nil { + return nil, err + } + + return scrapemateapp.NewScrapeMateApp(matecfg) +} diff --git a/web/errors.go b/web/errors.go new file mode 100644 index 0000000..561442f --- /dev/null +++ b/web/errors.go @@ -0,0 +1,8 @@ +package web + +import "errors" + +var ( + ErrNotFound = errors.New("not found") + ErrAlreadyExists = errors.New("already exists") +) diff --git a/web/job.go 
b/web/job.go new file mode 100644 index 0000000..8e35aba --- /dev/null +++ b/web/job.go @@ -0,0 +1,96 @@ +package web + +import ( + "context" + "errors" + "time" +) + +var jobs []Job + +const ( + StatusPending = "pending" + StatusWorking = "working" + StatusOK = "ok" + StatusFailed = "failed" +) + +type SelectParams struct { + Status string + Limit int +} + +type JobRepository interface { + Get(context.Context, string) (Job, error) + Create(context.Context, *Job) error + Delete(context.Context, string) error + Select(context.Context, SelectParams) ([]Job, error) + Update(context.Context, *Job) error +} + +type Job struct { + ID string + Name string + Date time.Time + Status string + Data JobData +} + +func (j *Job) Validate() error { + if j.ID == "" { + return errors.New("missing id") + } + + if j.Name == "" { + return errors.New("missing name") + } + + if j.Status == "" { + return errors.New("missing status") + } + + if j.Date.IsZero() { + return errors.New("missing date") + } + + if err := j.Data.Validate(); err != nil { + return err + } + + return nil +} + +type JobData struct { + Keywords []string `json:"keywords"` + Lang string `json:"lang"` + Zoom int `json:"zoom"` + Lat string `json:"lat"` + Lon string `json:"lon"` + Depth int `json:"depth"` + Email bool `json:"email"` + MaxTime time.Duration `json:"max_time"` +} + +func (d *JobData) Validate() error { + if len(d.Keywords) == 0 { + return errors.New("missing keywords") + } + + if d.Lang == "" { + return errors.New("missing lang") + } + + if len(d.Lang) != 2 { + return errors.New("invalid lang") + } + + if d.Depth == 0 { + return errors.New("missing depth") + } + + if d.MaxTime == 0 { + return errors.New("missing max time") + } + + return nil +} diff --git a/web/service.go b/web/service.go new file mode 100644 index 0000000..889295b --- /dev/null +++ b/web/service.go @@ -0,0 +1,69 @@ +package web + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +type Service struct { + repo 
JobRepository + dataFolder string +} + +func NewService(repo JobRepository, dataFolder string) *Service { + return &Service{ + repo: repo, + dataFolder: dataFolder, + } +} + +func (s *Service) Create(ctx context.Context, job *Job) error { + return s.repo.Create(ctx, job) +} + +func (s *Service) All(ctx context.Context) ([]Job, error) { + return s.repo.Select(ctx, SelectParams{}) +} + +func (s *Service) Delete(ctx context.Context, id string) error { + if strings.Contains(id, "/") || strings.Contains(id, "\\") || strings.Contains(id, "..") { + return fmt.Errorf("invalid file name") + } + + datapath := filepath.Join(s.dataFolder, id+".csv") + + if _, err := os.Stat(datapath); err == nil { + if err := os.Remove(datapath); err != nil { + return err + } + } else if !os.IsNotExist(err) { + return err + } + + return s.repo.Delete(ctx, id) +} + +func (s *Service) Update(ctx context.Context, job *Job) error { + return s.repo.Update(ctx, job) +} + +func (s *Service) SelectPending(ctx context.Context) ([]Job, error) { + return s.repo.Select(ctx, SelectParams{Status: StatusPending, Limit: 1}) +} + +func (s *Service) GetCSV(_ context.Context, id string) (string, error) { + if strings.Contains(id, "/") || strings.Contains(id, "\\") || strings.Contains(id, "..") { + return "", fmt.Errorf("invalid file name") + } + + datapath := filepath.Join(s.dataFolder, id+".csv") + + if _, err := os.Stat(datapath); os.IsNotExist(err) { + return "", fmt.Errorf("csv file not found for job %s", id) + } + + return datapath, nil +} diff --git a/web/sqlite/sqlite.go b/web/sqlite/sqlite.go new file mode 100644 index 0000000..440b498 --- /dev/null +++ b/web/sqlite/sqlite.go @@ -0,0 +1,195 @@ +package sqlite + +import ( + "context" + "database/sql" + "encoding/json" + "time" + + _ "modernc.org/sqlite" // sqlite driver + + "github.com/gosom/google-maps-scraper/web" +) + +type repo struct { + db *sql.DB +} + +func New(path string) (web.JobRepository, error) { + db, err := initDatabase(path) + if err != 
nil { + return nil, err + } + + return &repo{db: db}, nil +} + +func (repo *repo) Get(ctx context.Context, id string) (web.Job, error) { + const q = `SELECT * from jobs WHERE id = ?` + + row := repo.db.QueryRowContext(ctx, q, id) + + return rowToJob(row) +} + +func (repo *repo) Create(ctx context.Context, job *web.Job) error { + item, err := jobToRow(job) + if err != nil { + return err + } + + const q = `INSERT INTO jobs (id, name, status, data, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)` + + _, err = repo.db.ExecContext(ctx, q, item.ID, item.Name, item.Status, item.Data, item.CreatedAt, item.UpdatedAt) + if err != nil { + return err + } + + return nil +} + +func (repo *repo) Delete(ctx context.Context, id string) error { + const q = `DELETE FROM jobs WHERE id = ?` + + _, err := repo.db.ExecContext(ctx, q, id) + + return err +} + +func (repo *repo) Select(ctx context.Context, params web.SelectParams) ([]web.Job, error) { + q := `SELECT * from jobs` + + var args []any + + if params.Status != "" { + q += ` WHERE status = ?` + + args = append(args, params.Status) + } + + q += " ORDER BY created_at DESC" + + if params.Limit > 0 { + q += " LIMIT ?" + + args = append(args, params.Limit) + } + + rows, err := repo.db.QueryContext(ctx, q, args...) + if err != nil { + return nil, err + } + + defer rows.Close() + + var ans []web.Job + + for rows.Next() { + job, err := rowToJob(rows) + if err != nil { + return nil, err + } + + ans = append(ans, job) + } + + if err := rows.Err(); err != nil { + return nil, err + } + + return ans, nil +} + +func (repo *repo) Update(ctx context.Context, job *web.Job) error { + item, err := jobToRow(job) + if err != nil { + return err + } + + const q = `UPDATE jobs SET name = ?, status = ?, data = ?, updated_at = ? 
WHERE id = ?` + + _, err = repo.db.ExecContext(ctx, q, item.Name, item.Status, item.Data, item.UpdatedAt, item.ID) + + return err +} + +type scannable interface { + Scan(dest ...any) error +} + +func rowToJob(row scannable) (web.Job, error) { + var j job + + err := row.Scan(&j.ID, &j.Name, &j.Status, &j.Data, &j.CreatedAt, &j.UpdatedAt) + if err != nil { + return web.Job{}, err + } + + ans := web.Job{ + ID: j.ID, + Name: j.Name, + Status: j.Status, + Date: time.Unix(j.CreatedAt, 0).UTC(), + } + + err = json.Unmarshal([]byte(j.Data), &ans.Data) + if err != nil { + return web.Job{}, err + } + + return ans, nil +} + +func jobToRow(item *web.Job) (job, error) { + data, err := json.Marshal(item.Data) + if err != nil { + return job{}, err + } + + return job{ + ID: item.ID, + Name: item.Name, + Status: item.Status, + Data: string(data), + CreatedAt: item.Date.Unix(), + UpdatedAt: time.Now().UTC().Unix(), + }, nil +} + +type job struct { + ID string + Name string + Status string + Data string + CreatedAt int64 + UpdatedAt int64 +} + +func initDatabase(path string) (*sql.DB, error) { + db, err := sql.Open("sqlite", path) + if err != nil { + return nil, err + } + + err = db.Ping() + if err != nil { + return nil, err + } + + return db, createSchema(db) +} + +func createSchema(db *sql.DB) error { + _, err := db.Exec(` + CREATE TABLE IF NOT EXISTS jobs ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + status TEXT NOT NULL, + data TEXT NOT NULL, + created_at INT NOT NULL, + updated_at INT NOT NULL + ) + `) + + return err +} diff --git a/web/static/css/main.css b/web/static/css/main.css new file mode 100644 index 0000000..784a2ba --- /dev/null +++ b/web/static/css/main.css @@ -0,0 +1,334 @@ +:root { + --color-background: #f9f9f9; + --color-surface: #ffffff; + --color-text: #333333; + --color-text-light: #666666; + --color-border: #e0e0e0; + --color-primary: #4a4a4a; + --color-primary-light: #5a5a5a; + --color-success: #43a047; + --color-warning: #fdd835; + --color-error: #e53935; 
+ --color-github-star: #f1e05a; + --color-sponsor: #ea4aaa; + --color-sponsor: #4a4a4a; + --color-sponsor-hover: #5a5a5a; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen-Sans, Ubuntu, Cantarell, 'Helvetica Neue', sans-serif; + line-height: 1.5; + color: var(--color-text); + background-color: var(--color-background); + margin: 0; + padding: 0; + height: 100vh; + display: flex; + flex-direction: column; +} + +.app-container { + flex: 1; + display: flex; + flex-direction: column; +} + +header { + background-color: var(--color-surface); + padding: 24px 32px; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); +} + +h1 { + font-size: 24px; + font-weight: 500; + color: var(--color-primary); + margin: 0 0 16px 0; +} + +.github-section { + display: flex; + align-items: center; + margin-top: 16px; +} + +.github-button { + display: inline-flex; + align-items: center; + background-color: var(--color-sponsor); + color: white; + border: 1px solid var(--color-border); + padding: 10px 16px; + margin-left: 16px; + border-radius: 4px; + font-size: 14px; + text-decoration: none; + transition: background-color 0.2s, border-color 0.2s; +} + +.github-button:hover { + background-color: var(--color-sponsor); + border-color: var(--color-primary); +} + +.github-button svg { + margin-right: 8px; +} + +.github-button svg path { + fill: var(--color-github-star); +} + +main { + flex: 1; + display: flex; + overflow: hidden; +} + +.sidebar { + width: 320px; + background-color: var(--color-surface); + padding: 24px; + overflow-y: auto; + border-right: 1px solid var(--color-border); + box-shadow: 2px 0 5px rgba(0, 0, 0, 0.05); +} + +.content { + flex: 1; + padding: 24px 32px; + overflow-y: auto; + background-color: var(--color-background); +} + +form { + display: flex; + flex-direction: column; + gap: 24px; +} + +fieldset { + border: none; + padding: 0; + margin: 0; +} + +legend { + font-weight: 500; + margin-bottom: 16px; + color: var(--color-primary); +} + 
+/* Form controls. */
+.form-group {
+    margin-bottom: 20px;
+}
+
+label {
+    display: block;
+    margin-bottom: 8px;
+    color: var(--color-text-light);
+}
+
+input[type="text"],
+input[type="number"],
+textarea {
+    width: 100%;
+    padding: 10px 12px;
+    border: 1px solid var(--color-border);
+    border-radius: 4px;
+    font-size: 14px;
+    /* Keep padding and border inside the 100% width. */
+    box-sizing: border-box;
+}
+
+button {
+    background-color: var(--color-primary);
+    color: white;
+    border: none;
+    padding: 12px 20px;
+    border-radius: 4px;
+    cursor: pointer;
+    font-size: 14px;
+    transition: background-color 0.2s;
+}
+
+button:hover {
+    background-color: var(--color-primary-light);
+}
+
+/* Table: rounded card; overflow is hidden so cells are clipped to the radius. */
+table {
+    width: 100%;
+    border-collapse: separate;
+    border-spacing: 0;
+    background-color: var(--color-surface);
+    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+    border-radius: 4px;
+    overflow: hidden;
+}
+
+th, td {
+    text-align: left;
+    padding: 16px;
+    border-bottom: 1px solid var(--color-border);
+}
+
+th {
+    font-weight: 500;
+    color: var(--color-primary);
+    background-color: var(--color-surface);
+}
+
+/* Status pill: shared shape first, then one colour variant per modifier class. */
+.status-indicator {
+    display: inline-block;
+    padding: 4px 10px;
+    border-radius: 12px;
+    font-size: 12px;
+    font-weight: 500;
+}
+
+.status-ok {
+    background-color: var(--color-success);
+    color: white;
+}
+
+.status-pending {
+    background-color: var(--color-warning);
+    color: var(--color-text);
+}
+
+.status-working {
+    background-color: var(--color-warning);
+    color: var(--color-text);
+}
+
+/* NOTE(review): same red as .status-error but dark text instead of white; confirm this is intended. */
+.status-failed {
+    background-color: var(--color-error);
+    color: var(--color-text);
+}
+
+.status-error {
+    background-color: var(--color-error);
+    color: white;
+}
+
+/* Small action links: shared shape, then per-action colour. */
+.download-button, .delete-button {
+    padding: 6px 12px;
+    border-radius: 4px;
+    font-size: 12px;
+    text-decoration: none;
+    color: white;
+}
+
+.download-button {
+    background-color: var(--color-success);
+}
+
+.delete-button {
+    background-color: var(--color-error);
+}
+
+/* Error banner: hidden by default. */
+.error-message {
+    display: none;
+    background-color: #ffebee;
+    border: 1px solid var(--color-error);
+    color: var(--color-error);
+    padding: 12px 16px;
+    border-radius: 4px;
+    margin-bottom: 20px;
+    font-size: 14px;
+}
+
+/* Show the banner as soon as it contains any content. */
+.error-message:not(:empty) {
+    display: block;
+}
+
+/* Collapsible section header. */
+.expandable-section summary {
+    cursor: pointer;
+    padding: 12px 16px;
+    background-color: var(--color-background);
+    border: 1px solid var(--color-border);
+    border-radius: 4px;
+    color: var(--color-text);
+}
+
+/* When open, drop the summary's bottom edge and square its lower corners. */
+.expandable-section[open] summary {
+    border-bottom: none;
+    border-bottom-left-radius: 0;
+    border-bottom-right-radius: 0;
+}
+
+/* Narrow screens: stack the sidebar above the content. */
+@media (max-width: 768px) {
+    main {
+        flex-direction: column;
+    }
+
+    .sidebar {
+        width: 100%;
+        border-right: none;
+        border-bottom: 1px solid var(--color-border);
+        box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
+    }
+
+    .content {
+        padding: 20px;
+    }
+}
+
+/* Sponsor card; position: relative anchors the absolutely positioned close button. */
+.sponsor-section {
+    position: relative;
+    margin-top: 24px;
+    padding: 16px;
+    background-color: var(--color-surface);
+    border: 1px solid var(--color-border);
+    border-radius: 4px;
+}
+
+.sponsor-close {
+    position: absolute;
+    top: 8px;
+    right: 8px;
+    width: 20px;
+    height: 20px;
+    border: none;
+    background-color: transparent;
+    color: var(--color-text-light);
+    font-size: 16px;
+    line-height: 1;
+    cursor: pointer;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    transition: color 0.2s;
+}
+
+.sponsor-close:hover {
+    color: var(--color-text);
+}
+
+.sponsor-text {
+    margin: 0 0 12px 0;
+    font-size: 14px;
+    color: var(--color-text);
+    padding-right: 20px; /* Make space for the close button */
+}
+
+.sponsor-button {
+    display: inline-flex;
+    align-items: center;
+    background-color: var(--color-sponsor);
+    color: white;
+    border: none;
+    padding: 10px 16px;
+    border-radius: 4px;
+    font-size: 14px;
+    text-decoration: none;
+    transition: background-color 0.2s;
+}
+
+.sponsor-button:hover {
+    background-color: var(--color-sponsor-hover);
+}
+
+.sponsor-button svg {
+    margin-right: 8px;
+}
+
diff --git a/web/static/templates/index.html b/web/static/templates/index.html
new file mode 100644
index 0000000..de2b0b9
--- /dev/null +++ b/web/static/templates/index.html @@ -0,0 +1,133 @@ + + + + + + Google Maps Scraper + + + + +
+
+

Google Maps Scraper

+
+

If you find this tool useful, please consider starring our repository:

+ + + + + Star on GitHub + +
+ + +
+
+ +
+
+ + + + + + + + + + + + + +
Job ID | Job Name | Job Date | Status | Actions
+
+
+
+ + + + diff --git a/web/static/templates/job_row.html b/web/static/templates/job_row.html new file mode 100644 index 0000000..fe1d2d0 --- /dev/null +++ b/web/static/templates/job_row.html @@ -0,0 +1,18 @@ + + {{.ID}} + {{.Name}} + {{.Date}} + + {{.Status}} + + + {{ if eq .Status "ok" }} + Download + {{ end }} + + + diff --git a/web/static/templates/job_rows.html b/web/static/templates/job_rows.html new file mode 100644 index 0000000..acc7eb0 --- /dev/null +++ b/web/static/templates/job_rows.html @@ -0,0 +1,20 @@ +{{range .}} + + {{.ID}} + {{.Name}} + {{.Date}} + + {{.Status}} + + + {{ if eq .Status "ok" }} + Download + {{ end }} + + + +{{end}}
diff --git a/web/web.go b/web/web.go
new file mode 100644
index 0000000..013f1b5
--- /dev/null
+++ b/web/web.go
@@ -0,0 +1,338 @@
+// Package web implements the browser front end: an HTTP server that renders
+// the embedded templates and exposes the scrape, jobs, download and delete
+// endpoints.
+package web
+
+import (
+	"context"
+	"embed"
+	"fmt"
+	"html/template"
+	"io"
+	"io/fs"
+	"log"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+)
+
+// static embeds the static directory: its contents are served under /static/
+// and the HTML templates are parsed from it.
+//
+//go:embed static
+var static embed.FS
+
+// Server is the HTTP front end. It owns the underlying http.Server, the parsed
+// templates (keyed by their path inside the embedded file system) and the
+// Service the handlers delegate to.
+type Server struct {
+	tmpl map[string]*template.Template
+	srv  *http.Server
+	svc  *Service
+}
+
+// New builds a Server around svc.
+//
+// It registers the routes /static/, /scrape, /download, /delete, /jobs and /,
+// then parses the index, job_rows and job_row templates from the embedded
+// file system, returning an error if any of them fails to parse. The listen
+// address is fixed to ":8080".
+func New(svc *Service) (*Server, error) {
+	ans := Server{
+		svc:  svc,
+		tmpl: make(map[string]*template.Template),
+		srv: &http.Server{
+			Addr:              ":8080",
+			ReadHeaderTimeout: 10 * time.Second,
+		},
+	}
+
+	// Strip the "static" directory so URLs map to /static/<file>.
+	staticFS, err := fs.Sub(static, "static")
+	if err != nil {
+		return nil, err
+	}
+
+	fileServer := http.FileServer(http.FS(staticFS))
+	mux := http.NewServeMux()
+
+	mux.Handle("/static/", http.StripPrefix("/static/", fileServer))
+	mux.HandleFunc("/scrape", ans.scrape)
+	mux.HandleFunc("/download", ans.download)
+	mux.HandleFunc("/delete", ans.delete)
+	mux.HandleFunc("/jobs", ans.getJobs)
+	mux.HandleFunc("/", ans.index)
+
+	ans.srv.Handler = mux
+
+	// The handlers look templates up by exactly these keys.
+	tmplsKeys := []string{
+		"static/templates/index.html",
+		"static/templates/job_rows.html",
+		"static/templates/job_row.html",
+	}
+
+	for _, key := range tmplsKeys {
+		tmp, err := template.ParseFS(static,
key)
+		if err != nil {
+			return nil, err
+		}
+
+		ans.tmpl[key] = tmp
+	}
+
+	return &ans, nil
+}
+
+// Start serves HTTP on s.srv.Addr until ctx is cancelled or the listener
+// fails.
+//
+// On cancellation the server is shut down gracefully, and Start returns only
+// once that shutdown attempt has finished (bounded by shutdownTimeout), so the
+// process cannot exit while in-flight requests are still being drained. It
+// returns nil after a shutdown and the ListenAndServe error otherwise.
+func (s *Server) Start(ctx context.Context) error {
+	const shutdownTimeout = 10 * time.Second
+
+	served := make(chan struct{})   // closed once ListenAndServe has returned
+	finished := make(chan struct{}) // closed once the watcher goroutine exits
+
+	go func() {
+		defer close(finished)
+
+		select {
+		case <-ctx.Done():
+		case <-served:
+			// The listener stopped on its own (e.g. address already in use):
+			// nothing to shut down, and waiting on ctx would leak this goroutine.
+			return
+		}
+
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
+		defer cancel()
+
+		err := s.srv.Shutdown(shutdownCtx)
+		if err != nil {
+			log.Println(err)
+
+			return
+		}
+
+		log.Println("server stopped")
+	}()
+
+	fmt.Fprintf(os.Stderr, "visit http://localhost%s\n", s.srv.Addr)
+
+	err := s.srv.ListenAndServe()
+
+	// ListenAndServe returns as soon as Shutdown is called; wait for the
+	// watcher so Shutdown itself has completed before reporting back.
+	close(served)
+	<-finished
+
+	if err != nil && err != http.ErrServerClosed {
+		return err
+	}
+
+	return nil
+}
+
+// formData carries the values handed to the index template.
+type formData struct {
+	Name     string
+	MaxTime  string
+	Keywords []string
+	Language string
+	Zoom     int
+	Lat      string
+	Lon      string
+	Depth    int
+	Email    bool
+}
+
+// KeywordsString returns the keywords joined with newlines, one per line.
+//
+//nolint:gocritic // this is used in template
+func (f formData) KeywordsString() string {
+	return strings.Join(f.Keywords, "\n")
+}
+
+// index handles GET requests by rendering the index template with the default
+// form values. Any other method is answered with 405.
+func (s *Server) index(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+
+		return
+	}
+
+	tmpl, ok := s.tmpl["static/templates/index.html"]
+	if !ok {
+		http.Error(w, "missing tpl", http.StatusInternalServerError)
+
+		return
+	}
+
+	data := formData{
+		Name:     "",
+		MaxTime:  "10m",
+		Keywords: []string{},
+		Language: "en",
+		Zoom:     0,
+		Lat:      "0",
+		Lon:      "0",
+		Depth:    10,
+		Email:    false,
+	}
+
+	// Best effort: once rendering has started the status line is already
+	// sent, so a template error can no longer be reported to the client.
+	_ = tmpl.Execute(w, data)
+}
+
+// scrape handles POST requests carrying the scrape form: it builds a Job from
+// the submitted fields, validates it, stores it through the service and
+// answers with the rendered job row. Invalid field values are answered with
+// 422 and a request body that cannot be parsed with 400.
+func (s *Server) scrape(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+
+		return
+	}
+
+	err := r.ParseForm()
+	if err != nil {
+		// A body that cannot be parsed is the client's fault, not a server error.
+		http.Error(w, err.Error(), http.StatusBadRequest)
+
+		return
+	}
+
+	newJob := Job{
+		ID:     uuid.New().String(),
+		Name:   r.Form.Get("name"),
+		Date:   time.Now().UTC(),
+		Status: StatusPending,
+		Data:   JobData{},
+	}
+
+	maxTimeStr := r.Form.Get("maxtime")
+
+	maxTime, err := time.ParseDuration(maxTimeStr)
+	if err != nil {
+		http.Error(w, "invalid max time", http.StatusUnprocessableEntity)
+
+		return
+	}
+
+	if maxTime <
time.Minute {
+		// The check accepts exactly one minute, so say "at least".
+		http.Error(w, "max time must be at least 1m", http.StatusUnprocessableEntity)
+
+		return
+	}
+
+	newJob.Data.MaxTime = maxTime
+
+	keywordsStr, ok := r.Form["keywords"]
+	if !ok {
+		http.Error(w, "missing keywords", http.StatusUnprocessableEntity)
+
+		return
+	}
+
+	// One keyword per line; blank lines are dropped.
+	keywords := strings.Split(keywordsStr[0], "\n")
+	for _, k := range keywords {
+		k = strings.TrimSpace(k)
+		if k == "" {
+			continue
+		}
+
+		newJob.Data.Keywords = append(newJob.Data.Keywords, k)
+	}
+
+	newJob.Data.Lang = r.Form.Get("lang")
+
+	newJob.Data.Zoom, err = strconv.Atoi(r.Form.Get("zoom"))
+	if err != nil {
+		http.Error(w, "invalid zoom", http.StatusUnprocessableEntity)
+
+		return
+	}
+
+	newJob.Data.Lat = r.Form.Get("latitude")
+	newJob.Data.Lon = r.Form.Get("longitude")
+
+	newJob.Data.Depth, err = strconv.Atoi(r.Form.Get("depth"))
+	if err != nil {
+		http.Error(w, "invalid depth", http.StatusUnprocessableEntity)
+
+		return
+	}
+
+	// An HTML checkbox submits "on" when ticked and nothing otherwise.
+	newJob.Data.Email = r.Form.Get("email") == "on"
+
+	err = newJob.Validate()
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusUnprocessableEntity)
+
+		return
+	}
+
+	err = s.svc.Create(r.Context(), &newJob)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+
+		return
+	}
+
+	tmpl, ok := s.tmpl["static/templates/job_row.html"]
+	if !ok {
+		http.Error(w, "missing tpl", http.StatusInternalServerError)
+
+		return
+	}
+
+	// Best effort: once rendering has started the status line is already
+	// sent, so a template error can no longer be reported to the client.
+	_ = tmpl.Execute(w, newJob)
+}
+
+// getJobs handles GET requests by loading every job from the service and
+// rendering them with the job_rows template. Any other method is answered
+// with 405.
+func (s *Server) getJobs(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+
+		return
+	}
+
+	tmpl, ok := s.tmpl["static/templates/job_rows.html"]
+	if !ok {
+		http.Error(w, "missing tpl", http.StatusInternalServerError)
+		return
+	}
+
+	// Use the request context, like the other handlers, so the lookup is
+	// abandoned when the client disconnects.
+	jobs, err := s.svc.All(r.Context())
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+
+		return
+	}
+
+	_ = tmpl.Execute(w, jobs)
+}
+
+// download handles GET requests by sending the CSV file of the job named by
+// the "id" query parameter as an attachment.
+func (s *Server) download(w http.ResponseWriter, r
*http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+
+		return
+	}
+
+	ctx := r.Context()
+	id := r.URL.Query().Get("id")
+
+	// Only well-formed UUIDs are passed on to the service.
+	_, err := uuid.Parse(id)
+	if err != nil {
+		http.Error(w, "Invalid ID", http.StatusUnprocessableEntity)
+
+		return
+	}
+
+	filePath, err := s.svc.GetCSV(ctx, id)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusNotFound)
+		return
+	}
+
+	file, err := os.Open(filePath)
+	if err != nil {
+		http.Error(w, "Failed to open file", http.StatusInternalServerError)
+		return
+	}
+	defer file.Close()
+
+	fileName := filepath.Base(filePath)
+	// Quote the file name so spaces or separators cannot break the header.
+	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", fileName))
+	w.Header().Set("Content-Type", "text/csv")
+
+	written, err := io.Copy(w, file)
+	if err == nil {
+		return
+	}
+
+	if written > 0 {
+		// The status line and part of the body are already on the wire, so
+		// an error response would only corrupt the CSV; record it instead.
+		log.Printf("download %s: %v", id, err)
+
+		return
+	}
+
+	// Nothing was sent yet: answer with a plain error rather than an attachment.
+	w.Header().Del("Content-Disposition")
+	http.Error(w, "Failed to send file", http.StatusInternalServerError)
+}
+
+// delete handles DELETE requests by removing the job named by the "id" query
+// parameter through the service. The id must be a well-formed UUID (422
+// otherwise); a service failure is answered with 500 and success with 200.
+func (s *Server) delete(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodDelete {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+
+		return
+	}
+
+	deleteID := r.URL.Query().Get("id")
+
+	_, err := uuid.Parse(deleteID)
+	if err != nil {
+		http.Error(w, "Invalid ID", http.StatusUnprocessableEntity)
+
+		return
+	}
+
+	err = s.svc.Delete(r.Context(), deleteID)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+
+		return
+	}
+
+	w.WriteHeader(http.StatusOK)
+}
+
+// formatDate renders t in the form "Jan 02, 2006 15:04:05".
+// NOTE(review): no caller is visible in this part of the file; confirm it is
+// still needed (for example as a template helper).
+func formatDate(t time.Time) string {
+	return t.Format("Jan 02, 2006 15:04:05")
+}