From 20b389afd55e1dce9ef3507917cade9ab427f074 Mon Sep 17 00:00:00 2001 From: Monzurul Islam Date: Fri, 30 Jun 2023 18:14:22 +0900 Subject: [PATCH] added gg scrapper --- go.mod | 10 ++ go.sum | 25 +++++ main.go | 14 +++ scrapper/google.go | 231 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 280 insertions(+) diff --git a/go.mod b/go.mod index b799e78..c114a61 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,9 @@ require ( require ( github.com/VividCortex/gohistogram v1.0.0 // indirect github.com/andybalholm/cascadia v1.3.2 // indirect + github.com/antchfx/htmlquery v1.3.0 // indirect + github.com/antchfx/xmlquery v1.3.17 // indirect + github.com/antchfx/xpath v1.2.4 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bytedance/sonic v1.8.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect @@ -24,14 +27,19 @@ require ( github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.11.2 // indirect + github.com/gobwas/glob v0.2.3 // indirect github.com/gobwas/httphead v0.1.0 // indirect github.com/gobwas/pool v0.2.1 // indirect github.com/gobwas/ws v1.2.1 // indirect github.com/goccy/go-json v0.10.0 // indirect + github.com/gocolly/colly v1.2.0 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.3 // indirect + github.com/jawher/mow.cli v1.2.0 // indirect github.com/joho/godotenv v1.5.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/kennygrant/sanitize v1.2.4 // indirect github.com/klauspost/cpuid/v2 v2.0.9 // indirect github.com/leodido/go-urn v1.2.1 // indirect github.com/mailru/easyjson v0.7.7 // indirect @@ -44,6 +52,7 @@ require ( github.com/prometheus/client_model v0.4.0 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.0 // indirect + github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect github.com/temoto/robotstxt v1.1.2 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.9 // indirect @@ -53,6 +62,7 @@ require ( golang.org/x/sys v0.9.0 // indirect golang.org/x/text v0.10.0 // indirect golang.org/x/time v0.3.0 // indirect + google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.31.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 33129a4..84fa0df 100644 --- a/go.sum +++ b/go.sum @@ -56,6 +56,13 @@ github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x0 github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/antchfx/htmlquery v1.3.0 h1:5I5yNFOVI+egyia5F2s/5Do2nFWxJz41Tr3DyfKD25E= +github.com/antchfx/htmlquery v1.3.0/go.mod h1:zKPDVTMhfOmcwxheXUsx4rKJy8KEY/PU6eXr/2SebQ8= +github.com/antchfx/xmlquery v1.3.17 h1:d0qWjPp/D+vtRw7ivCwT5ApH/3CkQU8JOeo3245PpTk= +github.com/antchfx/xmlquery v1.3.17/go.mod h1:Afkq4JIeXut75taLSuI31ISJ/zeq+3jG7TunF7noreA= +github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/antchfx/xpath v1.2.4 h1:dW1HB/JxKvGtJ9WyVGJ0sIoEcqftV3SqIstujI+B9XY= +github.com/antchfx/xpath v1.2.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= @@ -173,6 +180,8 @@ github.com/go-playground/validator/v10 v10.11.2/go.mod h1:NieE624vt4SCTJtD87arVL github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-zookeeper/zk v1.0.2/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= @@ -183,6 +192,8 @@ github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/K github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/goccy/go-json v0.10.0 h1:mXKd9Qw4NuzShiRlOXKews24ufknHO7gx30lsDyokKA= github.com/goccy/go-json v0.10.0/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= +github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= @@ -192,6 +203,7 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfU github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= @@ -283,6 +295,8 @@ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpO github.com/hudl/fargo v1.4.0/go.mod h1:9Ai6uvFy5fQNq6VPKtg+Ceq1+eTY4nKUlR2JElEOcDo= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= +github.com/jawher/mow.cli v1.2.0 h1:e6ViPPy+82A/NFF/cfbq3Lr6q4JHKT9tyHwTCcUQgQw= +github.com/jawher/mow.cli v1.2.0/go.mod h1:y+pcA3jBAdo/GIZx/0rFjw/K2bVEODP9rfZOfaiq8Ko= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= @@ -302,6 +316,8 @@ github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfV github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= +github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= @@ -440,6 +456,8 @@ github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUA github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= +github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= @@ -454,6 +472,7 @@ github.com/streadway/amqp v1.0.0/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1Sd github.com/streadway/handy v0.0.0-20200128134331-0f66f006fb2e/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -602,6 +621,7 @@ golang.org/x/net v0.0.0-20210917221730-978cfadd31cf/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= @@ -693,6 +713,7 @@ golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -701,6 +722,7 @@ golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -712,6 +734,7 @@ golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= @@ -808,6 +831,8 @@ google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7 google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= diff --git a/main.go b/main.go index 4b3fcdf..44d5c7c 100644 --- a/main.go +++ b/main.go @@ -138,6 +138,20 @@ func main() { c.JSON(200, gin.H{"data": data}) }) + r.GET("/gg/:word", func(c *gin.Context) { + + data, status := scrapper.GetContents(c.Param("word")) + + fmt.Println(data, status) + + if status != 200 { + c.JSON(status, gin.H{"error": "could not find data"}) + return + } + + c.JSON(200, gin.H{"data": data}) + }) + r.GET("/mw/:word", func(c *gin.Context) { data, err := scrapper.GetMWData(c.Param("word")) diff --git a/scrapper/google.go b/scrapper/google.go index 64c034b..72acaa7 100644 --- a/scrapper/google.go +++ b/scrapper/google.go @@ -1,15 +1,21 @@ package scrapper import ( + "encoding/json" "fmt" + "io/ioutil" + "log" "math" + "math/rand" "net/http" "net/url" "os" "regexp" "strings" + "time" "github.com/PuerkitoBio/goquery" + "github.com/gocolly/colly" ) // structs @@ -313,3 +319,228 @@ func RoundRobinApiKey(API_KEY string) string { return key } + + +var userAgents = []string{} + +func GetContents(word string) (*WordStruct, int) { + getUserAgents() + + defaultUserAgent := "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0" + randomUserAgent := getRandomUserAgent() + userAgent := "" + + if len(userAgents) == 0 || randomUserAgent == "" { + userAgent = defaultUserAgent + } else { + userAgent = randomUserAgent + } + + fmt.Println(userAgent) + + // Instantiate default collector + c := colly.NewCollector( + colly.UserAgent(userAgent), + ) + + var wordS WordStruct + errorStatus := 200 + + // On every a element which has href attribute call callback + c.OnHTML(mainContainer, func(e *colly.HTMLElement) { + + // fmt.Println(e.DOM.Html()) + + // children 5 div with tag jsslot="" + // 1. just the header + // 2. search box + // 3. all the definitions and the other things we want [our target] + // 4. translations in other language + // 5. use over time graph + + firstContainer := e.DOM.Find(".lr_container").First() + + // 3rd div with attribute jsslot="" go obtain the main data + thirdJsSlot := firstContainer.Find(jsSlotsFilterTag).FilterFunction(func(i int, s *goquery.Selection) bool { + return i == 2 + }) + + // inside the 3rd div 4 dive + // 1- the main word + // 2- see definitions in + // 3- meanings [our target] + // 4- origin + + // find the main word + mainWord := thirdJsSlot.Find(mainWordQueryTag).Text() + fmt.Println("main word", mainWord) + // check if it has phonetics and audio in the #1 div + + firstDiv := thirdJsSlot.Children().First() + + mainWordAudio := firstDiv.Find(mainWordAudioTag).Children().AttrOr("src", "") + mainWordPhonetics := firstDiv.Find(mainWordPhoneticsTag).First().Text() + + fmt.Println(mainWordAudio, mainWordPhonetics) + wordS.MainWord = mainWord + wordS.Audio = mainWordAudio + wordS.Phonetic = mainWordPhonetics + + child := thirdJsSlot.Children() + + allPoses := []PartsOfSpeech{} + + // inside #3 div + child.Find(posDivFilterTag).Each(func(i int, s *goquery.Selection) { + // we are inside each parts of speech div + poses := PartsOfSpeech{} + // fmt.Println("=========================================================================") + // fmt.Println(i, "th div") + // get the phonetics + phonetics := s.Find(posPhoneticsTag).First().Text() + // fmt.Println("phonetics ::", phonetics) + // get pronunciation the audio source + audio := s.Find(posAudioTag).Children().AttrOr("src", "") + // fmt.Println("audio ::", audio) + + // get the parts of speech + pos := s.Find(posTag).First().Text() + // fmt.Println("pos ::", pos) + + poses.Phonetic = phonetics + poses.Audio = audio + poses.PartsOfSpeech = pos + + // each meanings with examples + s.Find("ol > li").Children().Each(func(i int, s *goquery.Selection) { + // definition + dfnElement := s.Find(posEachDefinitionTag) + definition := Definition{} + + dfn := dfnElement.Text() + + if dfn != "" { + fmt.Println("definition ::", dfn) + // get the example sentence + exElement := dfnElement.Siblings() + example := strings.Trim(exElement.First().Text(), "\"") + + // fmt.Println("example ::", example) + + // now lets find the synonym and antonyms + var synonyms []string + var antonyms []string + + currentType := "Similar" + synAntElements := s.Find(posSynAntParentTag) + + synAntElements.Children().Each(func(i int, s *goquery.Selection) { + + txtToAdd := strings.TrimSpace(s.Text()) + chkIfToAdd := false + + // filter out the grayed out words from the results + if s.Children().First().AttrOr("style", "") == "cursor:text" { + chkIfToAdd = false + } else { + chkIfToAdd = true + } + + // omit first div with text h + // now encounter Similar or Opposite + + if txtToAdd == "Similar:" { + currentType = "synonyms" + } + if txtToAdd == "Opposite:" { + currentType = "antonyms" + } + + if currentType == "synonyms" && txtToAdd != "Similar:" && txtToAdd != "h" && txtToAdd != "" && chkIfToAdd { + synonyms = append(synonyms, txtToAdd) + } + if currentType == "antonyms" && txtToAdd != "Opposite:" && txtToAdd != "h" && txtToAdd != "" && chkIfToAdd { + antonyms = append(antonyms, txtToAdd) + } + + }) + + definition.Definition = dfn + definition.Example = example + definition.Synonyms = synonyms + definition.Antonyms = antonyms + + // fmt.Println(currentType) + + // fmt.Println("synonyms ::", strings.Join(synonyms, ",")) + // fmt.Println("antonyms ::", strings.Join(antonyms, ",")) + poses.Definitions = append(poses.Definitions, definition) + + } + }) + allPoses = append(allPoses, poses) + + // fmt.Println("=========================================================================") + + }) + + // fmt.Println("third div ===== 2",thirdJsSlot.First().Children().Size()) + // fmt.Println(thirdDiv.Html()) + + wordS.PartsOfSpeeches = allPoses + + // fmt.Println("asdf",wordS) + + }) + + // Before making a request print "Visiting ..." + c.OnRequest(func(r *colly.Request) { + fmt.Println("Visiting", r.URL.String()) + }) + + // Set error handler + c.OnError(func(r *colly.Response, err error) { + errorStatus = r.StatusCode + fmt.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err) + + }) + + // Start scraping on https://hackerspaces.org + c.Visit("https://www.google.com/search?&hl=en&q=define+" + word) + + // fmt.Println(wordS) + + // b, err := json.MarshalIndent(wordS, "", " ") + // if err != nil { + // fmt.Println(err) + // } + // fmt.Print(string(b)) + // fmt.Println("from here",wordS) + return &wordS, errorStatus +} + +func getUserAgents() { + resp, err := http.Get("https://jnrbsn.github.io/user-agents/user-agents.json") + if err != nil { + log.Fatalln(err) + } + //We Read the response body on the line below. + body, err := ioutil.ReadAll(resp.Body) + defer resp.Body.Close() + if err != nil { + log.Fatalln(err) + } + //Convert the body to type string + // sb := string(body) + _ = json.Unmarshal(body, &userAgents) + // log.Printf(sb) + // log.Println(userAgents) + +} + +func getRandomUserAgent() string { + s := rand.NewSource(time.Now().Unix()) + r := rand.New(s) // initialize local pseudorandom generator + index := r.Intn(len(userAgents)) + return userAgents[index] +}