From c8f2f4c17509edd2dcd581838a2a5b3bc5492056 Mon Sep 17 00:00:00 2001 From: rocky <41517713+Gateway2745@users.noreply.github.com> Date: Sat, 5 Oct 2019 19:09:32 +0530 Subject: [PATCH 1/3] automate excel file generation of GSoC 2018 organizations data for better readability --- .../GSoC-Organizations-Data/requirements.txt | 4 ++ .../src/GSoC-Organizations-Data/results.xlsx | Bin 0 -> 17887 bytes .../src/GSoC-Organizations-Data/scraper.py | 53 ++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100644 Automation/src/GSoC-Organizations-Data/requirements.txt create mode 100644 Automation/src/GSoC-Organizations-Data/results.xlsx create mode 100644 Automation/src/GSoC-Organizations-Data/scraper.py diff --git a/Automation/src/GSoC-Organizations-Data/requirements.txt b/Automation/src/GSoC-Organizations-Data/requirements.txt new file mode 100644 index 00000000..5364f71a --- /dev/null +++ b/Automation/src/GSoC-Organizations-Data/requirements.txt @@ -0,0 +1,4 @@ +pkg-resources==0.0.0 +selenium==3.141.0 +urllib3==1.25.3 +XlsxWriter==1.1.8 diff --git a/Automation/src/GSoC-Organizations-Data/results.xlsx b/Automation/src/GSoC-Organizations-Data/results.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..29f99c466e6ed5ddefee5ae360ef8ca9d269db1c GIT binary patch literal 17887 zcmZ5|b8si!7VR&Z*tTukb~5qAwrx9^SQBTGiEZ1qZQFX8@4L6&yYG+gu3EMBK6|e| z-KT2zKCK`P0*V3v03f~&LV&)uB$qr801yKT03d%Mn!>g=PR2G)y2|c$#*W%_Zq`=i ziK8|>3<#o+?-4_*RE$oFDyaUV_@ijWcOY7xLNqw5H!vt49SK(onu<9AvSmgWYmS3I zc`#_PQ)mftVLMRX^T?U5xG+u&3*54K4qb_azi&{W^CTL8QHb@Hb{nArG21+-NBpAL zkPdGN5z|K`a;s1#WQ}-Yl{HrO`16Or)}vjsIFS4*QR$R0Bv8C)ZcXu_Zg29G01_wS zOrO?Dqd8OdlYH|Iz;@TXN<2OLu8t008>ys_S5E@y7&6aMgk(Ypp_thos5OAi^K-IKKgfG;KgNltN={3IrvZevqRI-({Y~Y zlj!dcI50G&SSVnl6QL7bqaWt>@Kn&>LW#kd?DApK4@}-=h^2~$+ha6PC5FCv#69IW zH}J})QvXC5ksmb|qMV#N4s+@Hc~??#>GU$}cMw$EFd?wgLR$3BIKygXqiK5_cdnmD zVEW^2gu+M$H{9QS?lL1p?I4#*Vd_v@b+Pr0w=hd$DmSMg!wgZWSZ-(ZN2fE+sy5`| 
zWR$c}<7YF@uiyY;R}DzRH>MeK_9-vG|0*Klta)|xr9|_WB3NHnH!J#oMRB!tur#o> zwfs9L|0|G1F9U+euo?7pa_ zH1O8b@yV%35VDg1H3d{sh&!^SiAE?sSKF9Lg0T9=7I-|=D2b|K{DsRU%L(Sa773zI zx9?l2yOD`TIB>rr5T*~a$?^pqlZ%rS)7xzn=|*lO|CoQ9Q%f_jM9nWR7dRUT`y7Uo zoyGKmJ+Ks*WT)D5mGE>3%Mf<9h(pz)i7zaqa-bSoWr*a|=a}5VACk-(Z-p7<#0(qc zqrU^TUwAxGt6kvEobs_7S!63nUFl@FIoUZO{$G9cm3*Td5dnZQUjP8(Uw!^6<7j4V z?Bq!Q&xP@?MA6!#HaOx)ZI@~YZIw&&+Vk*&iQ8oQXLxApXSY0L;z;`O#Ic5>uaDgM zR=zxD8L%+4=d`vft}`Jvz8~bh1*|*PoX^t`F=yAD^yo-Znp9 zE+%iEmv8MK-CA4uc`oQDKO24dJ`dirH@rD{-ml)wJNdIReY$pU_n!-QuWwf)BM;N% zUqjb#(|Zr!cOjqc+&q?W<#qYI+uT0h>NeLq;_)u%Un;Qa@mySE4t0HdRtx!WnBnmG z*4ED|;o155&hTS&eTQa0d|Y{GKc8pU4-ao2FKsU6mPS7h&&)r!Z{AjK9$GH{+(tfL z>i9nTUOeb#vHS9KUvzvPe)#x0u5D&jbUiPR`kue`Zik{@RTlc5=oF#%yk9tL`1*3S zuKO;%yZOEiZjb6dxZ?BU%Jb*9E7-t2PFDJ6@viZ9eC&m;Po^C99<#5l2b-`1w%XUBwwe)@oo(BD<|y+Bugh|$m*>v+A7SC+PI;JRLJ^5S-6@V3)!Ki^&y z{MfhV)h?7>Y08!DC?fu$GQZFA>{Q*vg zpObVNs@1I8<%}9-+-gPIAO!}L1odYnT8lWDRQ0k3b!EgMoZ^%;=ZD}HRw#9KNJ{yN zDjTQB@&RDpvR+ivR23E4eObzex{XS}Q)fQ40;@g8@FKMBe`D<7_l=; zWCUuvRehsu8-;+W&Rj+Zn15@kN`H+`12E4BAF{%-A>DB#Zqq>YtNO}oIUwe2Fsz2~ zvAB+SQNj~n+OrdNE3N=nG5LWn$z>1A{Bm3j$7PP@+)s|?+`9!EyhQPVPxZm4v=f<{ zJFo1_@1y{@;(wvAkrIG2SX5H`=E)@q*jipKN|iAfXbH6*&Dp!Ek0~WV&i5`xZ*bs6 zR0gyMA1O7~g3@0zDzd&tD%CZQn(vwW#$ZKlWd~(`0<5pWd|wpmPUcH=g#CBmoSGuq zrquHugwvNQmCfKr=&&0XAjOgk*1SJH!eytus@~`o_<35Lg!9gOEy-ek5g+tp3jmdO zG@mwVP9}A(dHq(0rbSp43Zi~7QBOrwbqjpe<3;{Y2x&qkZ}OT_BbqA^o6dqn2x(f) z76t-zwNdpw@FjorV$z=P(m@5u5UoE3g!lzCMLkqOrD+Sjj3HVL1`PPe1dj%VXB+T-`#(%KOzt|_l z=qu9j!9(Z3$Y0+57X}LmGS-)1V~=H3{Tsr1C+hYimT7bLfw8OQy8_Uk7!L=>H z)EbNyq{%h7`b9^$RbOZS&R=i%U*2{$zh9HrAby(QweI{@LGumJc+*^|`-`AO`hs{L zRBTBxMEXjYeO^+fDNhkj9SqG8%=7xL{tNP*^m zIaIM}+K#bmzHh8*mi&^~>6LL{))D7Z49Ni-gs}+!E5vdI3&bk%q;d4%MGb z5lqt#iBp4pO=6ne$75ujzsebIXA5Q+&%x`b-KXF3@mELR&{yBk z=S(nNiZf7u#`EryRv>CB(y6=56c>>KIy*&gP_t@C zoj+#OFygI&M@6gjW+!%isCTh?1H?LHEEqSz+_@6^PxMadi2b9ae}P|_xevL9XH5jv za7m~R>k;fCodRhgvq-qs`WKe_Y2e=38ga$ihd%-;t`YKDgXB$xerSWW1bIQEq{^r8 
zJsP(}zBrMU&9ml6uY}qSzd9AuGOUaGa3Qap=m}&qH&E}!zXEBkB_0ppNSXF%;=-Ir zW2M+XfcNb~;?8Qf)G||6z~!kUjT>tbWoerrkGT*|P6Rb`DL957Bhg7}_XYM4t)~T# zo_;4$8w;>wgzgG-qnXK6TUw(tBDLR4o2HnxNA1K^Jb~p=@HJQw4IfJ7?HE zE$i8LgKl`~r-DE)1!!&7h`6e5-?SmL;<_2x8EcgR3%5HQj(HuT(k($hulj?7s}1y1 zOI@Ifxaw>Jcf|JMx_Nxj+5*0JcwEO`j`fgw6UK30L2QpSrHbM`k~X}P0CrI^R`)4; zlI-6({OVHroJ`T^o}q!({Y@d&dYT>Gf}8j%3_2X4k%8z^clZ6jc=nNer&NrqX~B}f z=n!XcJj!3;n)uu6EjJ^7Whn`YE^z_JqwyDv8wd#J<*Z~F-U0^N@Gpe^8ysMIQIU#w&58)?n8A=R=rZp&MPKn^h-R~8 zq+WRSfQCZ<@pWJOO3MrQO3zusRZJha4=kVqWD@5~$$t%foT3<^B;Me7+?tRkq6esZ z?m>)VZTN)-oX+ zfE_LE^g3G?Nj!94Na+I_WQw(bEO}Du#g}RF*MqB7-yGJ&h7JT@fjjd>6G?#H3oT_s z|A%Hv9cIn1FyWnt6W07?xhS6`{RN%70FmWs5r}Km*Gn{=0W*Y_IfJ!W)BehE@Tal= z8{7^Gt22}VcOiq~KCt^vNv3E|KP5v<*(7@SWmyksGIV}KsS6q;dTG(KHQ02fQtF(8 zzmfg?#9zaE*BALdS5^JIQLvqU74FEONR_%b^DmZ4>XU_6@eP|X9LYf68 znk-Ba?ayf%-kRC|y25evud<&X`0I++S6$G_6qRgDi@;v1zE+}19gHmbUR%)Xl`#K8 zNkVY^Z}>h)<~C>&U)m>F(joBuqy1Q3fr+4%DH_?T7J=VC!lsH!oq77Oru2*Aa~W#Dzw7r$z8ftG+d?$pw9Ojkz{cEp^Y)-_2qE;464kLp~5ur$bb- z4=sYcFOo!)8`#!M1wl(K6tX{EtWw2_Hi=dlk-Q-Bnw9JHUJi7AQo0K|2 z^8}3yPK$t1v-#J6jLhtgA5yH!;iT6HHm+vi~PY{jOm}mS3_8bj2MqJd4 zKt*t}b|-++V|>f`KCuh6)4E`L!c&a3g0CHw@q`tXlbZYf(pE*hPx~M~mduFZ=6A&PV7x?oNj7Q((wrvSZ7EetN*7 z=Z`$@RtCs3V9H}MM5`Xs!04}+dF;hnPkt4&Q9*iN6()LPS6O|VdH5G*R1FRKCq1b8 zFUHX@af`6|94u*s9wy5kZp2LIQ@saI8+-A6xxcRl+NdynuLcvn6`*D79ro9>78%U7Z zIZxV^3;gzpV$W*SsJ|4(#6rSC?8SsK)ABi9&HQhpM$DAJB)}p_YuE*x8q@SYpvV`9 zoD4uOC2hg3Jd<^54Yj(b*iDM6)g+*cX-d18h-UkjIHtx#M=oK=rm5>H!EQ61e&hnb ze4=bf=&FNgT1|#y51|7Hjnc3x|uph z_;8`SmAY*i`yUQi@28jUpT7TFs0qZdAeHzkt&)8evf%%3q2|9OtZ2p2uks#3+b!zC zhB=-E@ekJGK51nkKN0guvJDnhT<#j&nGfDmWlFoo=+?XU1mD1I2MMaXtsA_QO69Q~Jm^s&T$C zP5-#w(+~J>E|{(#Q^DP^v;o3VCEIRnBx6M}3~sS$w6`hs}!t zLf9~V2m60T_jeksPa^~X7(4-hzg6z9>Y<~VzJsxml9PkEjj7|`%2|32nwcu8`}(|&(x%4WM>UYH6||Dtp6)B_r-2wxxIZbkyAm z9>gLErsS2KuNR>S8KXO3kt&)joo!FotWh&)H`3H-2P5#6zHZm2ll5=+hX;%4t+&tj z{SH39_KxEfhiaz$*({wOUIF;O=-0AoH*`0(HZpWGXopSN8_}T~Qa7_RH`m@xpOn1F 
zny+6MKc8OfOMbz&QWz1t6T1d<=PxxYI78aH57_9HSQW2Op6GnQ^bTyotnGayiuI?qT?q_Q}s)rFbJvvH$bm4SYxt0G=a^_V2XE^BlN z>@z$nw#P!RX#rgNrOE}s~%QM@W-l{ZS>gc*ain)&}c@lJu$ZxCF^H3dx(%N z_*Mlh%otOurGH=)zMW=q-H!XuD7)dvx?t9FR;6}S?zmzmbkW|6q>+>B^1bCQJ_pz+ z2palGF{3n@I8oxbIuk)6p1U$%X*TjhY|f6<5}2ci5|?^YhQWx@vWuUg%1xi5AO^r1 z`d*iqW4pvv%;}!3LI*&Fq0B}eilWZHsrD)^Xv0XfGR(lN7L)7w5N%d)5@7?*grl__x5CazQ1l!Fmi*;TG&Cjbz01# zgjot+o?cS#E-|R$j0^#d!<*Wy4!{R7|AI}GTkryF zrB}EpV%;ThS72ue)W+r)H6pU1QW;7;Ps|eOPDeX3Of#0&jS4p)-yR*xNF3BpVp1t} zs4YwgRk1XZb^%pe&mvWjs#<16Rkp!VughZtzbA;seB&<_DNkWN?m*&LlTgCevl*+>i)b%+xzJQPYl%V3e48A9CHau z$($iP=BRjWMy_}*4uYlLUWpwC4%c%IrfF1@fE~qtrd7=H`vr38X9|;4Dbr8AIkA2! z!%~;_f!-)|{pVj@9>Xigs3g!}#2|_wi6@U&oDf>QQG|?vL|FNq5^TD_URNhcyP%#j z!>)CGA+w9P617(M9RQVYZE(uy;YD>4H2I`8DHl%NCHb`e#_6%D!N^L0N%i=#5H$-h zanNsYklGlf=LdnVay;(pj1idFiYE*&Q@s6PYZGm+F+vNryBK@q-IkYaC zbC`xuY4`G@9d(Ni!tC1vqGsYDY90~7l%JMje`*2dQ*-wvTmV#U48_ZdQq2VuO!&Kj zjM>{SX;kPv^P_#o1~{Sdd50qycY0jdy9q@3^mGHc^Tl@~`dci;u2|m+#_z6iC|Lu0 z5*g6Os@wS6DtnqWwMrbM96Jtg_+Q6@23z5{N9gT5bxP*pfXap3LgL=7|B~8MD(FJ^ z;q_`QumPykR%wd|wAS5C`+gREen5V9eEgKkxz=8IC_&=7S>Lx}^p(!ak*~@m{6IHF z?7zIDxlnxV$0{#@>5`-JGyY|PA59=NmVi8kdnGSocgM;cy!Av01tkCp8c-+-4dF|j zN88m}FN5o>hil>_9~Q>!FoP~kQNJGq(D9>;BY?a4c_}3A{_d@}<)~PML&nzqG7nR? 
z2m-+Hy%(713I=xD_m^r<0*%b$H%#_boDT3v)-9jtx~?afav>na2Fykz)X#!X>zBVM zR-_kg99pU<92*Y^M_-l!rm{$-JkY+zJ8KWmEj9dTgHdB=+jE&opt(Gr@7Lwq=+vbH#u8Do zarK3r@i?74^GhX$GNE9wR0Q_(dl$eoaqH5O6ktzPJG=0B@Oh@A@j9CdU)IbzN90Vx znQ|g);pMXE1Rkoi|1HhJXIRI1791_Z&fFJt+|jyV-#a}>H*uIUmUSFk9y{_)UX?t# z2V+_?Moy{)ov&{~wEcSX7V0cg`KVS6n3-KKlI;T>qWp3?pZg-(-E13csqVSsBi=l0 znh<73>-$OszaV95V}Ig4PP)AQBfDSmFIWcg=sz5dJpPXT&g3YzU3-K_Obcqu@8ECs0dMHhisIoS4fLtapWo{Z<2wwnHoKJwtSe4H9{S5 zF`~(8)G;7B9NvpNN^Tm<=@jmM(<*)VmbU!l6mAbxc{-WSn zE8pgL^QIt?>bdh~%?RePR4ka)!E(kCOn|cjZmUdvF&Pi)0ad!T&u8D!sz<`sg%k>1 zBerS2m8Bv^VC??M{rG`d6IJ1&fT|8os`E&ghN-aQ#sp6RZ_)88Qc4Acpd2Hs-?FgJ zhYYY|j=O>BasiTQ`?ep2MB#$c7XmD`v)(VxflJX>|A)63`REh1OLE1UXO_;}!Qrd} z8@qSc^E8D3=;wEQZH@$K!^|Lfg>l!Nd?rwlw4GrqFdhvv!_(5DIU2>7U>D6jrp$OS zYeJF>B2UZy(*ic+M<1t?^#0XRXeP==@JzFLnV5i2l8WP*Xn2$m52hpxwBrIiYUA)f zHBS32w?Rv_|GkL1e06(^H7>uk^KhY!>70{O8 zC$oc!c}L3Udc{_Scuk0a!18Gwq_Wigo28>DmO;aq%y9KU-6KvgR?EU*$H5kRIe{!) zruD}31KtH-Q|*9%jUPce!nh$-d?CAab2o zHxKF4!KRy%MM*qNV=a1=UJc_uncG5C-ES zIr8)>MK#5*PIIy1G#oWsiDkNy^-Mjo3_ z8TdsB4ofd^X9$2piyL@ynp+9($hvjSiv;Zb@M1pf zF9NR!S>bYcn=K{Hoz2@F>_iSP;TY+OJwCP~zEn7Z2=6C@8&cF?Kc*n&;tmw2uL5=) z&xjY6xw_ESfimFl<9|oF#yV&m0ah>z-7X9>yrKlQ1k|HPkRzE88Lrc8W+5(JGGc_) zvsTH}DcS#acm@qM&8(LP42RO)cS)6r7O45~=z911-Ll+^4{1%Lpx^FJi9tghvgO4) zF*$T#` zEn zz*W=f$FQgePL({HYZttnhrOF&qI}S~9a`aNc$ZM}xx;RWDP8vPP?&dq=8Ll2QB~dTiw1$U z9pm-6e8^60{;RiSoimQ}G^m$V)ZgdP_wx6Eq@3)YQ zk_eJvROvis-FDuG>G&-kp25ZYX=r}L@VvRVXD>Um{UlVYznP94Nt{Cbgc88fh|$UY z!wMgh_?-4DiCdy%ZPu#Gz}0#5N?QrUpai6-ubI%PWTJem#j|1f;?wl}B;i5s? 
zIC&u@+t=ClLIwl{NOG^)g}oGuX{22Id+o9)8H`$GYlq`PNLfnT7dv)y4(FbiPimm9 ztp_!Z@GOD1@y|o+a;E{ti|j1YEDy_E)(x8vrfglS1VoL|RK%y9Bxs5V_pgwV#D2d1L-+{siTqI?<(|+V&$*JQEqeODyyhfN-D_bXNc>h|a2&v{(^T8&NmW`fQQI}kk{WY?~6Rmd2EVU0!g zIL0g&wAAW2DImr?uhz^qRGez4w#1)22Gly9(tolL2)Z_*Ba;Ee-xo|)5%-X{RjwP~ zqJ0D)G8QOi4B~OFyGi`9yKa_J?wxQ#A+GZ=H<(P36$l$e+8_N>fOn6wl^Y#;NAyD% zNAHoYfV=`=Hz5UJrT{zI9XW~Mmo3*%w;QyoZU!C0sM>oyg25q1*K#Ua1EvrNQ6(!t zJponCi>jIFrn!6SzHDA=$h{AcbrZUzc-vR8v3W1Nn}Q+xxH!#})8yZUoaNboBQF#@k`lUxrWyMJWN#Nd)a zPiIzwrzbJ6f<=bE?~|QlBvJfx$3rk+Y4%!WjZucEK)nL&piT>zNefMe9O+nwNI{GI z1v+X8MSIR;#MDmFs{19csd2-<9#FGE!WuxAUK8pfL=G*pvmBXcV>HWV^*;GoywoPq zPYm&E;I_ePnf=Pc=Ng<$$HxP6Zr{hg*DBgXR~h1d5WK51?A0Yf6{u9E#)7`Qqwb)G zLUX`-ZE!+Jk~%dd<9pj6nq?Z3k73RVhmDGvOpN+^0A#73ncLxmM)I1G~5PP-9XwmU;^mSyO3iLUlXXx z_Nh6Tw9l@TZi(VqrC#m>pbKX9d{#yXBbT3fp#;VyH!C5GaVK&J z-yl0Zc1U8X;-_x=EwkWMr*t<+*33le2yUst|S;Nae&qKsZnLviyG8^$fu zWn4Qko^Tbk9?ttZWx#}2$t0T5NFXHm>5ED|PfG9H9K|>;cv73gUC#`sjVrD2p@&1I zbSz-FZQ>l(2rOP3J>>(u{CmYC7U)!az-tz_W!DPBU8Xh#R(?p03cjyl8{-)g_Sf%L zGhLi2*z2YkZ&^d#cM&+bEpzJL+W)qcr4*&mMlIE^lB%|CU{Q@ z*Da3hsI|4VOpse3*k!W^6wVtNKB=Nb;8W6J5C_5NoJX|1mPyUW5yK+-T=GLV;!h`< z_g1yFK$$>d&IIw_ke&VpJc)O(w~fDW;ry-o6grTl<`w0*2QsjV;HdBGVn2#sRPC<>Z*&wClr(%UBdb4ZL=14Yc1Zv>b>BFNMQ+T77 zY8sKbE(BEMOr^mg1V}ukPI`fd&D%WWvrbk?|ksOi^ zycYSxi@gukg)yh%4}=PHf~2HmS9m-w_5&8;c4PeGI56^IPvp)C?pZE|e*6q+HVLXp z-UPxT#DEH(stqaZU(nZZ?Kdm~Apl{uUtl((uNE@V=*o>Ion*Z(UI6J7oNP$^BX!eQ z2ri=cb5lZ9)g`rTXUqLT2o;FXu|HjkkGGa+qbdj84z`J;`%mDpTKj#_x2lRoZ1=qd zE-9#mykXpFYGb8J$hbn=M=*kOY8cxJqgEPM*{?*7yT#^x~gwy6!7P22(hFyJbR zlB+gs%4*`{m{#^MG7gDen*Kd8sVcK4N-oKcm*O&XQ69m{pVDAAspMx#ZRN&Mm=l`( zxDffMi}WvJE<%g_ys8Nxq!{UH`Hn!>&g$j~Cs67rcbVL`jCzbE1HDe45fW7zIE3!W zrfy9_Qc+o9j59a(sXicmN6MVB_D!xO@#AEEmz9b6j2@*lZ&GRAlwy>)1JLsd!B(>P zS^ALd11qRju_Z7BYiTM)Ajx-0-yzF+@h}B+nA}MiUu4%i)*BE&NnS*Dxi$cVbZ@(K zuivBNA5~1|1fW6H6bE-If;oRBdJ(0ia;h}Fb^Wd{Q}dHguFkVKjl|@D`*u05NTocR zzBoc6JR~C4k_pYciMiAg_6CVZVWE0|JoukA-Z~1{TP zE*p)Ib3Vd7xG+~&fl2ME+%{|ok@p?+&M8TP6Xn7Xg0K8@KB# 
zVdvjBB#cL5QH{v?NB5hzVDF4$43yKjBGKA!55DxV0!|7N7X2S^pJL1c-zJEN-eAwF z*zR*|-1JOdkH@;FFHx2}WEEiXWJ(|E9Z)DtFV_6K6|En%55qG|5GOCstmpD?897$k z3!|6U4tmFyo?A`lZ4ChM!fy*I%$#>3>%CR?-+2tLgX#nGXDrG=5*mD*fbqB^LOxoi z+|*o+fCjj29sRV{H$}39i{OKFp|`lcD+Alf?aZay1cuP=2PBgqHCYKLv#DjEfWH)| zrOvzNL@QA_Z$#4*PJ#PF-7!wzZG#p@BFFyRq)%=P)piG26)p{-o4o=Ed4R?t=HBbC z(AMREF{u-kOxTWmzX3VSr%(8ebB4*E`zw#I=fOC9pkV+V%NZ{g66D;(olc@oFV-Ry zX74&Er3Ie{zRB7nTg>yYPyx4;zU|q&qm8%155j6~>SR6+Jo?7@Eo33sR;!x;Q?TxRA#JiwIUvtntotBsibco0;|@S zFV%{QTE#R?zEFQj+V;pNrMm=pM6W40yL1aKu8DJB{L_Wj()wZ9VZOqs=GRk!=ACQO zxCCSoKriDGdHU!qfQv`r(0H|SIl!c~VF*Uhh^^=4=%9W1v?F3Y^g(YZ1TLkyfZ2oS z=cHIze?241i89lA;I68HtgjpUGIsgMD-DtbMqqbhVEKBXcMj`9CpW@r3u{LE+bJn# z+oAiL8fhtya>R_Y^fdChL(C9|7)#aa^Zoca?|I~k?0k2UopP@Dhj-40q(5zV?!s^X zc1@)#*I`MJ0DV*if@mXzqhA7ufpmfDEfzgF5O8UL*W{xx1{AzklaRJBE}471E5*Aj)=y z_4Y(%uK%|w1rNB6N8Q@b0*YwCd@}EErX_Z$*x=$hBp@V~I&^H?aQcS+MVTW@Y0e`J zSds#6Wgj4Y^52S#a;cFSM>IpBdoM30E0UE29Q{^Wp_mdN+DnlC+~x`2%e~hJUAw z*SDyxfAc3QCip0sVV+u!nRP8KbtwCqiqs>r^0X%eA6#ri1Qs~;k3p&pP+$iX%NiFV zFiz!0ZjC~~lM_{FgY@EBDlt&WE$Z}#p3{B;klz#@9r}q}j)2i7jkag`*yY_WSK6-Y zpB0a&@AzaVeeDdAw%kA9v%75#=X?@DQV6bZ2z2-m zUBZv0u3u5?)Zq~?q?K%ySJ3EhX|}e7Fe@SpGYSEkZL%Wm1I+N*^$ODT<6JwC`GM$a znL@Ioe5JgYtOZV#qeIwj`YCGo%cxMB#USouDO@r+gF*4Av30}E`ZDTf2pJ~b+@jfm z7T?osk}fXroM_TBCOn^1k&mu~+Qqap7|oHL#> ze)LZGh(e#|tCMyJ>ohj9pKQC_{GL5#B2w7jo=Gmn9(R$87BR<+IYonxreCr!Pu9OK2kjxYT1X>(_S#1-bs% z;Fn@52I9c(h+)^)W98?_Hnf#&@t2ogAGZ+qadkmGa#3>CZHnJyf$ziju6Q^Mg=#lT zCfs(Qp6VM1H9tEB)@N3z%;EZli0D%UDmqiQdI ztbvc=1!5QW`z<2=4*WBD+7ynG4TM8;`rGohtFg`E8yBW?SFb!u8Y!H=eiRuC2^xfa zs^T1`06=vbYkbMPXA0r6`)18BWP1BTBqz@r9bwk;h``k5jQLcelS(CUK#69gS|wIC z{vJp*W<7wTXQHT!JXcqXakPE(_k0K9k7{lT91CXtj@9OS?y%n=7&HR|{yNmEmB+uS z1QPV`=d(zDfEW55BPGaQ$}oYEQf;6Jqc1Se1Qty*k7z=glaMGD_AbVFF;St2FRAJ6 z_0`fKI^3U)5XEyDkv1AUasl6zct3%2k3FRlJC!;6M)6M*OP?V0ZdDKSHq4*A~Oy(fA0rN;bk>ccr6Q!-Rxpvj$%_0ti{dzUtZVUnB*JSGE zJT1$VIzkn%g<1`fnssg(@tN{9cb@OMbk8516(MZ(9s1 
zKC%IdT~fUn)PkF+0rNV49Ba~uxBJsYFv7FTQTgi&55am|+KYc?hpi>kQEd=-?LdauVE8UHPW{qnBx3MZlJ0l1J|7s+o^2wVh3*uKg+2Ju=zvC24O{(g%X z=FcQE71`OAwrpj#Z<&h(39}*bM+M^Kwg5Pgi$Q*7?EcF{5GK}<=LjI5E!T9UF&Rn>IjlF1vH}f%f zACpUZ2LR4qUD3IkkV{teYJLTg)baNTT(c85Xe5)RkX{*IyW{&K>Vq?n_ZEopw!TfU zA6QKWTv*LnD}^T3aJbn=i)t1VpLZ42Y2Gn0+6b>9)jp1S3q2`iWAa>2iEjOpGk zj=_t=V#R@~S9i*4IXAYfri}|@w(#xV!a4*89$3J>1iC((0ws(-V4lFF0d9j#bLm35 zs?i;dD&{ll@EU`wMuUKYP64zpmxPPR_3F&I9-TyL9yO0l|8ZoqtBwXdS^|Fc;m3(9 zLOe+Z*CjEgX|PLsG|4iFb;?4p{T>P;KK3_R;i!*BZ$|le<5rJHPAktNg5nBpp)|(Q zIKkG3AmpZ9H@@h~N*U5wj-Xlo+x&`j-B0xYZRKFVq@FMN>Yu^>+L{RapH>bhcPrz6 zdN|q>x-Hfi5JE1!p>jR*>T`1`8yqQUKz}KGZD~ugUZ%#76vLuFFNaiA9u-+)hQl!#do?#$rf#kr@kA!Ls3TsTAl93>#{=akE}b#G@Jtb* zOz3TS&2KR-okId^$4vvN!YCoKf1br=RSiEYuuqHY)iY*~FY>4(r}6PjrLR+DbM7V0 z?@^v{tx^J|fcrgQ@tv{a=o?~Y*hK8K#WFWFA<<$~KY`V6z{ZaC)AD-NByR2RWo$1S zX}Luzxz1m6QwC%*DSE3GV>E2j$wYj@*7R#cI$ zg?p9PlwGJ*c&s@Mz8G1l=g&-!XQCRjk*&o-QL4@kpgdL+@w#W|^C#0hHj4ar!`n#C z)t!T#S-fMoT3OEb|6hL&#*3J86&L{U^`-cq{ia|2IZkHA*2e#y|Jk^9t|1$Ee4uV2)Qd6RC ztpcd%iSE|Dc>J|V&3Hp3pq`T?>-yHm_X+p#q#m46SX@M6B%xtnt3o4;gb&^fsiB-Z z@S`9(IKM~I4^v)Z^%s)4_Ug+P)rK-+I3)0mh|Nv|DrJ(*O2eH#_UI@NDdZZqJ)$9) zG~$YX*I*Z$iA%ex#(%>KNFou1sN(1orrKI zATv8!?Tt+*I(4pe2kt%EZ6?!hM}d|eyT6-4o<3;GnGPs{pr(wrwX#+os9s}z4j}pb zo;`NMM)flEcTKq}|1rB>p>}lvd`Ww{xqRW0j@s7P#h!BYPK_lO5tVQoY*~K_6Z10y^$6g&FBZVgytZUz-jBp&w@Y4e*ZbrmWQ-=EF z!r(|`pz5;PTuT&6bdYRMcC7j5+V-wZ(C%Cjvn&zrYc7s(ghAb8k|Lx|d8`$zeb?}a z-DZJEW@Xvs6ay2vgo`QBNQL%sC!_3y#Xyu0yU%8n>6Nt`OPbV<6SPk7D?Zx>E|25` zY8!H?N`S@012+@%jkFo7@pJ`P2vcz1LJfKm;Q>9dfU!%k zKn-;|9ucMr8^SACjK9&zB zBpOG$k85M_=!Sl|*yV+yzxUrFe=HRWON$Zjl%jG+8oS5HqacPV+C_y}5y3GWoue(I zDnL#~y;u?E2(2?+mn7+`b|0&;7(44Fi#44|($YSF@pNXL##XZ2pP{yN=2AvOv-Eqq z$__K34=jkq;uq<`AYd4Ct~4|$AcoQZRHzh3V5FPm(h-uTw3uDVEM}aYR!Tj)<0|nJ0^4YQb?mHBf#sB}fY@Q} zIA{6K30BDa=hfbBMXJ{X9z0sW+U*!o#ll^^c=y0+vE78Y^}3b)y5_55de1h9vF=n; z@zt_H2ksR}7~e1XLCDt_{&A+9@)mGAbA%N~BMxb16EtTXy|FlY zheyiCK&x)bGG_*lNBb*1LUSZKP|cL4iJ+StF`sw8L2MO)rhn;{mg1oHXi9MnqfkAp 
zz_Qx-Exq0^lBcs53K*G&fd`<(jW~@+`4m*gUx5u2V0a|p4~Lf!wN=aRr{vS1uS}OA z71AE8aVnXy#!XLdxcR7|`*R^RduwujbhXKX>fVkDuBaHaa1980fEBfaW@nr0pqDDk zjaZQ9;y2%|i`L#0I8ivz5#fd8GevgRqVyaJr@Mq?)zAYmAYaG-E2+Mn-M@-<%bouHLBrRN0(hOj6W%FviWT{( zUx^f;sbibYsB<*zW9B7S4oc3)0#nztFgrSXGc`1=)U%xupFXJYq_|T)%MY^*sLh=D#O=TC#)hpwwE8zf1n) zmwb7VeJZb9YR!Yo3q2gOFTB5aA?{g|zxT##rw$pho{oxSIk;cykzC0B%~Op8m%hlj z*r+CNY$BhTT|8~CR*T&7l(}7QmPzcl7fNj5^$6Ve<3}4W!@>v2#S?#bq&{3TMDe1ggDnzram@kdEsHU=9wAMbjuUH3uO|JSs=UvdNVbSLz`I=I8UhD*AB zF-PBPo7}F_#Y!PB*^ZU=^A^tEvFPpJ({4L;L)_1wcdY+;zMWmK`=^fo{etJm=RJte zkE~+~@MdHZVa9!w1~8_;prH{&VL4a>-2n8nARyWq7#bQIfU%EsG6cFt^dl1x8YcmB zA?^bd(9J?_ zHXuxxZ2>U>-ibihirzRyXf3b;YejFWq8or-_#q4kbih&+qH9Ob=m_mg9Fep`vweU! SD;r1!7Z5f8 literal 0 HcmV?d00001 diff --git a/Automation/src/GSoC-Organizations-Data/scraper.py b/Automation/src/GSoC-Organizations-Data/scraper.py new file mode 100644 index 00000000..12437aaa --- /dev/null +++ b/Automation/src/GSoC-Organizations-Data/scraper.py @@ -0,0 +1,53 @@ +from selenium import webdriver +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.common.by import By +from selenium.common.exceptions import TimeoutException +import time +import xlsxwriter + +browser = webdriver.Firefox() +url="https://summerofcode.withgoogle.com/organizations/?sp-page=5" +browser.get(url) + +delay = 5 + +try: + elms = WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.CLASS_NAME, 'organization-card__container'))) + print("Page is ready!") + html=browser.page_source + workbook = xlsxwriter.Workbook('results.xlsx') + worksheet = workbook.add_worksheet() + row = 1 + col = 0 + bold = workbook.add_format({'bold': True}) + worksheet.set_column(0, 2, 70) + worksheet.set_column(3, 3, 150) + worksheet.write(0, 0, 'ORGANISATION NAME', bold) + worksheet.write(0, 1, 'TECHNOLOGIES', bold) + worksheet.write(0, 2, 'TOPIC CATEGORY', bold) + worksheet.write(0, 3, 'TOPIC NAMES', 
bold) + + orgs = browser.find_elements_by_class_name('organization-card__container') + for org in orgs: + org.click() + elms = WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.CLASS_NAME, 'organization__tag--topic'))) + org_name = browser.find_element_by_class_name('organization-card__title').text + worksheet.write(row, col, org_name) + tech_tags = browser.find_elements_by_class_name('organization__tag--technology') + tags_text = '' + for tag in tech_tags: + tags_text += tag.text + ',' + worksheet.write(row, col+1, tags_text) + topic_cat = browser.find_element_by_class_name('organization__tag--category').text + worksheet.write(row, col+2, topic_cat) + topics = browser.find_elements_by_class_name('organization__tag--topic') + topics_text ='' + for topic in topics: + topics_text += topic.text + ',' + worksheet.write(row, col+3, topics_text) + row += 1 + workbook.close() + +except TimeoutException: + print("Loading took too much time!") \ No newline at end of file From 406e16691d558e09b2300050a17fbf235f8b024f Mon Sep 17 00:00:00 2001 From: rocky <41517713+Gateway2745@users.noreply.github.com> Date: Sat, 5 Oct 2019 19:23:08 +0530 Subject: [PATCH 2/3] add readme.md --- Automation/src/GSoC-Organizations-Data/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 Automation/src/GSoC-Organizations-Data/README.md diff --git a/Automation/src/GSoC-Organizations-Data/README.md b/Automation/src/GSoC-Organizations-Data/README.md new file mode 100644 index 00000000..0bb19f8d --- /dev/null +++ b/Automation/src/GSoC-Organizations-Data/README.md @@ -0,0 +1,15 @@ +# GSoC 2018 Organizations Data +## What this script does +This python script retrieves all the organizations names, the technologies they use for their open source projects, topic categories and topic names given in this +[page](https://www.google.com) and converts it into a nice excel file. 
+ +## Libraries Used +#### Selenium +#### XlsxWriter + +## Instructions +Run the following command in your terminal +```sh +python scraper.py +``` + From a27a1dee19569de7a629daee5435a2f6bc4fc0c3 Mon Sep 17 00:00:00 2001 From: rocky <41517713+Gateway2745@users.noreply.github.com> Date: Sat, 5 Oct 2019 19:24:45 +0530 Subject: [PATCH 3/3] update link in readme --- Automation/src/GSoC-Organizations-Data/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Automation/src/GSoC-Organizations-Data/README.md b/Automation/src/GSoC-Organizations-Data/README.md index 0bb19f8d..1223c311 100644 --- a/Automation/src/GSoC-Organizations-Data/README.md +++ b/Automation/src/GSoC-Organizations-Data/README.md @@ -1,7 +1,7 @@ # GSoC 2018 Organizations Data ## What this script does This python script retrieves all the organizations names, the technologies they use for their open source projects, topic categories and topic names given in this -[page](https://www.google.com) and converts it into a nice excel file. +[page](https://summerofcode.withgoogle.com/archive/2018/organizations/) and converts it into a nice Excel file. ## Libraries Used #### Selenium