-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.js
190 lines (156 loc) · 7.34 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
const axios = require('axios')
const cheerio = require('cheerio')
const fsp = require('fs').promises;
const path = require('path')
// // Code for scraping orgs from archives of 2016-to-2019
// // Function to get list of orgs from 2k16 to 2k19
// const getList2k16_19 = async () => {
// for(var k=16; k<=19; k++){
// const list = []
// const {data} = await axios.get(`https://summerofcode.withgoogle.com/archive/20${k}/organizations/`)
// const $ = cheerio.load(data)
// const element = await $('body > main > section > div > ul > li')
// const len = element.length
// for(i=1;i<=len;i++){
// //title
// const title = await $(`body > main > section > div > ul > li:nth-child(${i}) > a > md-card > div > h4`).text()
// //link
// const piece = await $(`body > main > section > div > ul > li:nth-child(${i}) > a`).attr('href')
// const link = 'https://summerofcode.withgoogle.com' + piece
// //get orgs techs
// const {data} = await axios.get(link)
// const nP = cheerio.load(data)
// const el = await nP('div[class="org__meta"] > div:nth-child(4) > ul > li')
// const len2 = el.length
// const list2 = []
// list.push({title, link})
// for (var j=1; j<=len2; j++){
// const el2 = await nP(`div[class="org__meta"] > div:nth-child(4) > ul > li:nth-child(${j})`)
// const txt = await el2.text()
// list2.push(txt)
// }
// list[i-1].techStack = list2
// console.log(list[i-1])
// }
// let fileData = await fsp.readFile(path.resolve(__dirname, 'orgs.json'));
// let obj = JSON.parse(fileData);
// obj[`list20${k}`] = list
// await fsp.writeFile(path.resolve(__dirname, 'orgs.json'), JSON.stringify(obj));
// }
// }
// getList2k16_19()
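// Code for scraping orgs (with their projects, logo and tech stack) from the
// summerofcode.withgoogle.com archive.
// NOTE: the function keeps its 2k16-to-2k19 name, but its year loop currently covers 2019 only.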
/**
 * Scrapes the Google Summer of Code organization archive on
 * summerofcode.withgoogle.com and stores the result in `orgs19.json`
 * (next to this file) under the key `list20<k>`.
 *
 * Despite its name, the year loop currently runs for 2019 only.
 *
 * For every organization on the archive index page the stored entry is
 * `{ title, orgLink, projects, orgLogoUrl, techStack }`, where each project is
 * `{ title, projectLink, content, projectCode }`. Each entry is also logged to
 * the console as soon as it has been scraped.
 *
 * All HTTP requests are awaited one after another (index page, then each
 * organization page, then each project page).
 *
 * @returns {Promise<void>} Resolves once the JSON file has been written.
 * @throws Rejects if a request fails, if `orgs19.json` exists but cannot be
 *   read or parsed, or if it cannot be written. A missing `orgs19.json` is
 *   treated as an empty object so a first run does not lose the scraped data.
 */
const getList2k16_19 = async () => {
  const siteRoot = 'https://summerofcode.withgoogle.com';
  const outputFile = path.resolve(__dirname, 'orgs19.json');
  const orgItem = 'body > main > section > div > ul > li';
  const projectItem = 'section[id="projects"] > div > ul > li';
  const techItem = 'div[class="org__meta"] > div:nth-child(4) > ul > li';

  // The <org-logo data="..."> attribute is presumably a serialized object in
  // which the URL starts at offset 16 and ends at the next single quote
  // (TODO confirm against the live markup).
  const logoUrlOffset = 16;

  // Pulls the logo URL out of the raw `data` attribute. Returns '' when the
  // attribute is missing and the whole remainder when no closing quote exists
  // (previously: TypeError, and a trailing "undefined", respectively).
  const parseLogoUrl = (rawAttr) => {
    if (typeof rawAttr !== 'string') {
      return '';
    }
    const tail = rawAttr.slice(logoUrlOffset);
    const quoteAt = tail.indexOf("'");
    return quoteAt === -1 ? tail : tail.slice(0, quoteAt);
  };

  // Reads the existing output file; only "file does not exist" is tolerated,
  // so a corrupt file is never silently overwritten.
  const readExisting = async () => {
    try {
      return JSON.parse(await fsp.readFile(outputFile, 'utf8'));
    } catch (err) {
      if (err && err.code === 'ENOENT') {
        return {};
      }
      throw err;
    }
  };

  // Widen the lower bound to re-scrape earlier years.
  for (let k = 19; k <= 19; k++) {
    const list = [];
    const { data: indexHtml } = await axios.get(`${siteRoot}/archive/20${k}/organizations/`);
    const $ = cheerio.load(indexHtml);
    const orgCount = $(orgItem).length;

    // nth-child is 1-based, hence the 1..orgCount range.
    for (let i = 1; i <= orgCount; i++) {
      const orgSel = `${orgItem}:nth-child(${i})`;
      const title = $(`${orgSel} > a > md-card > div > h4`).text();
      const link = siteRoot + $(`${orgSel} > a`).attr('href');
      const orgLogoUrl = parseLogoUrl($(`${orgSel} > a > md-card > org-logo`).attr('data'));

      // Organization detail page: external link, projects and tech stack.
      const { data: orgHtml } = await axios.get(link);
      const nP = cheerio.load(orgHtml);
      const orgLink = nP('a[class="org__link"]').attr('href');

      const projects = [];
      const projectCount = nP(projectItem).length;
      for (let l = 1; l <= projectCount; l++) {
        const cardHead = `${projectItem}:nth-child(${l}) > md-card > div:nth-child(1)`;
        const projectTitleText = nP(`${cardHead} > div`).text();
        const projectLinkText = siteRoot + nP(`${cardHead} > h5 > a`).attr('href');

        // Project detail page: long description and code link.
        const { data: projectHtml } = await axios.get(projectLinkText);
        const nP2 = cheerio.load(projectHtml);
        projects.push({
          title: projectTitleText,
          projectLink: projectLinkText,
          content: nP2('div[class="org__long-description"] > p').text(),
          projectCode: nP2('div[class="org__meta"] > div:nth-child(1) > a').attr('href'),
        });
      }

      const techStack = [];
      const techCount = nP(techItem).length;
      for (let j = 1; j <= techCount; j++) {
        techStack.push(nP(`${techItem}:nth-child(${j})`).text());
      }

      // Key order matches the previously written JSON.
      const org = { title, orgLink, projects, orgLogoUrl, techStack };
      list.push(org);
      console.log(org);
    }

    const obj = await readExisting();
    obj[`list20${k}`] = list;
    await fsp.writeFile(outputFile, JSON.stringify(obj));
  }
};
// Start the scrape. Report a failure and mark the process as failed instead of
// leaving the returned promise's rejection unhandled.
getList2k16_19().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
// Code for scraping orgs from archives of 2009-to-2015
// Function to get list of orgs from 2k09 to 2k15
/**
 * Scrapes the Google Summer of Code organization lists for 2009 through 2015
 * from the google-melange.com archive and stores them in `orgs.json` (next to
 * this file), one key per year: `list2009` ... `list2015`.
 *
 * Each stored entry is `{ title, link }`, where `link` is the archive URL of
 * the organization. Entries are logged to the console as they are collected.
 * The file is re-read and rewritten after every year, so years that completed
 * before a failure are kept.
 *
 * @returns {Promise<void>} Resolves once the last year has been written.
 * @throws Rejects if a request fails, if `orgs.json` exists but cannot be read
 *   or parsed, or if it cannot be written. A missing `orgs.json` is treated as
 *   an empty object.
 */
const getList2k09_15 = async () => {
  const siteRoot = 'https://www.google-melange.com';
  const outputFile = path.resolve(__dirname, 'orgs.json');
  const orgItem = 'div.main > ul.mdl-list > li';

  for (let k = 9; k <= 15; k++) {
    // Zero-pad so 9 becomes "2009"; replaces the separate k == 9 branches.
    const year = `20${String(k).padStart(2, '0')}`;
    const { data: indexHtml } = await axios.get(`${siteRoot}/archive/gsoc/${year}`);
    const $ = cheerio.load(indexHtml);
    const orgCount = $(orgItem).length;

    const list = [];
    // nth-child is 1-based, hence the 1..orgCount range.
    for (let i = 1; i <= orgCount; i++) {
      const anchor = $(`${orgItem}:nth-child(${i}) > span > a`);
      const title = anchor.text();
      const link = siteRoot + anchor.attr('href');
      const org = { title, link };
      list.push(org);
      console.log(org);
    }

    // Merge into the existing file; only "file does not exist" is tolerated,
    // so a corrupt file is never silently overwritten.
    let obj;
    try {
      obj = JSON.parse(await fsp.readFile(outputFile, 'utf8'));
    } catch (err) {
      if (err && err.code === 'ENOENT') {
        obj = {};
      } else {
        throw err;
      }
    }
    obj[`list${year}`] = list;
    await fsp.writeFile(outputFile, JSON.stringify(obj));
  }
};
// getList2k09_15()