Skip to content

Commit

Permalink
scrape.js: Search org for user
Browse files Browse the repository at this point in the history
This searches the user's organization for their display name. If
the user is found, their username will be linked.

Closes coala#8
  • Loading branch information
andrewda committed Dec 12, 2017
1 parent fd15318 commit 4ecec91
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 6 deletions.
1 change: 1 addition & 0 deletions .coafile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ bears = LineLengthBear

[all.links]
bears = InvalidLinkBear
link_ignore_regex = (github\.com|\{|\$)

[js]
files = lib/**/*.js, static/js/**/*.js
Expand Down
40 changes: 34 additions & 6 deletions lib/scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const fs = require('fs')
const json2yaml = require('json2yaml')

// Base URL for GitHub's web (HTML) user pages.
const GH_USER_BASE = 'https://github.com/users'
// Base URL for GitHub's web (HTML) organization pages; the organization
// "people" search page is built from it.
const GH_ORG_BASE = 'https://github.com/orgs'
// Base URL of the GitHub REST API (user lookup and user/org search).
const GH_API_BASE = 'https://api.github.com'
// Base URL of the Google Code-in API (program data).
const GCI_API_BASE = 'https://codein.withgoogle.com/api'

Expand All @@ -19,12 +20,23 @@ const CHAT_IMAGES = {
OTHER: 'static/images/chat.png',
}

const GITHUB_OPTIONS = {
// Request options shared by all GitHub REST API calls. When the
// GITHUB_TOKEN environment variable is set (and non-empty), requests are
// sent with a token Authorization header; otherwise no headers are added.
const GH_API_OPTIONS = { headers: {} }
if (process.env.GITHUB_TOKEN) {
  GH_API_OPTIONS.headers.Authorization = `token ${process.env.GITHUB_TOKEN}`
}

// Request options for fetching GitHub's regular HTML pages (as opposed to
// the JSON API): browser-like headers asking for an HTML, English,
// unencoded response.
const GH_WEB_OPTIONS = {
  headers: {
    Accept: 'text/html',
    // `identity` is the HTTP token for "no content coding". The previous
    // value `utf8` is a charset name, not a valid content coding.
    'Accept-Encoding': 'identity',
    'Accept-Language': 'en-US,en;q=0.9',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0',
  },
  // NOTE(review): presumably the fetch library's switch for transparent
  // gzip/deflate handling -- confirm against the fetch implementation in use.
  compress: false,
}

async function fetchProgram() {
const res = await fetch(`${GCI_API_BASE}/program/2017/`)
return await res.json()
Expand All @@ -45,7 +57,7 @@ async function fetchLeaders(id) {
async function searchGitHubOrgs(query) {
const res = await fetch(
`${GH_API_BASE}/search/users?q=${query}%20type:org`,
GITHUB_OPTIONS
GH_API_OPTIONS
)
const { items } = await res.json()
return items || []
Expand Down Expand Up @@ -97,7 +109,7 @@ function findMatches(input, pattern) {
}

async function getGitHubUser(user) {
const res = await fetch(`${GH_API_BASE}/users/${user}`, GITHUB_OPTIONS)
const res = await fetch(`${GH_API_BASE}/users/${user}`, GH_API_OPTIONS)
let response = await res.json()
if (response && response.message) {
response = undefined
Expand Down Expand Up @@ -155,14 +167,17 @@ async function findOrganization({
async function findGitHubUser({ display_name }, org) {
if (!org) return

display_name = display_name.replace(/ /g, '')
const shortName = display_name.replace(/ /g, '')

const username = await findGitHubUserInOrg(display_name, org)
if (username) return username

const displayNamePattern = /^[a-zA-Z0-9-]{1,39}$/

const displayNameMatches = displayNamePattern.exec(display_name)
const displayNameMatches = displayNamePattern.exec(shortName)
if (!displayNameMatches) return

const user = await getGitHubUser(display_name)
const user = await getGitHubUser(shortName)
if (!user) return

const login = user.login
Expand All @@ -183,6 +198,19 @@ async function findGitHubUser({ display_name }, org) {
}
}

/**
 * Search an organization's "people" page on github.com for a name and
 * return the first username found in the returned HTML.
 *
 * The result is obtained by scraping the page markup, so it depends on
 * GitHub keeping the link markup matched by the pattern below.
 *
 * @param {string} user - Name to search for (e.g. a display name). It may
 *   contain spaces or other characters that are not URL-safe.
 * @param {string} org - Login of the GitHub organization to search in.
 * @returns {Promise<string|null>} The first matching username, or `null`
 *   when the request is not successful or nothing matches.
 */
async function findGitHubUserInOrg(user, org) {
  // Capture group 1 is the username taken from the link's href.
  const pattern = new RegExp(
    '<a class="css-truncate-target f4" href="/([a-zA-Z0-9-]{1,39})">'
  )

  // Encode both values: display names routinely contain spaces, and
  // characters such as `&`, `#` or `+` would otherwise truncate or alter
  // the query string.
  const url =
    `${GH_ORG_BASE}/${encodeURIComponent(org)}/people` +
    `?query=${encodeURIComponent(user)}`

  const res = await fetch(url, GH_WEB_OPTIONS)
  // An error page cannot contain a trustworthy result; skip scraping it.
  if (!res.ok) return null

  const body = await res.text()
  const match = pattern.exec(body)
  return match ? match[1] : null
}

async function fetchOrgsWithData() {
const orgs = await fetchOrgs()
const fetchingLeaders = orgs.map(org => fetchLeaders(org.id))
Expand Down

0 comments on commit 4ecec91

Please sign in to comment.