Skip to content

Commit

Permalink
Identify GCI users in repository information
Browse files Browse the repository at this point in the history
This begins to identify users in several meta repository stats,
including stargazers, watchers and forks.

Closes #100
  • Loading branch information
andrewda committed Dec 24, 2017
1 parent d6ec5df commit 3377081
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 944 deletions.
3 changes: 3 additions & 0 deletions lib/queries/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
const { loadQuery } = require('../utils')

// GraphQL document loaded from the `repo_info` query file via loadQuery
const REPO_INFO_QUERY = loadQuery('repo_info')

module.exports.REPO_INFO_QUERY = REPO_INFO_QUERY
28 changes: 28 additions & 0 deletions lib/queries/repo_info.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# For the organization whose login is $org, selects its first 100
# repositories and, per repository, its watchers, stargazers and forks.
query($org: String!) {
organization(login: $org) {
repositories(first: 100) {
nodes {
# First 100 watchers of the repository (login and name)
watchers(first: 100) {
nodes {
login
name
}
}
# Last 100 stargazers of the repository (login and name)
stargazers(last: 100) {
nodes {
login
name
}
}
# Last 100 forks of the repository: the fork owner's login and the fork's
# creation time
forks(last: 100) {
nodes {
owner {
login
}
createdAt
}
}
}
}
}
}
126 changes: 117 additions & 9 deletions lib/scrape.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
const fetch = require('node-fetch')
const GraphQL = require('graphql-client')
const chattie = require('chattie')
const fs = require('fs')
const json2yaml = require('json2yaml')
const validUsername = require('valid-github-username')
const wdk = require('wikidata-sdk')
const cheerio = require('cheerio')

const { REPO_INFO_QUERY } = require('./queries')

// Base URLs of the GitHub website and of its user / organization pages
const GH_BASE = 'https://github.com'
const GH_USER_BASE = GH_BASE + '/users'
const GH_ORG_BASE = GH_BASE + '/orgs'

// Base URLs of the GitHub REST and GraphQL APIs and of the GCI API
const GH_API_BASE = 'https://api.github.com'
const GH_GQL_BASE = `${GH_API_BASE}/graphql`
const GCI_API_BASE = 'https://codein.withgoogle.com/api'

// Score threshold applied to GitHub organization search results
const MIN_SEARCH_SCORE = 10

// How long GitHub usernames stay cached, in milliseconds (two days)
const GITHUB_CACHE_TIME = 1000 * 60 * 60 * 24 * 2
// Flag OR-ed into the cache freshness check for GitHub usernames
const BUST_GITHUB_CACHE = true

// Wikidata property id for "Stack Exchange tag"
const WIKI_SE_TAG = 'P1482'
Expand Down Expand Up @@ -43,6 +48,13 @@ const GH_API_OPTIONS = {
: {},
}

// Options for the GitHub GraphQL client. The Authorization header is only
// attached when a GITHUB_TOKEN environment variable is set.
const GH_GQL_OPTIONS = {
  url: GH_GQL_BASE,
  headers: {},
}

if (process.env.GITHUB_TOKEN) {
  GH_GQL_OPTIONS.headers = {
    Authorization: `bearer ${process.env.GITHUB_TOKEN}`,
  }
}

const GH_WEB_OPTIONS = {
headers: {
Accept: 'text/html',
Expand All @@ -54,6 +66,10 @@ const GH_WEB_OPTIONS = {
compress: false,
}

// GraphQL client configured with GH_GQL_OPTIONS (GitHub GraphQL endpoint and
// optional token header)
const client = GraphQL(GH_GQL_OPTIONS)

// Date the competition opened. Assigned in the main async entry point from the
// `competition_open_starts` value returned by fetchProgram(), before any
// organization data is fetched.
let COMPETITION_OPEN

let existingData = []
try {
existingData = JSON.parse(
Expand All @@ -80,6 +96,71 @@ async function fetchLeaders(id) {
return leaders
}

// Cache of repository info, keyed by GitHub organization login
let repositoryInfo = {}

/**
 * Fetches the watchers, stargazers and forks of an organization's
 * repositories through the GitHub GraphQL API, caching successful results
 * per organization.
 *
 * @param {string} org - GitHub login of the organization
 * @returns {Promise<Array<{watchers: Array, stargazers: Array, forks: Array}>>}
 *   one entry per repository; an empty array when no organization login is
 *   given or the API returns no data for it (e.g. the organization does not
 *   exist). Empty results are not cached.
 */
async function fetchRepositoryInfo(org) {
  // Without a login the query can only fail, so skip the network round trip
  if (!org) return []

  // Own-property check so inherited keys are never mistaken for cache hits
  if (Object.prototype.hasOwnProperty.call(repositoryInfo, org)) {
    return repositoryInfo[org]
  }

  const response = await client.query(REPO_INFO_QUERY, { org })

  // GitHub answers `organization: null` (plus an `errors` array) when the
  // login cannot be resolved, so every level has to be checked
  const organization = response && response.data && response.data.organization
  if (!organization || !organization.repositories) return []

  const info = (organization.repositories.nodes || [])
    .filter(node => node)
    .map(node => ({
      watchers: node.watchers.nodes,
      stargazers: node.stargazers.nodes,
      forks: node.forks.nodes,
    }))

  repositoryInfo[org] = info

  return info
}

/**
 * Tries to identify a participant's GitHub login from the people interacting
 * with an organization's repositories.
 *
 * A login counts as a candidate when it watches or stars a repository, or owns
 * a fork created after COMPETITION_OPEN. The participant matches when
 * `shortName` equals a candidate login (case-insensitive); failing that, when
 * `displayName` equals the profile name of a watcher (case-insensitive).
 *
 * NOTE(review): only watcher names are consulted for the display-name match,
 * although the repository data also carries stargazer names.
 *
 * @param {string} org - GitHub login of the organization
 * @param {string} displayName - the participant's display name
 * @param {string} shortName - username derived from the display name
 * @returns {Promise<string|undefined>} `shortName` on a login match, the
 *   watcher's login on a name match, otherwise undefined
 */
async function getGitHubUserFromRepoInfo(org, displayName, shortName) {
  let repos = []
  try {
    repos = await fetchRepositoryInfo(org)
  } catch (e) {
    // Best effort: without repository info there is simply nothing to match
    console.error(`Could not fetch repository info for ${org}...`)
  }

  // Set/Map give constant-time lookups and, unlike a plain object, never
  // report inherited keys such as "constructor" as a match
  const logins = new Set()
  const names = new Map()

  repos.forEach(repo => {
    repo.watchers.forEach(watcher => {
      logins.add(watcher.login.toLowerCase())
      if (watcher.name) {
        // A later watcher with the same name replaces an earlier one
        names.set(watcher.name.toLowerCase(), watcher.login)
      }
    })

    repo.stargazers.forEach(stargazer => {
      logins.add(stargazer.login.toLowerCase())
    })

    repo.forks.forEach(fork => {
      // Only forks created after the competition opened are considered
      const createdAt = new Date(fork.createdAt)
      if (createdAt.getTime() > COMPETITION_OPEN.getTime()) {
        logins.add(fork.owner.login.toLowerCase())
      }
    })
  })

  if (typeof shortName === 'string' && logins.has(shortName.toLowerCase())) {
    return shortName
  }

  if (typeof displayName === 'string') {
    const login = names.get(displayName.toLowerCase())
    if (login) return login
  }
}

async function checkGitHubUserExists(user) {
const res = await fetch(`${GH_BASE}/${user}`)
return res.status === 200
Expand Down Expand Up @@ -189,6 +270,7 @@ async function findOrganization({
const searchResults = await searchGitHubOrgs(searchQuery)

if (searchResults.length > 0 && searchResults[0].score > MIN_SEARCH_SCORE) {
console.log('search', searchResults[0])
return searchResults[0].login
}

Expand Down Expand Up @@ -280,25 +362,43 @@ async function findGitHubUser(displayName, org) {

const shortName = validUsername(displayName)

const username = await findGitHubUserInOrg(displayName, org)
if (username) return username
let userInOrg
try {
userInOrg = await findGitHubUserInOrg(displayName, org)
} catch (e) {
console.error(`Failed to find user ${displayName} in org ${org}...`)
}
if (userInOrg) {
console.log(`${displayName}: ${userInOrg} (method: user in org)`)
return userInOrg
}

let user
try {
user = await getGitHubUser(shortName)
} catch (e) {
console.error(`Got error when finding user ${shortName}...`)
console.error(`Failed to find user ${shortName}...`)
}
if (!user) return

const login = user.login
if (!user) {
const userFromRepo = await getGitHubUserFromRepoInfo(
org,
displayName,
shortName
)

const { competition_open_starts } = await fetchProgram()
if (!userFromRepo) {
return
}

user = userFromRepo
}

const login = user.login

const updatedTime = new Date(user.updated_at)
const openTime = new Date(competition_open_starts)

if (updatedTime.getTime() - openTime.getTime() < 0) return
if (updatedTime.getTime() - COMPETITION_OPEN.getTime() < 0) return

let orgs = []
try {
Expand All @@ -309,7 +409,9 @@ async function findGitHubUser(displayName, org) {
} catch (e) {
console.error('Could not fetch user history...')
}

if (orgs.includes(org)) {
console.log(`${displayName}: ${user.login} (method: found user from name)`)
return user.login
}
}
Expand Down Expand Up @@ -339,7 +441,8 @@ async function freshenUserGitHubCache(user, existingUser, organization) {
existingUser &&
existingUser.github_updated &&
existingUser.github_account
)
) ||
BUST_GITHUB_CACHE
) {
return {
login: await findGitHubUser(user.display_name, organization),
Expand Down Expand Up @@ -394,6 +497,8 @@ async function fetchOrgsWithData() {
const orgWiki = await Promise.all(fetchingWiki)

const fetchingAll = orgs.map(async (org, index) => {
await fetchRepositoryInfo(orgGitHub[index])

const existingOrg = existingData.find(existing => existing.id === org.id)
const fetchingUsers = orgLeaders[index].map(async user => {
let existingUser
Expand Down Expand Up @@ -439,6 +544,9 @@ async function fetchDates() {
}

;(async () => {
const { competition_open_starts } = await fetchProgram()
COMPETITION_OPEN = new Date(competition_open_starts)

const data = await fetchOrgsWithData()
const dates = await fetchDates()

Expand Down
4 changes: 4 additions & 0 deletions lib/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
const fs = require('fs')

/**
 * Reads a GraphQL query file from the `queries` directory next to this module.
 *
 * @param {string} name - file name of the query, without the `.graphql` suffix
 * @returns {string} the contents of the query file
 */
module.exports.loadQuery = name => {
  const queryFile = `${__dirname}/queries/${name}.graphql`
  return fs.readFileSync(queryFile, 'utf8')
}
Loading

0 comments on commit 3377081

Please sign in to comment.