Skip to content

Commit

Permalink
Identify GCI users in repository information
Browse files Browse the repository at this point in the history
This begins to identify users in several meta repository stats,
including stargazers, watchers and forks.

Closes #100
  • Loading branch information
andrewda committed Dec 23, 2017
1 parent ed4f74e commit 86560eb
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 913 deletions.
30 changes: 30 additions & 0 deletions lib/queries/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/**
 * GraphQL query used to collect per-repository user information for a
 * GitHub organization.
 *
 * Variables:
 *   $org (String!) - login of the organization to inspect.
 *
 * For the first 100 repositories of the organization it requests:
 *   - the first 100 watchers (login, name)
 *   - the last 100 stargazers (login, name)
 *   - the last 100 forks (owner login, createdAt)
 *
 * The query requests no pageInfo or cursors, so anything beyond these
 * limits is not returned.
 */
module.exports.REPO_INFO_QUERY = `
query($org: String!) {
organization(login: $org) {
repositories(first: 100) {
nodes {
watchers(first: 100) {
nodes {
login
name
}
}
stargazers(last: 100) {
nodes {
login
name
}
}
forks(last: 100) {
nodes {
owner {
login
}
createdAt
}
}
}
}
}
}
`
99 changes: 93 additions & 6 deletions lib/scrape.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
const fetch = require('node-fetch')
const GraphQL = require('graphql-client')
const chattie = require('chattie')
const fs = require('fs')
const json2yaml = require('json2yaml')
const validUsername = require('valid-github-username')
const wdk = require('wikidata-sdk')

const { REPO_INFO_QUERY } = require('./queries')

// Base URLs of the GitHub website, its REST and GraphQL APIs, and the
// Google Code-in API.
const GH_BASE = 'https://github.com'
const GH_USER_BASE = GH_BASE + '/users'
const GH_ORG_BASE = GH_BASE + '/orgs'
const GH_API_BASE = 'https://api.github.com'
const GH_GQL_BASE = 'https://api.github.com/graphql'
const GCI_API_BASE = 'https://codein.withgoogle.com/api'

// Date at which the competition opened. Left undefined here; it is assigned
// once at startup from the program's `competition_open_starts` value.
let COMPETITION_OPEN

// NOTE(review): presumably the minimum score a search result needs to be
// accepted as a match; the code using it is outside this view.
const MIN_SEARCH_SCORE = 10

// The time to cache GitHub usernames for in milliseconds
Expand All @@ -32,6 +38,13 @@ const GH_API_OPTIONS = {
: {},
}

// Options handed to the GraphQL client: the GitHub GraphQL endpoint plus an
// Authorization header when a GITHUB_TOKEN is present in the environment.
const GH_GQL_OPTIONS = {
  url: GH_GQL_BASE,
  headers: (() => {
    const token = process.env.GITHUB_TOKEN
    // Without a token the request is sent with no extra headers.
    if (!token) return {}
    return { Authorization: `bearer ${token}` }
  })(),
}

const GH_WEB_OPTIONS = {
headers: {
Accept: 'text/html',
Expand All @@ -52,6 +65,8 @@ try {
console.log('No existing data...')
}

// Shared GraphQL client used for GitHub queries, built from GH_GQL_OPTIONS
// (endpoint URL and, when GITHUB_TOKEN is set, a bearer Authorization header).
const client = GraphQL(GH_GQL_OPTIONS)

async function fetchProgram() {
const res = await fetch(`${GCI_API_BASE}/program/2017/`)
return await res.json()
Expand All @@ -69,6 +84,69 @@ async function fetchLeaders(id) {
return leaders
}

// Cache of fetched repository info keyed by organization login, so each
// organization is queried at most once per successful fetch.
let repositoryInfo = {}

/**
 * Fetch watchers, stargazers and forks for the repositories of an
 * organization via REPO_INFO_QUERY, caching the result per organization.
 *
 * @param {string} org - GitHub organization login.
 * @returns {Promise<Array<{watchers: Array, stargazers: Array, forks: Array}>>}
 *   One entry per repository. An empty array is returned (and NOT cached)
 *   when the response carries no usable organization data, e.g. when the
 *   login cannot be resolved.
 */
async function fetchRepositoryInfo(org) {
  if (repositoryInfo[org]) return repositoryInfo[org]

  const { data } = await client.query(REPO_INFO_QUERY, { org })

  // GitHub answers with `organization: null` (plus an `errors` array) when the
  // login cannot be resolved. Dereferencing it blindly would throw a TypeError
  // and abort callers that do not wrap this call in try/catch.
  const repositories =
    data && data.organization && data.organization.repositories
  if (!repositories || !Array.isArray(repositories.nodes)) return []

  const info = repositories.nodes.map(node => ({
    watchers: node.watchers.nodes,
    stargazers: node.stargazers.nodes,
    forks: node.forks.nodes,
  }))

  repositoryInfo[org] = info

  return info
}

/**
 * Try to resolve a participant to a GitHub login using the repository
 * information (watchers, stargazers and forks) of an organization.
 *
 * Matching is case-insensitive and happens in two steps:
 *   1. `shortName` against the logins of all watchers, all stargazers and
 *      the owners of forks created after COMPETITION_OPEN;
 *   2. `displayName` against the profile names of watchers.
 *
 * @param {string} org - GitHub organization login.
 * @param {string} displayName - Display name to compare with watcher names.
 * @param {string} shortName - Candidate username to compare with logins.
 * @returns {Promise<string|undefined>} `shortName` when it matches a login,
 *   otherwise the login of the watcher whose name matches `displayName`,
 *   otherwise undefined.
 */
async function getGitHubUserFromRepoInfo(org, displayName, shortName) {
  let repos = []
  try {
    repos = await fetchRepositoryInfo(org)
  } catch (e) {
    // Best effort: without repository info we simply find no match here.
    console.error(`Could not fetch repository info for ${org}...`, e)
  }

  // A Set dedupes logins and gives constant-time membership checks.
  const logins = new Set()
  // A Map (not a plain object) so that display names such as "constructor"
  // or "__proto__" cannot resolve to inherited Object.prototype members.
  const names = new Map()

  for (const repo of repos) {
    for (const watcher of repo.watchers) {
      logins.add(watcher.login.toLowerCase())
      if (watcher.name) {
        names.set(watcher.name.toLowerCase(), watcher.login)
      }
    }

    // NOTE(review): stargazer names are fetched by the query but only their
    // logins are used for matching — confirm whether that is intentional.
    for (const stargazer of repo.stargazers) {
      logins.add(stargazer.login.toLowerCase())
    }

    for (const fork of repo.forks) {
      // Only forks created after the competition opened count as a signal.
      const createdAt = new Date(fork.createdAt)
      if (createdAt.getTime() > COMPETITION_OPEN.getTime()) {
        const login = fork.owner.login.toLowerCase()
        if (login) logins.add(login)
      }
    }
  }

  if (logins.has(shortName.toLowerCase())) {
    return shortName
  }

  const loginByName = names.get(displayName.toLowerCase())
  if (loginByName) {
    return loginByName
  }
}

async function checkGitHubUserExists(user) {
const res = await fetch(`${GH_BASE}/${user}`)
return res.status === 200
Expand Down Expand Up @@ -232,8 +310,15 @@ async function findGitHubUser(displayName, org) {

const shortName = validUsername(displayName)

const username = await findGitHubUserInOrg(displayName, org)
if (username) return username
const userFromRepo = await getGitHubUserFromRepoInfo(
org,
displayName,
shortName
)
if (userFromRepo) return userFromRepo

const userInOrg = await findGitHubUserInOrg(displayName, org)
if (userInOrg) return userInOrg

let user
try {
Expand All @@ -245,12 +330,9 @@ async function findGitHubUser(displayName, org) {

const login = user.login

const { competition_open_starts } = await fetchProgram()

const updatedTime = new Date(user.updated_at)
const openTime = new Date(competition_open_starts)

if (updatedTime.getTime() - openTime.getTime() < 0) return
if (updatedTime.getTime() - COMPETITION_OPEN.getTime() < 0) return

let orgs = []
try {
Expand Down Expand Up @@ -346,6 +428,8 @@ async function fetchOrgsWithData() {
const orgWiki = await Promise.all(fetchingWiki)

const fetchingAll = orgs.map(async (org, index) => {
await fetchRepositoryInfo(orgGitHub[index])

const existingOrg = existingData.find(existing => existing.id === org.id)
const fetchingUsers = orgLeaders[index].map(async user => {
let existingUser
Expand Down Expand Up @@ -390,6 +474,9 @@ async function fetchDates() {
}

;(async () => {
const { competition_open_starts } = await fetchProgram()
COMPETITION_OPEN = new Date(competition_open_starts)

const data = await fetchOrgsWithData()
const dates = await fetchDates()

Expand Down
Loading

0 comments on commit 86560eb

Please sign in to comment.