Skip to content

Commit

Permalink
Merge pull request #6 from gwenwindflower/add-pre-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
gwenwindflower authored Sep 24, 2023
2 parents cf8fe52 + b4d8165 commit 764a09d
Show file tree
Hide file tree
Showing 16 changed files with 115 additions and 75 deletions.
10 changes: 4 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@ jobs:
- name: Checkout
uses: actions/checkout@v4.1.0
- name: Setup Python
uses: actions/setup-python@v4.7.0
uses: actions/setup-python@v4.7.0
with:
python-version: '3.10.x'
python-version: "3.10.x"
- name: Install requirements
run: python3 -m pip install -r requirements.txt
- name: Load data-test
- name: Run EL
run: python3 el.py -lc
- name: Run transformations
- name: Run T
run: dbt deps && dbt build


28 changes: 28 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# pre-commit configuration: generic file hygiene hooks plus Python
# linting/formatting. The ruff, isort and black revisions mirror the pins
# in requirements.txt so local runs and hook runs use the same versions.
repos:
  # General-purpose checks and fixers.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: check-yaml
        # This one file is deliberately left out of YAML validation.
        exclude: reports/evidence.plugins.yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: requirements-txt-fixer
  # Python linting with autofix. --exit-non-zero-on-fix makes the hook fail
  # whenever it rewrote something, so the fix has to be reviewed and re-staged.
  - repo: https://github.com/charliermarsh/ruff-pre-commit
    rev: v0.0.291
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
  # Import ordering.
  - repo: https://github.com/PyCQA/isort
    rev: 5.12.0
    hooks:
      - id: isort
  # Code formatting.
  - repo: https://github.com/psf/black
    rev: 23.9.1
    hooks:
      - id: black
  # NOTE(review): the sqlfluff hook is currently disabled; the reason is not
  # recorded here -- confirm before re-enabling.
  # - repo: https://github.com/sqlfluff/sqlfluff
  #   rev: 2.3.2
  #   hooks:
  #     - id: sqlfluff-fix
  #       additional_dependencies:
  #         ["dbt-metricflow[duckdb]~=0.3.0", "sqlfluff-templater-dbt"]
7 changes: 7 additions & 0 deletions .sqlfluff
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
# sqlfluff configuration for the dbt project (DuckDB dialect, dbt templater).
[sqlfluff]
dialect = duckdb
templater = dbt

[sqlfluff:templater:dbt]
profiles_dir = .

[sqlfluff:indentation]
tab_space_size = 4

[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = lower

# Rule options must live under [sqlfluff:rules:<rule name>]; the previous
# [sqlfluff:convention] section with select_trailing_comma was not a
# recognised section/option, so it had no effect. "forbid" matches how the
# models are written (select lists end without a trailing comma).
# NOTE(review): switch to "require" if trailing commas were the intent.
[sqlfluff:rules:convention.select_trailing_comma]
select_clause_trailing_comma = forbid
3 changes: 3 additions & 0 deletions .sqlfluffignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Directories sqlfluff should skip when linting.
target/
dbt_packages/
macros/
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,35 +17,35 @@ There are some basic tasks included using my preferred task runner [Task](https:
<summary>macOS</summary>
<br>
Using Homebrew:

```shell
brew install go-task
```

</details>
<details>

<summary>Windows</summary>
<br>
Using Chocolatey:

```shell
choco install go-task
```

Using Scoop:

```shell
scoop install task
```

</details>
<details>

<summary>Linux</summary>
<br>
Using Yay:

```shell
yay -S go-task-bin
```
Expand Down
Binary file modified data-test/2023-09-22-9.json.gz
Binary file not shown.
19 changes: 10 additions & 9 deletions el.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import argparse
import os
import requests
from datetime import date, datetime, timedelta

import duckdb
from tqdm import tqdm
import requests
from halo import Halo
from datetime import datetime, date, timedelta
from tqdm import tqdm


def validate_date(date_str):
Expand Down Expand Up @@ -65,18 +66,18 @@ def load_data():
else:
spinner_text = "🦆💾 Loading data into DuckDB..."
connection = "./reports/github_archive.db"

spinner = Halo(text=spinner_text, spinner="dots")
spinner.start()

con = duckdb.connect(database=connection, read_only=False)
con.execute(
"""
CREATE SCHEMA IF NOT EXISTS raw;
CREATE OR REPLACE TABLE raw.github_events AS
CREATE OR REPLACE TABLE raw.github_events AS
SELECT * FROM read_ndjson(
"""
"'" + data_path + '/*.json.gz' + "',"
"'" + data_path + "/*.json.gz" + "',"
"""
columns={
'id': 'VARCHAR',
Expand All @@ -91,8 +92,8 @@ def load_data():
)',
'repo': 'STRUCT(id VARCHAR, name VARCHAR, url VARCHAR)',
'payload': 'JSON',
'public': 'BOOLEAN',
'created_at': 'TIMESTAMP',
'public': 'BOOLEAN',
'created_at': 'TIMESTAMP',
'org': 'STRUCT(
id VARCHAR,
login VARCHAR,
Expand Down
1 change: 0 additions & 1 deletion models/marts/pull_request_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ unnest_json as (
->> '$.comments' as pull_request_comment_count,
payload -> '$.pull_request' -> '$.user' ->> '$.login' as user_login,
payload -> '$.pull_request' -> '$.user' ->> '$.id' as user_id,
payload -> '$.pull_request' -> '$.repo' ->> '$.id' as repo_id,
payload
-> '$.pull_request'
-> '$.repo'
Expand Down
12 changes: 6 additions & 6 deletions models/marts/repos.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ distill_repos_from_events as (
repo_id,
repo_name,
repo_url,
max(event_created_at) as repo_state_last_seen_at,
max(event_created_at) as repo_state_last_seen_at

from {{ ref('stg_events') }}

Expand All @@ -28,8 +28,8 @@ rank_most_recent_repo_state as (
row_number() over (
partition by repo_id
order by repo_state_last_seen_at desc
) as repo_recency_rank,
) as repo_recency_rank

from distill_repos_from_events

),
Expand All @@ -39,10 +39,10 @@ pull_most_recent_repo_state as (
select
repo_id,
repo_name,
repo_url,
repo_url

from rank_most_recent_repo_state

where repo_recency_rank = 1

)
Expand Down
14 changes: 7 additions & 7 deletions models/marts/users.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,23 @@ with
distill_user_states_from_events as (

select
{{
{{
dbt_utils.generate_surrogate_key([
'actor_id',
'actor_gravatar_id',
'actor_login',
'actor_display_login',
'actor_url',
'actor_avatar_url'
])
])
}} as user_state_uuid,
actor_id,
actor_gravatar_id,
actor_login,
actor_display_login,
actor_url,
actor_avatar_url,
max(event_created_at) as user_state_last_seen_at,
max(event_created_at) as user_state_last_seen_at

from {{ ref('stg_events') }}

Expand All @@ -37,10 +37,10 @@ rank_user_state_recency as (
actor_url,
actor_avatar_url,
row_number() over (
partition by actor_id
partition by actor_id
order by user_state_last_seen_at desc
) as user_state_recency_rank,
) as user_state_recency_rank

from distill_user_states_from_events

),
Expand All @@ -53,7 +53,7 @@ pull_most_recent_user_state as (
actor_login,
actor_display_login,
actor_url,
actor_avatar_url,
actor_avatar_url

from rank_user_state_recency

Expand Down
63 changes: 31 additions & 32 deletions models/staging/github/stg_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,43 @@ with

source as (

select * from {{ source('github', 'github_events') }}
select * from {{ source('github', 'github_events') }}

),


renamed as (

select
--- event
id as event_id,
type as event_type,
public as is_event_public,
created_at as event_created_at,

--- actor
actor.id as actor_id,
actor.gravatar_id as actor_gravatar_id,
actor.login as actor_login,
actor.display_login as actor_display_login,
actor.url as actor_url,
actor.avatar_url as actor_avatar_url,

--- repo
repo.id as repo_id,
repo.name as repo_name,
repo.url as repo_url,

--- org
org.id as org_id,
org.login as org_login,
org.gravatar_id as org_gravatar_id,
org.url as org_url,
org.avatar_url as org_avatar_url,

--- payload
payload

from source
select
--- event
id as event_id,
type as event_type,
public as is_event_public,
created_at as event_created_at,

payload,

--- actor
actor['id'] as actor_id,
actor['gravatar_id'] as actor_gravatar_id,
actor['login'] as actor_login,
actor['display_login'] as actor_display_login,
actor['url'] as actor_url,
actor['avatar_url'] as actor_avatar_url,

--- repo
repo['id'] as repo_id,
repo['name'] as repo_name,
repo['url'] as repo_url,

--- org
org['id'] as org_id,
org['login'] as org_login,
org['gravatar_id'] as org_gravatar_id,
org['url'] as org_url,
org['avatar_url'] as org_avatar_url

from source

)

Expand Down
2 changes: 1 addition & 1 deletion reports/.evidence/customization/custom-formatting.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"version": "1.0",
"customFormats": []
}
}
2 changes: 1 addition & 1 deletion reports/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ node_modules
.vscode/settings.json

.duckdb
.db
.db
2 changes: 1 addition & 1 deletion reports/.npmrc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
loglevel=error
audit=false
fund=false
fund=false
6 changes: 3 additions & 3 deletions reports/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ Check out the docs for [alternative install methods](https://docs.evidence.dev/g

```shell
npx degit evidence-dev/template my-project
cd my-project
npm install
npm run dev
cd my-project
npm install
npm run dev
```

Once you've launched Evidence, this project includes a short tutorial to help you get started.
Expand Down
9 changes: 7 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
black~=23.9.1
dbt-metricflow[duckdb]~=0.3.0
halo~=0.0.31
isort~=5.12.0
pre-commit~=3.4.0
requests~=2.31.0
ruff~=0.0.291
sqlfluff-templater-dbt~=2.3.2
sqlfluff~=2.3.2
tqdm~=4.66.1
halo~=0.0.31
sqlfluff~=2.3.0

0 comments on commit 764a09d

Please sign in to comment.