Skip to content

Commit

Permalink
Merge pull request #35 from prrao87/zstd-parquet
Browse files Browse the repository at this point in the history
Zstd parquet
  • Loading branch information
prrao87 authored Dec 13, 2023
2 parents ba0f1e0 + 61c8d17 commit 8719536
Show file tree
Hide file tree
Showing 11 changed files with 7 additions and 11 deletions.
2 changes: 1 addition & 1 deletion data/create_edges_follows.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def main() -> None:
edges_df = edges_df.head(NUM)
print(f"Limiting edges to {NUM} per the `--num` argument")
# Write edges
edges_df.write_parquet(Path("output/edges") / "follows.parquet", compression="snappy")
edges_df.write_parquet(Path("output/edges") / "follows.parquet")
print(f"Wrote {len(edges_df)} edges for {len(persons_df)} persons")


Expand Down
2 changes: 1 addition & 1 deletion data/create_edges_interests.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def main() -> None:
print(f"Limiting edges to {NUM} per the `--num` argument")
# Write edges
edges_df = edges_df.rename({"id": "from", "interests": "to"})
edges_df.write_parquet(Path("output/edges") / "interests.parquet", compression="snappy")
edges_df.write_parquet(Path("output/edges") / "interests.parquet")
print(f"Wrote {len(edges_df)} edges for {len(persons_df)} persons")


Expand Down
1 change: 0 additions & 1 deletion data/create_edges_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def main() -> None:
# Write edges
edges_df = edges_df.rename({"city_id": "to", "id": "from"}).write_parquet(
Path("output/edges") / "lives_in.parquet",
compression="snappy",
)
print(f"Generated residence cities for persons. Top 5 common cities are: {', '.join(top_5)}")

Expand Down
2 changes: 1 addition & 1 deletion data/create_edges_location_city_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def main() -> None:
.rename({"city_id": "from", "state_id": "to"})
)
# Write edges
edges_df.write_parquet(Path("output/edges") / "city_in.parquet", compression="snappy")
edges_df.write_parquet(Path("output/edges") / "city_in.parquet")
print(f"Wrote {len(edges_df)} edges for {len(cities_df)} cities")


Expand Down
2 changes: 1 addition & 1 deletion data/create_edges_location_state_country.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def main() -> None:
.rename({"state_id": "from", "country_id": "to"})
)
# Write edges
edges_df.write_parquet(Path("output/edges") / "state_in.parquet", compression="snappy")
edges_df.write_parquet(Path("output/edges") / "state_in.parquet")
print(f"Wrote {len(edges_df)} edges for {len(states_df)} states")


Expand Down
1 change: 0 additions & 1 deletion data/create_nodes_interests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def main(filename: str) -> pl.DataFrame:
# Write to parquet
interests_df.select(pl.col("id"), pl.all().exclude("id")).write_parquet(
Path("output/nodes") / "interests.parquet",
compression="snappy",
)
print(f"Wrote {interests_df.shape[0]} interests nodes to parquet")
return interests
Expand Down
1 change: 0 additions & 1 deletion data/create_nodes_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def write_city_nodes(cities_of_interest: pl.DataFrame) -> pl.DataFrame:
# Write to parquet
city_nodes.select(pl.col("id"), pl.all().exclude("id")).write_parquet(
Path("output/nodes") / "cities.parquet",
compression="snappy",
)
print(f"Wrote {city_nodes.shape[0]} cities to parquet")
return city_nodes
Expand Down
1 change: 0 additions & 1 deletion data/create_nodes_person.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def main() -> None:
# Write nodes
persons_df.select(pl.col("id"), pl.all().exclude("id")).write_parquet(
Path("output/nodes") / "persons.parquet",
compression="snappy",
)
print(f"Wrote {persons_df.shape[0]} person nodes to parquet")

Expand Down
2 changes: 1 addition & 1 deletion kuzudb/benchmark_query.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Use the `pytest-benchmark` library to more formally benchmark the Neo4j queries with warmup and iterations.
Use the `pytest-benchmark` library to more formally benchmark the Kùzu queries with warmup and iterations.
`pip install pytest-benchmark`
"""
import pytest
Expand Down
2 changes: 1 addition & 1 deletion kuzudb/query.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Run a series of queries on the Neo4j database
Run a series of queries on an existing Kùzu database
"""
from typing import Any

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ faker~=19.12.0
polars~=0.19.0
numpy>=1.25.0
pyarrow~=13.0.0
kuzu==0.0.11
kuzu==0.0.12
neo4j~=5.13.0
python-dotenv>=1.0.0
codetiming>=1.4.0
Expand Down

0 comments on commit 8719536

Please sign in to comment.