Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace UUIDs, implement batch insert #7

Merged
merged 6 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 37 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ version = "0.1.0"
edition = "2021"

[dependencies]
rusqlite = { version = "0.30.0", features = ["bundled", "uuid"] }
rusqlite = { version = "0.30.0", features = ["bundled", "limits"] }
rusqlite_migration = { version = "1.1.0", features = ["from-directory"] }
uuid = { version = "1.8.0", features = ["v4"] }
rand = "0.8.5"

# SeaHash chosen due to:
# - widely used
Expand Down
149 changes: 104 additions & 45 deletions migrations/01-init/up.sql
Original file line number Diff line number Diff line change
@@ -1,76 +1,135 @@
-- See `src/report/models.rs` for complete, up-to-date schema documentation.

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
CREATE TABLE source_file (
-- This should be set to the hash of the `path` column so that we can
-- distribute processing across multiple different hosts and they will
-- all come up with the same ID.
id INTEGER PRIMARY KEY,

path VARCHAR NOT NULL
) WITHOUT ROWID;
);

-- TODO: Allow distinguishing between raw reports within a single upload
-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- One row per raw upload. Other tables in this migration (coverage_sample,
-- branches_data, method_data, span_data) point back here through their
-- `raw_upload_id` column.
--
-- Apart from `id`, every column is nullable: nothing below is declared
-- NOT NULL, so an upload row may be inserted with only partial metadata.
CREATE TABLE raw_upload (
-- This should be set to a random 64-bit integer so that we can
-- distribute processing across multiple different hosts and they will
-- not fight over autoincrementing ID values.
--
-- In SQLite an `INTEGER PRIMARY KEY` column is an alias for the rowid, so
-- the application must supply the random value explicitly on INSERT;
-- omitting it makes SQLite pick the next rowid instead.
id INTEGER PRIMARY KEY,

-- NOTE(review): unit/epoch of `timestamp` is not stated here -- confirm
-- against `src/report/models.rs`.
timestamp INTEGER,
raw_upload_url VARCHAR,
flags VARCHAR, -- JSON
provider VARCHAR,
build VARCHAR,
name VARCHAR,
job_name VARCHAR,
ci_run_url VARCHAR,
state VARCHAR,
env VARCHAR,
session_type VARCHAR,
-- Last column: no trailing comma (the comma previously sat inside the
-- comment text, where it had no effect).
session_extras VARCHAR -- JSON
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
CREATE TABLE context (
-- This should be set to the hash of the `name` column so that we can
-- distribute processing across multiple different hosts and they will
-- all come up with the same ID.
Comment on lines +37 to +39
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mentioned concerns around id collisions. Both hashes and random ints have potential for collision (although negligibly small).
Here in particular, context_type should ideally be part of the hash as well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right, but I'd handle it as a separate change since this PR is already pretty big. We probably want to get rid of `ContextType` and rename `context` to `label` or `test_case` or something similar, which you also observed.

id INTEGER PRIMARY KEY,

context_type VARCHAR NOT NULL,
name VARCHAR NOT NULL
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
-- Association table tying a `context` row to measurements inside one raw
-- upload. Judging by the disabled foreign keys below, a row presumably
-- targets a coverage sample via (raw_upload_id, local_sample_id) and/or a
-- span via (raw_upload_id, local_span_id) -- TODO confirm against
-- `src/report/models.rs`.
CREATE TABLE context_assoc (
context_id INTEGER REFERENCES context(id) NOT NULL,

-- No REFERENCES clause here: unlike the other tables in this file,
-- `raw_upload_id` is not declared as a foreign key to raw_upload(id).
raw_upload_id INTEGER NOT NULL,
-- Both local IDs are nullable, so a row may name a sample, a span, or both.
local_sample_id INTEGER,
local_span_id INTEGER,

-- TODO: Figure out how to re-enable these
-- FOREIGN KEY (raw_upload_id, local_sample_id) REFERENCES coverage_sample(raw_upload_id, local_sample_id),
-- FOREIGN KEY (raw_upload_id, local_span_id) REFERENCES span_data(raw_upload_id, local_span_id),

-- NOTE(review): `local_sample_id` and `local_span_id` are nullable members
-- of this key. In an ordinary (rowid) SQLite table NULLs are permitted in
-- PRIMARY KEY columns and are treated as distinct from each other for
-- uniqueness, so two rows that agree on every non-NULL column but leave the
-- same column NULL are NOT rejected as duplicates. Also, switching this
-- table to `WITHOUT ROWID` (see the TODO above the table) would make every
-- primary-key column implicitly NOT NULL and break inserts that leave one
-- of these IDs NULL. Confirm which behaviour is intended.
PRIMARY KEY (context_id, raw_upload_id, local_sample_id, local_span_id)
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
CREATE TABLE coverage_sample (
id BLOB PRIMARY KEY,
raw_upload_id INTEGER REFERENCES raw_upload(id) NOT NULL,

-- This should be an application-managed auto-incremented integer.
local_sample_id INTEGER NOT NULL,

source_file_id INTEGER REFERENCES source_file(id) NOT NULL,
line_no INTEGER NOT NULL,

coverage_type VARCHAR NOT NULL,
hits INTEGER,
hit_branches INTEGER,
total_branches INTEGER
) WITHOUT ROWID;
total_branches INTEGER,

PRIMARY KEY (raw_upload_id, local_sample_id)
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
CREATE TABLE branches_data (
id BLOB PRIMARY KEY,
raw_upload_id INTEGER REFERENCES raw_upload(id) NOT NULL,
local_sample_id INTEGER NOT NULL,

-- This should be an application-managed auto-incremented integer.
local_branch_id INTEGER NOT NULL,

source_file_id INTEGER REFERENCES source_file(id) NOT NULL,
sample_id BLOB REFERENCES coverage_sample(id) NOT NULL,

hits INTEGER NOT NULL,
branch_format VARCHAR NOT NULL,
branch VARCHAR NOT NULL
) WITHOUT ROWID;
branch VARCHAR NOT NULL,

FOREIGN KEY (raw_upload_id, local_sample_id) REFERENCES coverage_sample(raw_upload_id, local_sample_id),
PRIMARY KEY (raw_upload_id, local_branch_id)
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
CREATE TABLE method_data (
id BLOB PRIMARY KEY,
raw_upload_id INTEGER REFERENCES raw_upload(id) NOT NULL,
local_sample_id INTEGER NOT NULL,

-- This should be an application-managed auto-incremented integer.
local_method_id INTEGER NOT NULL,

source_file_id INTEGER REFERENCES source_file(id) NOT NULL,
sample_id BLOB REFERENCES coverage_sample(id),
line_no INTEGER,

hit_branches INTEGER,
total_branches INTEGER,
hit_complexity_paths INTEGER,
total_complexity INTEGER
) WITHOUT ROWID;
total_complexity INTEGER,

FOREIGN KEY (raw_upload_id, local_sample_id) REFERENCES coverage_sample(raw_upload_id, local_sample_id),
PRIMARY KEY (raw_upload_id, local_method_id)
);

-- TODO: Measure size/perf impact of making this table `WITHOUT ROWID`
CREATE TABLE span_data (
id BLOB PRIMARY KEY,
raw_upload_id INTEGER REFERENCES raw_upload(id) NOT NULL,
local_sample_id INTEGER,

-- This should be an application-managed auto-incremented integer.
local_span_id INTEGER NOT NULL,

source_file_id INTEGER REFERENCES source_file(id) NOT NULL,
sample_id BLOB REFERENCES coverage_sample(id),

hits INTEGER NOT NULL,
start_line INTEGER,
start_col INTEGER,
end_line INTEGER,
end_col INTEGER
) WITHOUT ROWID;

CREATE TABLE context (
id INTEGER PRIMARY KEY,
context_type VARCHAR NOT NULL,
name VARCHAR NOT NULL
);
end_col INTEGER,

CREATE TABLE context_assoc (
context_id INTEGER NOT NULL,
sample_id BLOB,
branch_id BLOB,
method_id BLOB,
span_id BLOB,
PRIMARY KEY(context_id, sample_id)
) WITHOUT ROWID;

CREATE TABLE upload_details (
context_id INTEGER REFERENCES context(id) NOT NULL,
timestamp INTEGER,
raw_upload_url VARCHAR,
flags VARCHAR, -- JSON
provider VARCHAR,
build VARCHAR,
name VARCHAR,
job_name VARCHAR,
ci_run_url VARCHAR,
state VARCHAR,
env VARCHAR,
session_type VARCHAR,
session_extras VARCHAR -- JSON,
FOREIGN KEY (raw_upload_id, local_sample_id) REFERENCES coverage_sample(raw_upload_id, local_sample_id),
PRIMARY KEY (raw_upload_id, local_span_id)
);
Loading
Loading