forked from cBioPortal/cbioportal
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Moved from the pvannierop/cbioportal-clickhouse-pilot repository.
- Loading branch information
1 parent
a317afe
commit 25f780a
Showing
3 changed files
with
221 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Setup for development on Clickhouse integration | ||
|
||
## Pre-requisites | ||
|
||
1. cBioPortal database: | ||
|
||
- provisioned with one or more study in the _cbioportal_ database. | ||
- user: _cbio_ password: _P@ssword1_ with all permissions on the _cbioportal_ database. | ||
- available on port 3306 on the host system. | ||
|
||
3. System with docker and docker compose installed. | ||
|
||
## Setup | ||
|
||
All commands start from the root repository location. | ||
|
||
1. Start cBioPortal database. | ||
2. Create MySQL views in the cBioPortal database by running the commands | ||
in [cbio_database_views.sql](mysql_provisioning/cbio_database_views.sql). | ||
3. Start Clickhouse (provisioned automatically). | ||
|
||
``` | ||
cd ./dev/clickhouse | ||
docker compose up -d | ||
``` | ||
|
||
This will start a Clickhouse instance that is available on the host system: | ||
- port: _8123_ | ||
- database: _cbioportal_ | ||
- username: _cbio_ | ||
- password: _P@ssword1_ | ||
|
||
## TODO | ||
|
||
- Extend [cbio_database_views.sql](mysql_provisioning/cbio_database_views.sql) | ||
and [c_import_mysql_data.sql](clickhouse_provisioning/c_import_mysql_data.sql) to cover all clickhouse tables. |
8 changes: 8 additions & 0 deletions
8
dev/clickhouse/clickhouse_provisioning/c_import_mysql_data.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
insert into cbioportal.sample_in_genetic_profile | ||
select * from mysql( | ||
'127.0.0.1:3306', | ||
'cbioportal', | ||
'view_sample_in_genetic_profile', | ||
'cbio', | ||
'P@ssword1' | ||
); |
177 changes: 177 additions & 0 deletions
177
dev/clickhouse/mysql_provisioning/cbio_database_views.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
# sample | ||
DROP VIEW IF EXISTS view_sample; | ||
CREATE VIEW view_sample AS | ||
SELECT | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID) as sample_unique_id, | ||
sample.STABLE_ID as sample_stable_id, | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id, | ||
p.STABLE_ID as patient_stable_id, | ||
cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier | ||
FROM sample | ||
INNER JOIN patient p on sample.PATIENT_ID = p.INTERNAL_ID | ||
INNER JOIN cancer_study cs on p.CANCER_STUDY_ID = cs.CANCER_STUDY_ID; | ||
|
||
# sample list | ||
DROP VIEW IF EXISTS view_sample_list; | ||
CREATE VIEW view_sample_list AS | ||
SELECT | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id, | ||
sl.STABLE_ID as sample_list_stable_id, | ||
sl.NAME as name, | ||
cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier | ||
FROM sample_list as sl | ||
INNER JOIN sample_list_list as sll ON sll.LIST_ID = sl.LIST_ID | ||
INNER JOIN sample as s ON s.INTERNAL_ID = sll.SAMPLE_ID | ||
INNER JOIN cancer_study cs on sl.CANCER_STUDY_ID = cs.CANCER_STUDY_ID; | ||
|
||
# genomic_event | ||
DROP TABLE IF EXISTS view_genomic_event; | ||
-- This view takes a long time to materialize. I store the data in a table to prevent repeated recalculations. | ||
# CREATE TABLE view_genomic_event AS | ||
# SELECT | ||
# concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID) as sample_unique_id, | ||
# gene.HUGO_GENE_SYMBOL as hugo_gene_symbol, | ||
# me.PROTEIN_CHANGE as variant, | ||
# gp.STABLE_ID as gene_panel_stable_id, | ||
# cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier, | ||
# g.STABLE_ID as genetic_profile_stable_id | ||
# FROM mutation | ||
# LEFT JOIN mutation_event as me ON mutation.MUTATION_EVENT_ID = me.MUTATION_EVENT_ID | ||
# LEFT JOIN sample_profile sp on mutation.SAMPLE_ID = sp.SAMPLE_ID and mutation.GENETIC_PROFILE_ID = sp.GENETIC_PROFILE_ID | ||
# LEFT JOIN gene_panel gp on sp.PANEL_ID = gp.INTERNAL_ID | ||
# LEFT JOIN genetic_profile g on sp.GENETIC_PROFILE_ID = g.GENETIC_PROFILE_ID | ||
# LEFT JOIN cancer_study cs on g.CANCER_STUDY_ID = cs.CANCER_STUDY_ID | ||
# LEFT JOIN sample on mutation.SAMPLE_ID = sample.INTERNAL_ID | ||
# LEFT JOIN gene ON mutation.ENTREZ_GENE_ID = gene.ENTREZ_GENE_ID; | ||
# INSERT INTO view_genomic_event | ||
# SELECT | ||
# concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID) as sample_unique_id, | ||
# gene.HUGO_GENE_SYMBOL as hugo_gene_symbol, | ||
# convert(ce.ALTERATION, char) as variant, | ||
# gene_panel.STABLE_ID as gene_panel_stable_id, | ||
# cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier, | ||
# gp.STABLE_ID as genetic_profile_stable_id | ||
# FROM sample_cna_event | ||
# LEFT JOIN cna_event ce on sample_cna_event.CNA_EVENT_ID = ce.CNA_EVENT_ID | ||
# LEFT JOIN gene on ce.ENTREZ_GENE_ID = gene.ENTREZ_GENE_ID | ||
# LEFT JOIN genetic_profile gp on sample_cna_event.GENETIC_PROFILE_ID = gp.GENETIC_PROFILE_ID | ||
# LEFT JOIN sample_profile sp on gp.GENETIC_PROFILE_ID = sp.GENETIC_PROFILE_ID | ||
# LEFT JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID | ||
# LEFT JOIN sample on sample_cna_event.SAMPLE_ID = sample.INTERNAL_ID | ||
# LEFT JOIN gene_panel ON sp.PANEL_ID = gene_panel.INTERNAL_ID; | ||
# INSERT INTO view_genomic_event | ||
# SELECT | ||
# concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id, | ||
# hugo_gene_symbol, | ||
# Event_Info as variant, | ||
# g.STABLE_ID as gene_panel_stable_id, | ||
# cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier, | ||
# gp.STABLE_ID as genetic_profile_stable_id | ||
# FROM structural_variant as sv | ||
# LEFT JOIN (SELECT ENTREZ_GENE_ID, HUGO_GENE_SYMBOL as hugo_gene_symbol FROM gene) gene1 on gene1.ENTREZ_GENE_ID = sv.SITE1_ENTREZ_GENE_ID | ||
# LEFT OUTER JOIN genetic_profile gp on gp.GENETIC_PROFILE_ID = sv.GENETIC_PROFILE_ID | ||
# LEFT JOIN sample s on sv.SAMPLE_ID = s.INTERNAL_ID | ||
# LEFT JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID | ||
# LEFT JOIN sample_profile sp on gp.GENETIC_PROFILE_ID = sp.GENETIC_PROFILE_ID | ||
# LEFT JOIN gene_panel g on sp.PANEL_ID = g.INTERNAL_ID; | ||
# INSERT INTO view_genomic_event | ||
# SELECT | ||
# concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id, | ||
# hugo_gene_symbol, | ||
# Event_Info as variant, | ||
# g.STABLE_ID as gene_panel_stable_id, | ||
# cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier, | ||
# gp.STABLE_ID as genetic_profile_stable_id | ||
# FROM structural_variant as sv | ||
# LEFT JOIN (SELECT ENTREZ_GENE_ID, HUGO_GENE_SYMBOL as hugo_gene_symbol FROM gene) gene2 on gene2.ENTREZ_GENE_ID = sv.SITE2_ENTREZ_GENE_ID | ||
# LEFT OUTER JOIN genetic_profile gp on gp.GENETIC_PROFILE_ID = sv.GENETIC_PROFILE_ID | ||
# LEFT JOIN sample s on sv.SAMPLE_ID = s.INTERNAL_ID | ||
# LEFT JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID | ||
# LEFT JOIN sample_profile sp on gp.GENETIC_PROFILE_ID = sp.GENETIC_PROFILE_ID | ||
# LEFT JOIN gene_panel g on sp.PANEL_ID = g.INTERNAL_ID; | ||
|
||
-- structural variant | ||
DROP VIEW IF EXISTS view_structural_variant; | ||
CREATE VIEW view_structural_variant AS | ||
SELECT | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id, | ||
gene1.HUGO_GENE_SYMBOL as hugo_symbol_gene1, | ||
gene2.HUGO_GENE_SYMBOL as hugo_symbol_gene2, | ||
g.STABLE_ID as gene_panel_stable_id, | ||
cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier, | ||
gp.STABLE_ID as genetic_profile_stable_id | ||
FROM structural_variant as sv | ||
LEFT JOIN (SELECT ENTREZ_GENE_ID, HUGO_GENE_SYMBOL FROM gene) gene1 on gene1.ENTREZ_GENE_ID = sv.SITE1_ENTREZ_GENE_ID | ||
LEFT JOIN (SELECT ENTREZ_GENE_ID, HUGO_GENE_SYMBOL FROM gene) gene2 on gene2.ENTREZ_GENE_ID = sv.SITE2_ENTREZ_GENE_ID | ||
LEFT OUTER JOIN genetic_profile gp on gp.GENETIC_PROFILE_ID = sv.GENETIC_PROFILE_ID | ||
LEFT JOIN sample s on sv.SAMPLE_ID = s.INTERNAL_ID | ||
LEFT JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID | ||
LEFT JOIN sample_profile sp on gp.GENETIC_PROFILE_ID = sp.GENETIC_PROFILE_ID | ||
LEFT JOIN gene_panel g on sp.PANEL_ID = g.INTERNAL_ID; | ||
|
||
-- sample_clinical_attribute_numeric | ||
DROP VIEW IF EXISTS view_sample_clinical_attribute_numeric; | ||
CREATE VIEW view_sample_clinical_attribute_numeric AS | ||
SELECT | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id, | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id, | ||
ATTR_ID as attribute_name, | ||
ATTR_VALUE as attribute_value, | ||
cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier | ||
FROM cancer_study cs | ||
INNER JOIN patient p on cs.CANCER_STUDY_ID = p.CANCER_STUDY_ID | ||
INNER JOIN sample s on p.INTERNAL_ID = s.PATIENT_ID | ||
INNER JOIN clinical_sample cs on s.INTERNAL_ID = cs.INTERNAL_ID | ||
WHERE ATTR_VALUE REGEXP '^[0-9.]+$'; | ||
|
||
-- sample_clinical_attribute_categorical | ||
DROP VIEW IF EXISTS view_sample_clinical_attribute_categorical; | ||
CREATE VIEW view_sample_clinical_attribute_categorical AS | ||
SELECT | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', s.STABLE_ID) as sample_unique_id, | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id, | ||
ATTR_ID as attribute_name, | ||
ATTR_VALUE as attribute_value, | ||
cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier | ||
FROM cancer_study cs | ||
INNER JOIN patient p on cs.CANCER_STUDY_ID = p.CANCER_STUDY_ID | ||
INNER JOIN sample s on p.INTERNAL_ID = s.PATIENT_ID | ||
INNER JOIN clinical_sample cs on s.INTERNAL_ID = cs.INTERNAL_ID | ||
WHERE ATTR_VALUE NOT REGEXP '^[0-9.]+$'; | ||
|
||
-- patient_clinical_attribute_numeric | ||
DROP VIEW IF EXISTS view_patient_clinical_attribute_numeric; | ||
CREATE VIEW view_patient_clinical_attribute_numeric AS | ||
SELECT | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id, | ||
ATTR_ID as attribute_name, | ||
ATTR_VALUE as attribute_value, | ||
cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier | ||
FROM cancer_study cs | ||
INNER JOIN patient p on cs.CANCER_STUDY_ID = p.CANCER_STUDY_ID | ||
INNER JOIN clinical_patient cp on p.INTERNAL_ID = cp.INTERNAL_ID | ||
WHERE ATTR_VALUE REGEXP '^[0-9.]+$'; | ||
|
||
-- patient_clinical_attribute_categorical | ||
DROP VIEW IF EXISTS view_patient_clinical_attribute_categorical; | ||
CREATE VIEW view_patient_clinical_attribute_categorical AS | ||
SELECT | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', p.STABLE_ID) as patient_unique_id, | ||
ATTR_ID as attribute_name, | ||
ATTR_VALUE as attribute_value, | ||
cs.CANCER_STUDY_IDENTIFIER as cancer_study_identifier | ||
FROM cancer_study cs | ||
INNER JOIN patient p on cs.CANCER_STUDY_ID = p.CANCER_STUDY_ID | ||
INNER JOIN clinical_patient cp on p.INTERNAL_ID = cp.INTERNAL_ID | ||
WHERE ATTR_VALUE NOT REGEXP '^[0-9.]+$'; | ||
|
||
-- sample_in_genomic_profile | ||
DROP VIEW IF EXISTS view_sample_in_genetic_profile; | ||
CREATE VIEW view_sample_in_genetic_profile AS | ||
SELECT | ||
concat(cs.CANCER_STUDY_IDENTIFIER, '_', sample.STABLE_ID) as sample_unique_id, | ||
replace(gp.STABLE_ID, concat(cs.CANCER_STUDY_IDENTIFIER, '_'), '') as genetic_profile_stable_id_short | ||
FROM sample | ||
INNER JOIN sample_profile sp on sample.INTERNAL_ID = sp.SAMPLE_ID | ||
INNER JOIN genetic_profile gp on sp.GENETIC_PROFILE_ID = gp.GENETIC_PROFILE_ID | ||
INNER JOIN cancer_study cs on gp.CANCER_STUDY_ID = cs.CANCER_STUDY_ID; |