From 5d87f1f52ffde6cdb9bd77acabe47dec4fe96311 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 5 Mar 2024 11:59:30 +0100 Subject: [PATCH 01/21] Merge pull request #255 from opencb/TASK-4158 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index bf843117..ad285906 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 3.0.0 + 3.0.0-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index aaf09fd8..5df2ddfe 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.0.0 + 3.0.0-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index aa53fa2c..61629296 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.0.0 + 3.0.0-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 29615312..83fddd53 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.0.0 + 3.0.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 61383366..cbe89ab1 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.0.0 + 3.0.0-SNAPSHOT pom Biodata @@ -38,7 +38,7 @@ - 5.0.0 + 5.0.0-SNAPSHOT 2.14.3 4.4 From c139406bbaeba0a77a4c1d545d75572613476882 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 30 Apr 2024 10:28:15 +0200 Subject: [PATCH 02/21] Prepare next release 2.12.3-SNAPSHOT --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 3a98861e..3c1b2425 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.2 + 2.12.3-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index fe80d521..24182d67 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 2.12.3-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 79a1954e..1eb260d6 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 2.12.3-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 45d41e63..a283a477 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 2.12.3-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index a051029f..c4ba58fc 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 2.12.3-SNAPSHOT pom Biodata @@ -38,7 +38,7 @@ - 4.12.0 + 4.12.1-SNAPSHOT 2.11.4 4.4 1.7.7 From c52673185b0aa2cf31f8915689519d935e39a271 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 30 May 2024 17:52:46 +0200 Subject: [PATCH 03/21] cicd: Update action version to test for compatibility with test and release process #TASK-6264 --- .github/workflows/test-analysis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index d460871a..e9df51ec 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -10,11 +10,11 @@ jobs: name: Test and push Sonar analysis runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: '0' - name: Set up JDK 11 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: '11' From a4eb6dc3ee7760a3289e51dea0e25af96ade962f Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 10 Jun 2024 12:26:59 +0200 Subject: [PATCH 04/21] models: deprecate status name, #TASK-5964 --- .../interpretation/Interpretation.java | 64 +++++++++++-------- .../opencb/biodata/models/common/Status.java | 19 +++--- 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java index 99fbff5d..36f9c7e0 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java @@ -28,29 +28,29 @@ public class Interpretation { - private String id; - private String uuid; - private String description; - private String clinicalAnalysisId; + protected String id; + protected String uuid; + protected String name; + protected String description; + protected String clinicalAnalysisId; /** * Interpretation algorithm tool used to generate this interpretation. */ - private ClinicalAnalyst analyst; - private InterpretationMethod method; + protected ClinicalAnalyst analyst; + protected InterpretationMethod method; - private List primaryFindings; - private List secondaryFindings; + protected List primaryFindings; + protected List secondaryFindings; - private List comments; + protected List comments; - private InterpretationStats stats; + protected InterpretationStats stats; - private boolean locked; - private Status status; - private String creationDate; - private String modificationDate; - private int version; + protected boolean locked; + protected String creationDate; + protected String modificationDate; + protected int version; /** * Users can add custom information in this field. @@ -61,12 +61,23 @@ public class Interpretation { public Interpretation() { } + @Deprecated public Interpretation(String id, String uuid, String description, String clinicalAnalysisId, ClinicalAnalyst analyst, InterpretationMethod method, List primaryFindings, List secondaryFindings, List comments, InterpretationStats stats, Status status, String creationDate, String modificationDate, boolean locked, int version, Map attributes) { + this(id, uuid, id, description, clinicalAnalysisId, analyst, method, primaryFindings, secondaryFindings, + comments, stats, locked, creationDate, modificationDate, version, attributes); + } + + public Interpretation(String id, String uuid, String name, String description, String clinicalAnalysisId, + ClinicalAnalyst analyst, InterpretationMethod method, List primaryFindings, + List secondaryFindings, List comments, + InterpretationStats stats, boolean locked, String creationDate, String modificationDate, + int version, Map attributes) { this.id = id; this.uuid = uuid; + this.name = name; this.description = description; this.clinicalAnalysisId = clinicalAnalysisId; this.analyst = analyst; @@ -75,10 +86,9 @@ public Interpretation(String id, String uuid, String description, String clinica this.secondaryFindings = secondaryFindings; this.comments = comments; this.stats = stats; - this.status = status; + this.locked = locked; this.creationDate = creationDate; this.modificationDate = modificationDate; - this.locked = locked; this.version = version; this.attributes = attributes; } @@ -88,6 +98,7 @@ public String toString() { final StringBuilder sb = new StringBuilder("Interpretation{"); sb.append("id='").append(id).append('\''); sb.append(", uuid='").append(uuid).append('\''); + sb.append(", name='").append(name).append('\''); sb.append(", description='").append(description).append('\''); sb.append(", clinicalAnalysisId='").append(clinicalAnalysisId).append('\''); sb.append(", analyst=").append(analyst); @@ -96,7 +107,6 @@ public String toString() { sb.append(", secondaryFindings=").append(secondaryFindings); sb.append(", comments=").append(comments); sb.append(", stats=").append(stats); - sb.append(", status=").append(status); sb.append(", creationDate='").append(creationDate).append('\''); sb.append(", modificationDate='").append(modificationDate).append('\''); sb.append(", locked='").append(locked).append('\''); @@ -124,6 +134,15 @@ public Interpretation setUuid(String uuid) { return this; } + public String getName() { + return name; + } + + public Interpretation setName(String name) { + this.name = name; + return this; + } + public String getDescription() { return description; } @@ -196,15 +215,6 @@ public Interpretation setStats(InterpretationStats stats) { return this; } - public Status getStatus() { - return status; - } - - public Interpretation setStatus(Status status) { - this.status = status; - return this; - } - public String getCreationDate() { return creationDate; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java b/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java index 7563ce9e..65ffd889 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java @@ -19,17 +19,22 @@ public class Status { protected String id; - protected String name; protected String description; protected String date; public Status() { - this("", "", "", ""); + this("", "", ""); } + public Status(String id, String description, String date) { + this.id = id; + this.description = description; + this.date = date; + } + + @Deprecated public Status(String id, String name, String description, String date) { this.id = id; - this.name = name; this.description = description; this.date = date; } @@ -38,7 +43,6 @@ public Status(String id, String name, String description, String date) { public String toString() { final StringBuilder sb = new StringBuilder("Status{"); sb.append("id='").append(id).append('\''); - sb.append(", name='").append(name).append('\''); sb.append(", description='").append(description).append('\''); sb.append(", date='").append(date).append('\''); sb.append('}'); @@ -53,7 +57,6 @@ public boolean equals(Object o) { Status status = (Status) o; if (!id.equals(status.id)) return false; - if (name != null ? !name.equals(status.name) : status.name != null) return false; if (description != null ? !description.equals(status.description) : status.description != null) return false; return date != null ? date.equals(status.date) : status.date == null; } @@ -61,7 +64,6 @@ public boolean equals(Object o) { @Override public int hashCode() { int result = id.hashCode(); - result = 31 * result + (name != null ? name.hashCode() : 0); result = 31 * result + (description != null ? description.hashCode() : 0); result = 31 * result + (date != null ? date.hashCode() : 0); return result; @@ -76,12 +78,13 @@ public Status setId(String id) { return this; } + @Deprecated public String getName() { - return name; + return id; } + @Deprecated public Status setName(String name) { - this.name = name; return this; } From 50a2be94aed6556d7cb83eee48f3a4170fd4d20f Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 26 Jun 2024 20:07:24 +0200 Subject: [PATCH 05/21] cicd: Modify pull request approve #TASK-6399 --- .github/workflows/pull-request-approved.yml | 34 +++++++++++++--- .github/workflows/scripts/xetabase-branch.sh | 42 ++++++++++++++++++++ 2 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/scripts/xetabase-branch.sh diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index eb410c9c..8a60928f 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -1,15 +1,37 @@ name: Pull request approve workflow +run-name: 'Pull request approve workflow ${{ github.event.pull_request.head.ref }} -> ${{ github.event.pull_request.base.ref }} by @${{ github.actor }}' on: pull_request_review: types: [ submitted ] jobs: - build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + calculate-xetabase-branch: + name: Calculate Xetabase branch + runs-on: ubuntu-22.04 + outputs: + xetabase_branch: ${{ steps.get_xetabase_branch.outputs.xetabase_branch }} + steps: + - name: Clone java-common-libs + uses: actions/checkout@v4 + with: + fetch-depth: '10' + - id: get_xetabase_branch + name: "Get current branch for Xetabase from target branch" + run: | + chmod +x ./.github/workflows/scripts/xetabase-branch.sh + ls ./.github/workflows/scripts/ + ls ./.github/workflows/ + bash --version + xetabase_branch=$(./.github/workflows/scripts/xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) + echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} + echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT test: - name: "Test analysis" - uses: ./.github/workflows/test-analysis.yml - needs: build - secrets: inherit + name: "Run all tests before merging" + needs: calculate-xetabase-branch + uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@TASK-6399 + with: + branch: ${{ needs.calculate-xetabase-branch.outputs.xetabase_branch }} + task: ${{ github.event.pull_request.head.ref }} + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/scripts/xetabase-branch.sh b/.github/workflows/scripts/xetabase-branch.sh new file mode 100644 index 00000000..af17f7f1 --- /dev/null +++ b/.github/workflows/scripts/xetabase-branch.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Function to calculate the corresponding branch of Xetabase project +get_xetabase_branch() { + # Input parameter (branch name) + input_branch="$1" + + # Check if the branch name is "develop" in that case return the same branch name + if [[ "$input_branch" == "develop" ]]; then + echo "develop" + return 0 + fi + + # Check if the branch name starts with "release-" and follows the patterns "release-a.b.x" or "release-a.b.c.x" + if [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.x$ ]] || [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.([0-9]+)\.x$ ]]; then + # Extract the MAJOR part of the branch name + MAJOR=${BASH_REMATCH[1]} + # Calculate the XETABASE_MAJOR by subtracting 3 from MAJOR + XETABASE_MAJOR=$((MAJOR - 3)) + # Check if the XETABASE_MAJOR is negative + if (( XETABASE_MAJOR < 0 )); then + echo "Error: 'MAJOR' digit after subtraction results in a negative number." + return 1 + fi + # Construct and echo the new branch name + echo "release-$XETABASE_MAJOR.${input_branch#release-$MAJOR.}" + return 0 + fi + + # If the branch name does not match any of the expected patterns + echo "Error: The branch name is not correct." + return 1 +} + +# Check if the script receives exactly one argument +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +# Call the function with the input branch name +get_xetabase_branch "$1" From 10aa6c8019c3a4e876d2477f1c1e67c493c5b2b7 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 1 Jul 2024 17:36:40 +0200 Subject: [PATCH 06/21] cicd: Modify pull request approve #TASK-6399 --- .github/workflows/pull-request-approved.yml | 7 ++----- .../{xetabase-branch.sh => get-xetabase-branch.sh} | 10 +++++++++- 2 files changed, 11 insertions(+), 6 deletions(-) rename .github/workflows/scripts/{xetabase-branch.sh => get-xetabase-branch.sh} (80%) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index 8a60928f..a0d481fa 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -19,11 +19,8 @@ jobs: - id: get_xetabase_branch name: "Get current branch for Xetabase from target branch" run: | - chmod +x ./.github/workflows/scripts/xetabase-branch.sh - ls ./.github/workflows/scripts/ - ls ./.github/workflows/ - bash --version - xetabase_branch=$(./.github/workflows/scripts/xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) + chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh + xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT diff --git a/.github/workflows/scripts/xetabase-branch.sh b/.github/workflows/scripts/get-xetabase-branch.sh similarity index 80% rename from .github/workflows/scripts/xetabase-branch.sh rename to .github/workflows/scripts/get-xetabase-branch.sh index af17f7f1..e971f990 100644 --- a/.github/workflows/scripts/xetabase-branch.sh +++ b/.github/workflows/scripts/get-xetabase-branch.sh @@ -5,6 +5,14 @@ get_xetabase_branch() { # Input parameter (branch name) input_branch="$1" + # If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it + if [[ $input_branch == TASK* ]]; then + if [ "$(git ls-remote https://github.com/zetta-genomics/opencga-enterprise.git "$input_branch" )" ] ; then + echo "$GIT_BRANCH"; + exit 0; + fi + fi + # Check if the branch name is "develop" in that case return the same branch name if [[ "$input_branch" == "develop" ]]; then echo "develop" @@ -16,7 +24,7 @@ get_xetabase_branch() { # Extract the MAJOR part of the branch name MAJOR=${BASH_REMATCH[1]} # Calculate the XETABASE_MAJOR by subtracting 3 from MAJOR - XETABASE_MAJOR=$((MAJOR - 3)) + XETABASE_MAJOR=$((MAJOR - 1)) # Check if the XETABASE_MAJOR is negative if (( XETABASE_MAJOR < 0 )); then echo "Error: 'MAJOR' digit after subtraction results in a negative number." From b37690016e70f7cf0b2abe72d13bbdbeba733f88 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 8 Jul 2024 15:01:10 +0200 Subject: [PATCH 07/21] exclude distlib dependency --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index c4ba58fc..6166063d 100644 --- a/pom.xml +++ b/pom.xml @@ -198,6 +198,12 @@ com.databricks SnpEff ${SnpEff.version} + + + distlib + distlib + + com.google.guava From aacf0bbe1bacc7ce0f5cd0decb7986236348616a Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 8 Jul 2024 15:51:26 +0200 Subject: [PATCH 08/21] exclude distlib dependency --- biodata-tools/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index a283a477..fdb21cbf 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -53,6 +53,12 @@ com.databricks SnpEff + + + distlib + distlib + + org.rocksdb From b4c2afb5bd4659e3d90a11446935797d595685bb Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 17 Jul 2024 10:42:33 +0200 Subject: [PATCH 09/21] Prepare new development branch release-3.2.x --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index d6d1acdd..bb85a556 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index c3332c2e..238588ca 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 7325a383..f9cf2628 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index e8d90ada..c7c32361 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index faf0f65b..cee50cbd 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT pom Biodata @@ -38,7 +38,7 @@ - 5.3.0-SNAPSHOT + 5.2.1-SNAPSHOT 2.14.3 4.4 From 75345478c664b4fdae9803ab8138e370313717b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 17 Jul 2024 10:57:11 +0100 Subject: [PATCH 10/21] tools: Centralise SV normalization at VariantKeyFields.sv #TASK-6558 --- .../tools/variant/VariantNormalizer.java | 110 +++++-- .../variant/VariantNormalizerGenericTest.java | 12 +- .../tools/variant/VariantNormalizerTest.java | 309 +++++++++++++----- 3 files changed, 306 insertions(+), 125 deletions(-) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index e3bb3d6e..1b8992c0 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -306,19 +306,18 @@ public List normalize(List batch, boolean reuse) throws NonSta Integer start = variant.getStart(); Integer end = variant.getEnd(); String chromosome = variant.getChromosome(); - StructuralVariation sv = variant.getSv(); if (variant.getStudies() == null || variant.getStudies().isEmpty()) { List keyFieldsList; if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternate, sv); + keyFieldsList = normalizeSymbolic(start, end, reference, alternate, variant.getSv()); } else { keyFieldsList = normalize(chromosome, start, reference, alternate); } // Iterate keyFields sorting by position, so the generated variants are ordered. Do not modify original order! for (VariantKeyFields keyFields : sortByPosition(keyFieldsList)) { OriginalCall call = new OriginalCall(variant.toString(), keyFields.getNumAllele()); - Variant normalizedVariant = newVariant(variant, keyFields, sv); + Variant normalizedVariant = newVariant(variant, keyFields); if (keyFields.getPhaseSet() != null) { StudyEntry studyEntry = new StudyEntry(); studyEntry.setSamples( @@ -346,7 +345,7 @@ public List normalize(List batch, boolean reuse) throws NonSta List keyFieldsList; List originalKeyFieldsList; if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternates, sv); + keyFieldsList = normalizeSymbolic(start, end, reference, alternates, variant.getSv()); } else { keyFieldsList = normalize(chromosome, start, reference, alternates); } @@ -400,6 +399,9 @@ public List normalize(List batch, boolean reuse) throws NonSta variant.setEnd(keyFields.getEnd()); variant.setReference(keyFields.getReference()); variant.setAlternate(keyFields.getAlternate()); + if (keyFields.getSv() != null) { + variant.setSv(keyFields.getSv()); + } variant.reset(); // Variant is being reused, must ensure the SV field si appropriately created // if (isSymbolic(variant)) { @@ -415,7 +417,7 @@ public List normalize(List batch, boolean reuse) throws NonSta } samples = entry.getSamples(); } else { - normalizedVariant = newVariant(variant, keyFields, sv); + normalizedVariant = newVariant(variant, keyFields); normalizedEntry = new StudyEntry(); normalizedEntry.setStudyId(entry.getStudyId()); @@ -624,6 +626,36 @@ public List normalizeSymbolic(final Integer start, final Integ Integer copyNumber = sv == null ? null : sv.getCopyNumber(); keyFields = normalizeSymbolic(start, end, reference, alternate, alternates, copyNumber, numAllelesIdx); } + + if (alternate.equals(VariantBuilder.DUP_TANDEM_ALT)) { + if (keyFields.getSv() == null) { + keyFields.setSv(new StructuralVariation()); + } + keyFields.getSv().setType(StructuralVariantType.TANDEM_DUPLICATION); + } + + if (sv != null) { + StructuralVariation normalizedSv = keyFields.getSv(); + if (normalizedSv == null) { + normalizedSv = new StructuralVariation(); + } + // CI positions may change during the normalization. Update them. + normalizedSv.setCiStartLeft(sv.getCiStartLeft()); + normalizedSv.setCiStartRight(sv.getCiStartRight()); + normalizedSv.setCiEndLeft(sv.getCiEndLeft()); + normalizedSv.setCiEndRight(sv.getCiEndRight()); + normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); + normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); + + if (keyFields.getSv() == null) { + if (normalizedSv.getCiStartLeft() != null || normalizedSv.getCiStartRight() != null + || normalizedSv.getCiEndLeft() != null || normalizedSv.getCiEndRight() != null + || normalizedSv.getLeftSvInsSeq() != null || normalizedSv.getRightSvInsSeq() != null) { + keyFields.setSv(normalizedSv); + } + } + } + list.add(keyFields); } @@ -695,7 +727,7 @@ private static VariantKeyFields normalizeMateBreakend( } VariantKeyFields keyFields = new VariantKeyFields(newStart, newStart - 1, numAllelesIdx, newReference, newAlternate); - keyFields.getSv().setBreakend(breakend); + keyFields.setBreakend(breakend); return keyFields; } @@ -718,20 +750,23 @@ private VariantKeyFields normalizeSymbolic( + "contain 0 or 1 nt, but no more. Please, check."); } - Integer cn = VariantBuilder.getCopyNumberFromAlternate(alternate); // if (cn != null) { // // Alternate with the form , being xxx the number of copies, must be normalized into "" // newAlternate = ""; // } String newAlternate; + Integer newCn; if (alternate.equals("") && copyNumber != null) { // Alternate must be of the form , being xxx the number of copies newAlternate = ""; + newCn = copyNumber; } else { newAlternate = alternate; + newCn = VariantBuilder.getCopyNumberFromAlternate(alternate); } + return new VariantKeyFields(newStart, end, numAllelesIdx, newReference, newAlternate, - null, cn, false); + null, newCn, false); } @@ -1380,34 +1415,24 @@ private int[] getGenotypesReorderingMap(int numAllele, int[] alleleMap) { } } - - private Variant newVariant(Variant variant, VariantKeyFields keyFields, StructuralVariation sv) { + private Variant newVariant(Variant variant, VariantKeyFields keyFields) { Variant normalizedVariant = new Variant(variant.getChromosome(), keyFields.getStart(), keyFields.getEnd(), keyFields.getReference(), keyFields.getAlternate()) .setId(variant.getId()) .setNames(variant.getNames()) .setStrand(variant.getStrand()); - if (sv != null) { - if (normalizedVariant.getSv() != null) { - // CI positions may change during the normalization. Update them. - normalizedVariant.getSv().setCiStartLeft(sv.getCiStartLeft()); - normalizedVariant.getSv().setCiStartRight(sv.getCiStartRight()); - normalizedVariant.getSv().setCiEndLeft(sv.getCiEndLeft()); - normalizedVariant.getSv().setCiEndRight(sv.getCiEndRight()); - normalizedVariant.getSv().setLeftSvInsSeq(sv.getLeftSvInsSeq()); - normalizedVariant.getSv().setRightSvInsSeq(sv.getRightSvInsSeq()); - - // Variant will never have CopyNumber, because the Alternate is normalized from to - normalizedVariant.getSv().setCopyNumber(keyFields.getCopyNumber()); - VariantType cnvSubtype = VariantBuilder.getCopyNumberSubtype(keyFields.getCopyNumber()); - if (cnvSubtype != null) { - normalizedVariant.setType(cnvSubtype); - } - } + if (keyFields.getSv() != null) { + normalizedVariant.setSv(keyFields.getSv()); } - normalizedVariant.setAnnotation(variant.getAnnotation()); + if (keyFields.getCopyNumber() != null) { + VariantType cnvSubtype = VariantBuilder.getCopyNumberSubtype(keyFields.getCopyNumber()); + if (cnvSubtype != null) { + normalizedVariant.setType(cnvSubtype); + } + } + return normalizedVariant; // normalizedVariant.setAnnotation(variant.getAnnotation()); // if (isSymbolic(variant)) { @@ -1527,8 +1552,10 @@ public VariantKeyFields(int start, int end, int numAllele, String reference, Str this.alternate = alternate; this.originalKeyFields = originalKeyFields == null ? this : originalKeyFields; this.referenceBlock = referenceBlock; - this.sv = new StructuralVariation(); - setCopyNumber(copyNumber); + this.sv = null; + if (copyNumber != null) { + setCopyNumber(copyNumber); + } } @@ -1604,7 +1631,28 @@ public Integer getCopyNumber() { } public VariantKeyFields setCopyNumber(Integer copyNumber) { - sv.setCopyNumber(copyNumber); + if (sv == null) { + if (copyNumber != null) { + sv = new StructuralVariation(); + sv.setCopyNumber(copyNumber); + sv.setType(VariantBuilder.getCNVSubtype(copyNumber)); + } + } else { + sv.setCopyNumber(copyNumber); + sv.setType(VariantBuilder.getCNVSubtype(copyNumber)); + } + return this; + } + + public VariantKeyFields setBreakend(Breakend breakend) { + if (sv == null) { + if (breakend != null) { + sv = new StructuralVariation(); + sv.setBreakend(breakend); + } + } else { + sv.setBreakend(breakend); + } return this; } diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java index f097d1e1..e59ad530 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java @@ -230,7 +230,7 @@ protected Variant newVariant(int position, String ref, String altsCsv) { return newVariant(position, position, ref, Arrays.asList(altsCsv.split(",")), "2"); } - protected Variant newVariant(int start, int end, String ref, String altsCsv) { + protected Variant newVariant(int start, Integer end, String ref, String altsCsv) { return newVariant(start, end, ref, Arrays.asList(altsCsv.split(",")), "2"); } @@ -238,12 +238,16 @@ protected Variant newVariant(int position, String ref, List altsList, St return newVariant(position, position, ref, altsList, studyId); } - protected Variant newVariant(int start, int end, String ref, List altsList, String studyId) { + protected Variant newVariant(int start, Integer end, String ref, List altsList, String studyId) { return newVariantBuilder(start, end, ref, altsList, studyId).build(); } - protected VariantBuilder newVariantBuilder(int position, int end, String ref, List altsList, String studyId) { - return Variant.newBuilder("1", position, end, ref, String.join(",", altsList)) + protected VariantBuilder newVariantBuilder(int position, Integer end, String ref, List altsList, String studyId) { + return newVariantBuilder(position, end, ref, String.join(",", altsList), studyId); + } + + protected VariantBuilder newVariantBuilder(int position, Integer end, String ref, String alts, String studyId) { + return Variant.newBuilder("1", position, end, ref, alts) .setStudyId(studyId) .setSampleDataKeys("GT") .setSamples(new ArrayList<>()) diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java index a4a62f06..4253d940 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java @@ -9,6 +9,7 @@ import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; import java.util.*; +import java.util.function.Consumer; import java.util.stream.Collectors; import static org.junit.Assert.*; @@ -582,9 +583,7 @@ public void testMultiSNP() throws NonStandardCompliantSampleField { public void testNormalizeMultiAllelicPL() throws NonStandardCompliantSampleField { Variant variant = generateVariantWithFormat("X:100:A:T", "GT:GL", "S01", "0/0", "1,2,3", "S02", "0", "1,2"); - List normalize1 = normalizer.normalize(Collections.singletonList(variant), false); - assertEquals("1,2,3", normalize1.get(0).getStudies().get(0).getSampleData("S01", "GL")); - assertEquals("1,2", normalize1.get(0).getStudies().get(0).getSampleData("S02", "GL")); + normalizeUnmodified(variant); Variant variant2 = generateVariantWithFormat("X:100:A:T,C", "GT:GL", "S01", "0/0", "1,2,3,4,5,6", "S02", "A", "1,2,3"); List normalize2 = normalizer.normalize(Collections.singletonList(variant2), false); @@ -614,14 +613,138 @@ public void testCNVsNormalization() throws Exception { .addSample("HG00096", "0|0") .build(); - List normalizedVariantList = normalizer.normalize(Collections.singletonList(variant), true); - assertEquals(1, normalizedVariantList.size()); - assertEquals(new StructuralVariation(86, 150, 150, 211, 0, null, null, - StructuralVariantType.COPY_NUMBER_LOSS, null), normalizedVariantList.get(0).getSv()); - // Normalize CNV alternate - assertEquals("", normalizedVariantList.get(0).getAlternate()); - assertEquals("1:86<100<150-150<200<211:C:", normalizedVariantList.get(0).getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalizedVariantList.get(0).getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, 0, null, null, + StructuralVariantType.COPY_NUMBER_LOSS, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationNoNumber() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, null, null, null, null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationNoNumberNoCipos() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(null, null, null, null, null, null, null, null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationUnmodified() throws Exception { + Variant variant = newVariantBuilder(101, 200, "-", Collections.singletonList(""), "2") + .addSample("HG00096", "0|0") + .build(); + + normalizeUnmodified(variant); + } + + @Test + public void testINSsNormalizationWithCIEND() throws Exception { + Variant variant = newVariantBuilder(100, null, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addFileData("LEFT_SVINSSEQ", "AAAA") + .addFileData("RIGHT_SVINSSEQ", "CCCC") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, null, null, null, "AAAA", "CCCC", null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-50<100<111:C:AAAA...CCCC", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals("1:86<101<150:-:AAAA...CCCC", normalizedVariant.toString()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testDUPTANDEMNormalization() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, null, null, null, StructuralVariantType.TANDEM_DUPLICATION, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals("1:86<101<150-150<200<211:-:", normalizedVariant.toString()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + public void normalizeUnmodified(Variant variant) throws NonStandardCompliantSampleField { + normalizer.setGenerateReferenceBlocks(false); + + int hashCode = variant.hashCode(); + List list = normalizer.normalize(Collections.singletonList(variant), false); + assertEquals(1, list.size()); + Variant normVar = list.get(0); + + assertEquals(variant.toString(), normVar.toString()); + assertEquals("Ensure input variant is not modified", hashCode, variant.hashCode()); + assertEquals("Ensure norm variant is not modified", hashCode, normVar.hashCode()); + + list = normalizer.normalize(Collections.singletonList(variant), true); + assertEquals(1, list.size()); + normVar = list.get(0); + + assertEquals(variant.toString(), normVar.toString()); + assertEquals("Ensure input variant is not modified", hashCode, variant.hashCode()); + assertEquals("Ensure norm variant is not modified", hashCode, normVar.hashCode()); + + } + + public void normalizeOne(Variant variant, Consumer consumer) throws NonStandardCompliantSampleField { + normalizer.setGenerateReferenceBlocks(false); + + int hashCode = variant.hashCode(); + List list = normalizer.normalize(Collections.singletonList(variant), false); + assertEquals(1, list.size()); + consumer.accept(list.get(0)); + + int hashCode2 = variant.hashCode(); + + // Check that the original variant has not been modified, and check again, but reusing the input variant + assertEquals("Ensure input variant is not modified", hashCode, hashCode2); + + + list = normalizer.normalize(Collections.singletonList(variant), true); + assertEquals(1, list.size()); + assertSame(variant, list.get(0)); + consumer.accept(variant); + consumer.accept(list.get(0)); + + int hashCode3 = variant.hashCode(); + assertNotEquals(hashCode3, hashCode); } @Test @@ -670,23 +793,40 @@ public void testVNCNormalizationMultiallelic() throws NonStandardCompliantSample @Test public void testCNVsNormalizationCopyNumber() throws NonStandardCompliantSampleField { Variant variant; - List normalizedVariantList; variant = newVariantBuilder(100, 200, "C", Arrays.asList(""), "2") .setSampleDataKeys("GT", "CN") .addSample("HG00096", "0|1","3") .build(); - normalizedVariantList = normalizer.normalize(Collections.singletonList(variant), true); - assertEquals(1, normalizedVariantList.size()); - Variant normalizedVariant = normalizedVariantList.get(0); - assertEquals(new StructuralVariation(null, null, null, null, 3, null, null, - StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); - // Normalize CNV alternate - assertEquals("", normalizedVariant.getAlternate()); - assertEquals(101, normalizedVariant.getStart().intValue()); - assertEquals("", normalizedVariant.getReference()); - assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant->{ + assertEquals(new StructuralVariation(null, null, null, null, 3, null, null, + StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + @Test + public void testCNVsNormalizationCopyNumberWithCipos() throws NonStandardCompliantSampleField { + Variant variant; + variant = newVariantBuilder(100, 200, "C", Arrays.asList(""), "2") + .addFileData("CIPOS", "-10,50") + .setSampleDataKeys("GT", "CN") + .addSample("HG00096", "0|1","3") + .build(); + normalizeOne(variant, normalizedVariant->{ + assertEquals(new StructuralVariation(90, 150, null, null, 3, null, null, + StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("1:90<100<150-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); } @Test @@ -725,38 +865,33 @@ public void testNormalizeSV() throws NonStandardCompliantSampleField { @Test public void testNormalizeDEL() throws NonStandardCompliantSampleField { - Variant variant = newVariant(100, 200, "N", Collections.singletonList(""), STUDY_ID); - List normalized = normalizer.normalize(Collections.singletonList(variant), false); - - assertEquals(1, normalized.size()); - assertEquals(101, normalized.get(0).getStart().intValue()); - assertEquals(200, normalized.get(0).getEnd().intValue()); - assertEquals(new StructuralVariation(), normalized.get(0).getSv()); - System.out.println(normalized.get(0).toJson()); + normalizeOne(variant, normalized -> { + assertEquals(101, normalized.getStart().intValue()); + assertEquals(200, normalized.getEnd().intValue()); + assertEquals(new StructuralVariation(), normalized.getSv()); +// System.out.println(normalized.toJson()); + }); } @Test public void testNormalizeINS() throws NonStandardCompliantSampleField { - String seq = "ACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTG"; Variant variant = newVariantBuilder(100, 100, "N", Collections.singletonList(""), STUDY_ID) .addFileData("SVINSSEQ", seq) .build(); - List list = new VariantNormalizer().normalize(Collections.singletonList(variant), false); - - assertEquals(1, list.size()); - Variant normalized = list.get(0); - assertEquals(101, normalized.getStart().intValue()); - assertEquals(100, normalized.getEnd().intValue()); - assertEquals(seq.length(), normalized.getLength().intValue()); - assertEquals(seq.length(), normalized.getLengthAlternate().intValue()); - assertEquals(0, normalized.getLengthReference().intValue()); - assertEquals("", normalized.getReference()); - assertEquals(seq, normalized.getAlternate()); - assertEquals(new StructuralVariation(), normalized.getSv()); - assertEquals("1:100-100:N:", normalized.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalized.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant -> { + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals(100, normalizedVariant.getEnd().intValue()); + assertEquals(seq.length(), normalizedVariant.getLength().intValue()); + assertEquals(seq.length(), normalizedVariant.getLengthAlternate().intValue()); + assertEquals(0, normalizedVariant.getLengthReference().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals(seq, normalizedVariant.getAlternate()); + assertEquals(new StructuralVariation(), normalizedVariant.getSv()); + assertEquals("1:100-100:N:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); } @Test @@ -768,66 +903,60 @@ public void testNormalizeSvToIndel() throws NonStandardCompliantSampleField { assertEquals(Variant.SV_THRESHOLD + 1, variant.getLengthAlternate().intValue()); assertNotNull(variant.getSv()); - Variant normVar = new VariantNormalizer().normalize(Collections.singletonList(variant), false).get(0); - assertEquals(VariantType.INDEL, normVar.getType()); - assertEquals(Variant.SV_THRESHOLD, normVar.getLengthAlternate().intValue()); - assertNull(normVar.getSv()); - - // Check that the original variant has not been modified, and check again, but reusing the input variant - assertEquals(VariantType.INSERTION, variant.getType()); - assertEquals(Variant.SV_THRESHOLD + 1, variant.getLengthAlternate().intValue()); - assertNotNull(variant.getSv()); - Variant normVarReuse = new VariantNormalizer().normalize(Collections.singletonList(variant), true).get(0); - assertEquals(VariantType.INDEL, normVarReuse.getType()); - assertEquals(Variant.SV_THRESHOLD, normVarReuse.getLengthAlternate().intValue()); - assertNull(normVarReuse.getSv()); - + normalizeOne(variant, normVar -> { + assertEquals(VariantType.INDEL, normVar.getType()); + assertEquals(Variant.SV_THRESHOLD, normVar.getLengthAlternate().intValue()); + assertNull(normVar.getSv()); + }); } @Test public void testNormalizeWithInsSeq() throws NonStandardCompliantSampleField { Variant variant = new Variant("1:799984<800001<800022:-:ACCACACCCACACAACACACA...TGTGGTGTGTGTGGTGTG"); - Variant normVar = new VariantNormalizer().normalize(Collections.singletonList(variant), false).get(0); - assertEquals(variant, normVar); - assertEquals(variant.toString(), normVar.toString()); + normalizeUnmodified(variant); } @Test public void testNormalizeBND() throws NonStandardCompliantSampleField { - normalizeBnd(newVariant(101, 100, "", ".[9:10["), newVariant(100, 99, "A", "A[chr9:10[")); - normalizeBnd(newVariant(100, 99, "", "[22:10[."), newVariant(100, 99, "A", "[chr22:10[A")); - normalizeBnd(newVariant(101, 100, "", ".]9:10]"), newVariant(100, 99, "A", "A]chr9:10]")); - normalizeBnd(newVariant(100, 99, "", "]22:10]."), newVariant(100, 99, "A", "]chr22:10]A")); - normalizeBnd(newVariant(100, 99, "", "]22:10]NNN"), newVariant(100, 99, "A", "]chr22:10]NNNA")); + normalizeBnd(newVariant(101, 100, "", ".[9:10["), newVariant(100, null, "A", "A[chr9:10[")); + normalizeBnd(newVariant(100, 99, "", "[22:10[."), newVariant(100, null, "A", "[chr22:10[A")); + normalizeBnd(newVariant(101, 100, "", ".]9:10]"), newVariant(100, null, "A", "A]chr9:10]")); + normalizeBnd(newVariant(100, 99, "", "]22:10]."), newVariant(100, null, "A", "]chr22:10]A")); + normalizeBnd(newVariant(100, 99, "", "]22:10]NNN"), newVariant(100, null, "A", "]chr22:10]NNNA")); - normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, 99, "A", "[1:10[TA")); - normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, 99, "AC", "[1:10[TAC")); + normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, null, "A", "[1:10[TA")); + normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, null, "AC", "[1:10[TAC")); - normalizeBnd(newVariant(100, 99, "TAC", "[1:10[AC"), newVariant(100, 99, "TAC", "[1:10[AC")); - normalizeBnd(newVariant(100, 99, "TAC", "TA[1:10["), newVariant(100, 99, "TAC", "TA[1:10[")); + normalizeBnd(newVariant(100, 99, "TAC", "[1:10[AC"), newVariant(100, null, "TAC", "[1:10[AC")); + normalizeBnd(newVariant(100, 99, "TAC", "TA[1:10["), newVariant(100, null, "TAC", "TA[1:10[")); + + normalizeBnd(newVariantBuilder(101, 100, "", ".[9:10[", "s1").setCiStart(95, 105).build(), + newVariantBuilder(100, null, "A", "A[chr9:10[", "s1").setCiStart(95,105).setCiEnd(95,105).build()); } private void normalizeBnd(Variant expectedVariant, Variant variant) throws NonStandardCompliantSampleField { - System.out.println("---"); +// System.out.println("---"); boolean expectsNormalization = !expectedVariant.equals(variant); - System.out.println(" - Actual"); - System.out.println(" " + variant.toString()); - System.out.println(" " + variant.toJson()); - System.out.println(" - Expected"); - System.out.println(" " + expectedVariant.toString()); - System.out.println(" " + expectedVariant.toJson()); - System.out.println(" - Normalized (same = " + !expectsNormalization + ")"); - List normalized = normalizer.normalize(Collections.singletonList(variant), false); - - for (Variant v : normalized) { - System.out.println(" " + v.toString()); - System.out.println(" " + v.toJson()); - if (expectsNormalization) { - assertNotNull(v.getStudies().get(0).getFiles().get(0).getCall()); - v.getStudies().get(0).getFiles().get(0).setCall(null); - } - assertEquals(expectedVariant, v); +// System.out.println(" - Actual"); +// System.out.println(" " + variant.toString()); +// System.out.println(" " + variant.toJson()); +// System.out.println(" - Expected"); +// System.out.println(" " + expectedVariant.toString()); +// System.out.println(" " + expectedVariant.toJson()); +// System.out.println(" - Normalized (same = " + !expectsNormalization + ")"); + if (expectsNormalization) { + normalizeOne(variant, normVar -> { + System.out.println(" " + normVar.toString()); + System.out.println(" " + normVar.toJson()); + OriginalCall call = normVar.getStudies().get(0).getFiles().get(0).getCall(); + assertNotNull(call); + normVar.getStudies().get(0).getFiles().get(0).setCall(null); + assertEquals(expectedVariant, normVar); + normVar.getStudies().get(0).getFiles().get(0).setCall(call); + }); + } else { + normalizeUnmodified(variant); } } From a6abc515055de2f42805197c0859852441b91317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 17 Jul 2024 11:03:56 +0100 Subject: [PATCH 11/21] tools: Remove sv.ciEnd from INSERTION and BREAKEND variants. #TASK-6558 --- .../biodata/tools/variant/VariantNormalizer.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index 1b8992c0..e902ce99 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -642,8 +642,18 @@ public List normalizeSymbolic(final Integer start, final Integ // CI positions may change during the normalization. Update them. normalizedSv.setCiStartLeft(sv.getCiStartLeft()); normalizedSv.setCiStartRight(sv.getCiStartRight()); - normalizedSv.setCiEndLeft(sv.getCiEndLeft()); - normalizedSv.setCiEndRight(sv.getCiEndRight()); + + // Structural variants that affect a single point (INSERTIONS or Breakends) should not have CIEND. + // At this point, we're removing the CIEND from the normalized variant. + // Do not remove the value from the INFO field (if any). + // The END is the same as the start (which, in base-1 means that "end == start -1" , so "end < start") + if (keyFields.getEnd() < keyFields.getStart()) { + normalizedSv.setCiEndLeft(null); + normalizedSv.setCiEndRight(null); + } else { + normalizedSv.setCiEndLeft(sv.getCiEndLeft()); + normalizedSv.setCiEndRight(sv.getCiEndRight()); + } normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); From 58bee081bda9506b26f224aad484ad8ad4c38a8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 18 Jul 2024 12:38:37 +0100 Subject: [PATCH 12/21] tools: Normalize sv for non-symbolic variants. #TASK-6558 --- .../tools/variant/VariantNormalizer.java | 160 +++++++++++------- .../tools/variant/VariantNormalizerTest.java | 26 +++ .../variant/merge/VariantMergerTest.java | 3 +- 3 files changed, 126 insertions(+), 63 deletions(-) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index e902ce99..3e16977f 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -301,19 +301,16 @@ public List normalize(List batch, boolean reuse) throws NonSta normalizedVariants.add(variant); continue; } - String reference = variant.getReference(); //Save original values, as they can be changed + //Save original values, as they can be changed + String reference = variant.getReference(); String alternate = variant.getAlternate(); Integer start = variant.getStart(); Integer end = variant.getEnd(); String chromosome = variant.getChromosome(); if (variant.getStudies() == null || variant.getStudies().isEmpty()) { - List keyFieldsList; - if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternate, variant.getSv()); - } else { - keyFieldsList = normalize(chromosome, start, reference, alternate); - } + List keyFieldsList = normalizeAlleles(variant); + // Iterate keyFields sorting by position, so the generated variants are ordered. Do not modify original order! for (VariantKeyFields keyFields : sortByPosition(keyFieldsList)) { OriginalCall call = new OriginalCall(variant.toString(), keyFields.getNumAllele()); @@ -331,25 +328,16 @@ public List normalize(List batch, boolean reuse) throws NonSta normalizedVariants.add(normalizedVariant); } } else { - for (StudyEntry entry : variant.getStudies()) { - List originalAlternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); - List alternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); - alternates.add(alternate); - originalAlternates.add(alternate); - for (String secondaryAlternatesAllele : entry.getSecondaryAlternatesAlleles()) { - alternates.add(secondaryAlternatesAllele); - originalAlternates.add(secondaryAlternatesAllele); - } + if (variant.getStudies().size() != 1) { + throw new IllegalStateException("Only one study per variant is supported when normalizing variants. Found " + + variant.getStudies().size() + " studies. Variant: " + variant); + } else { + StudyEntry entry = variant.getStudies().get(0); + List alternates = getAllAlternates(variant); // FIXME: assumes there wont be multinucleotide positions with CNVs and short variants mixed - List keyFieldsList; - List originalKeyFieldsList; - if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternates, variant.getSv()); - } else { - keyFieldsList = normalize(chromosome, start, reference, alternates); - } - originalKeyFieldsList = keyFieldsList + List keyFieldsList = normalizeAlleles(variant); + List originalKeyFieldsList = keyFieldsList .stream() .filter(k -> !k.isReferenceBlock()) .map(k -> k.originalKeyFields) @@ -372,8 +360,8 @@ public List normalize(List batch, boolean reuse) throws NonSta originalCall = entry.getFiles().get(0).getCall().getVariantId(); } else { StringBuilder sb = new StringBuilder(variant.toString()); - for (int i = 1; i < originalAlternates.size(); i++) { - sb.append(",").append(originalAlternates.get(i)); + for (int i = 1; i < alternates.size(); i++) { + sb.append(",").append(alternates.get(i)); } originalCall = sb.toString(); } @@ -600,17 +588,54 @@ private Collection sortByPosition(List keyFi // } // } + protected List normalizeAlleles(Variant variant) { + List alternates = getAllAlternates(variant); + + List keyFieldsList; + if (isSymbolic(variant)) { + keyFieldsList = normalizeSymbolic(variant.getStart(), variant.getEnd(), variant.getReference(), alternates, variant.getSv()); + } else { + keyFieldsList = normalize(variant.getChromosome(), variant.getStart(), variant.getReference(), alternates, variant.getSv()); + } + return keyFieldsList; + } + + private static List getAllAlternates(Variant variant) { + List alternates; + if (variant.getStudies() != null && !variant.getStudies().isEmpty()) { + StudyEntry entry = variant.getStudies().get(0); + String alternate = variant.getAlternate(); + alternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); + alternates.add(alternate); + for (AlternateCoordinate secondaryAlternate : entry.getSecondaryAlternates()) { + if (secondaryAlternate.getStart() != null && !secondaryAlternate.getStart().equals(variant.getStart())) { + throw new IllegalStateException("Unable to normalize variant where secondary alternates do not start at the same position. " + + "Variant: " + variant + " , secondaryAlternate: " + secondaryAlternate); + } + if (secondaryAlternate.getEnd() != null && !secondaryAlternate.getEnd().equals(variant.getEnd())) { + throw new IllegalStateException("Unable to normalize variant where secondary alternates do not end at the same position. " + + "Variant: " + variant + " (end=" + variant.getEnd() + ") , secondaryAlternate: " + secondaryAlternate); + } + alternates.add(secondaryAlternate.getAlternate()); + } + } else { + alternates = Collections.singletonList(variant.getAlternate()); + } + return Collections.unmodifiableList(alternates); + } + + @Deprecated // Test purposes only public List normalizeSymbolic(Integer start, Integer end, String reference, String alternate, StructuralVariation sv) { return normalizeSymbolic(start, end, reference, Collections.singletonList(alternate), sv); } - @Deprecated + @Deprecated // Test purposes only public List normalizeSymbolic(final Integer start, final Integer end, final String reference, final List alternates) { return normalizeSymbolic(start, end, reference, alternates, null); } - public List normalizeSymbolic(final Integer start, final Integer end, final String reference, + protected List normalizeSymbolic(final Integer start, final Integer end, final String reference, final List alternates, StructuralVariation sv) { List list = new ArrayList<>(alternates.size()); @@ -634,37 +659,7 @@ public List normalizeSymbolic(final Integer start, final Integ keyFields.getSv().setType(StructuralVariantType.TANDEM_DUPLICATION); } - if (sv != null) { - StructuralVariation normalizedSv = keyFields.getSv(); - if (normalizedSv == null) { - normalizedSv = new StructuralVariation(); - } - // CI positions may change during the normalization. Update them. - normalizedSv.setCiStartLeft(sv.getCiStartLeft()); - normalizedSv.setCiStartRight(sv.getCiStartRight()); - - // Structural variants that affect a single point (INSERTIONS or Breakends) should not have CIEND. - // At this point, we're removing the CIEND from the normalized variant. - // Do not remove the value from the INFO field (if any). - // The END is the same as the start (which, in base-1 means that "end == start -1" , so "end < start") - if (keyFields.getEnd() < keyFields.getStart()) { - normalizedSv.setCiEndLeft(null); - normalizedSv.setCiEndRight(null); - } else { - normalizedSv.setCiEndLeft(sv.getCiEndLeft()); - normalizedSv.setCiEndRight(sv.getCiEndRight()); - } - normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); - normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); - - if (keyFields.getSv() == null) { - if (normalizedSv.getCiStartLeft() != null || normalizedSv.getCiStartRight() != null - || normalizedSv.getCiEndLeft() != null || normalizedSv.getCiEndRight() != null - || normalizedSv.getLeftSvInsSeq() != null || normalizedSv.getRightSvInsSeq() != null) { - keyFields.setSv(normalizedSv); - } - } - } + normalizeSvField(sv, keyFields); list.add(keyFields); } @@ -672,6 +667,40 @@ public List normalizeSymbolic(final Integer start, final Integ return list; } + private static void normalizeSvField(StructuralVariation sv, VariantKeyFields keyFields) { + if (sv != null) { + StructuralVariation normalizedSv = keyFields.getSv(); + if (normalizedSv == null) { + normalizedSv = new StructuralVariation(); + } + // CI positions may change during the normalization. Update them. + normalizedSv.setCiStartLeft(sv.getCiStartLeft()); + normalizedSv.setCiStartRight(sv.getCiStartRight()); + + // Structural variants that affect a single point (INSERTIONS or Breakends) should not have CIEND. + // At this point, we're removing the CIEND from the normalized variant. + // Do not remove the value from the INFO field (if any). + // The END is the same as the start (which, in base-1 means that "end == start -1" , so "end < start") + if (keyFields.getEnd() < keyFields.getStart()) { + normalizedSv.setCiEndLeft(null); + normalizedSv.setCiEndRight(null); + } else { + normalizedSv.setCiEndLeft(sv.getCiEndLeft()); + normalizedSv.setCiEndRight(sv.getCiEndRight()); + } + normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); + normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); + + if (keyFields.getSv() == null) { + if (normalizedSv.getCiStartLeft() != null || normalizedSv.getCiStartRight() != null + || normalizedSv.getCiEndLeft() != null || normalizedSv.getCiEndRight() != null + || normalizedSv.getLeftSvInsSeq() != null || normalizedSv.getRightSvInsSeq() != null) { + keyFields.setSv(normalizedSv); + } + } + } + } + private boolean isNonRef(String alternate) { return alternate.equals(Allele.NO_CALL_STRING) || alternate.equals(VariantBuilder.NON_REF_ALT) @@ -780,12 +809,17 @@ private VariantKeyFields normalizeSymbolic( } + @Deprecated // Test purposes only public List normalize(String chromosome, int position, String reference, String alternate) { - return normalize(chromosome, position, reference, Collections.singletonList(alternate)); + return normalize(chromosome, position, reference, Collections.singletonList(alternate), null); } - public List normalize(String chromosome, int position, String reference, List alternates) - { + @Deprecated // Test purposes only + public List normalize(String chromosome, int position, String reference, List alternates) { + return normalize(chromosome, position, reference, alternates, null); + } + + protected List normalize(String chromosome, int position, String reference, List alternates, StructuralVariation sv) { List list = new ArrayList<>(alternates.size()); int numAllelesIdx = 0; // This index is necessary for getting the samples where the mutated allele is present @@ -829,6 +863,8 @@ public List normalize(String chromosome, int position, String } } + normalizeSvField(sv, keyFields); + if (keyFields != null) { // To deal with cases such as A>GT diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java index 4253d940..a1faf486 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java @@ -684,6 +684,32 @@ public void testINSsNormalizationWithCIEND() throws Exception { }); } + @Test + public void testNormalizeNonSymbolicInsertion() throws Exception { + Variant variant = newVariantBuilder(100, null, "C", Collections.singletonList("CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, null, null, null, null, null, null, null), normalizedVariant.getSv()); + }); + } + + @Test + public void testNormalizeNonSymbolicDeletion() throws Exception { + Variant variant = newVariantBuilder(100, null, "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "C", "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-1,1") + .addSample("HG00096", "0|1") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 179, 181, null, null, null, null, null), normalizedVariant.getSv()); + }); + } + @Test public void testDUPTANDEMNormalization() throws Exception { Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java index 46ab5800..07533ab5 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java @@ -498,7 +498,8 @@ public void testMergeIndelCase1() throws NonStandardCompliantSampleField { Variant v1 = VariantTestUtils.generateVariantWithFormat("1:328:CTT:C", VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY, "S1", "1/2","PASS"); - v1.getStudies().get(0).getSecondaryAlternates().add(new AlternateCoordinate(null,null,331,"CTT", "CTTTC", VariantType.INDEL)); + + v1.getStudies().get(0).getSecondaryAlternates().add(new AlternateCoordinate(null, null, 330, "CTT", "CTTTC", VariantType.INDEL)); Variant v2 = VariantTestUtils.generateVariantWithFormat("1:331:T:TCT", VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY, From da73175ec437c88a62a572c4960c572272acf306 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 6 Aug 2024 17:35:52 +0200 Subject: [PATCH 13/21] Prepare Port Patch Cellbase 2.12.2 -> 3.2.1 #TASK-6647 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 3a98861e..bb85a556 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.2 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index fe80d521..ce1f093f 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 79a1954e..b10170a1 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 45d41e63..9ccec982 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index a051029f..bc1643f8 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 3.2.1-SNAPSHOT pom Biodata From 009756479fc77d9dfcad87422877622e9b5aa365 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 2 Sep 2024 16:10:28 +0200 Subject: [PATCH 14/21] Prepare release 2.12.3 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 3c1b2425..748ae79f 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.3-SNAPSHOT + 2.12.3 ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 24182d67..93a56dcc 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3-SNAPSHOT + 2.12.3 ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 1eb260d6..a0862b06 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3-SNAPSHOT + 2.12.3 ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index fdb21cbf..7d496042 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3-SNAPSHOT + 2.12.3 ../pom.xml diff --git a/pom.xml b/pom.xml index 6166063d..425f4543 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3-SNAPSHOT + 2.12.3 pom Biodata @@ -38,7 +38,7 @@ - 4.12.1-SNAPSHOT + 4.12.0 2.11.4 4.4 1.7.7 From ded97e0c2065368fa4974954e70abc0778472e09 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 3 Sep 2024 16:16:33 +0200 Subject: [PATCH 15/21] preparing Port Patch 1.10.7 -> 2.2.1 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 748ae79f..bb85a556 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.3 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 93a56dcc..ce1f093f 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index a0862b06..b10170a1 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 7d496042..fb5dbfa4 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 425f4543..9848510f 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3 + 3.2.1-SNAPSHOT pom Biodata From cc133b9eeaad60eb035cb381959e636c02a46bfd Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 4 Sep 2024 18:31:05 +0200 Subject: [PATCH 16/21] cicd: Upload reference to develop branch in pull-request-approve to test-xetabase-workflow #TASK-6807 --- .github/workflows/pull-request-approved.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index a0d481fa..23709720 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -27,7 +27,7 @@ jobs: test: name: "Run all tests before merging" needs: calculate-xetabase-branch - uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@TASK-6399 + uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@develop with: branch: ${{ needs.calculate-xetabase-branch.outputs.xetabase_branch }} task: ${{ github.event.pull_request.head.ref }} From b1d29ec0434f23b587d151b60c149257af1d4a58 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 9 Sep 2024 16:37:16 +0200 Subject: [PATCH 17/21] cicd: Fix xetabase branch calculation #TASK-6807 --- .github/workflows/pull-request-approved.yml | 7 ++++++- .github/workflows/scripts/get-xetabase-branch.sh | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index 23709720..e4535a19 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -20,9 +20,14 @@ jobs: name: "Get current branch for Xetabase from target branch" run: | chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh - xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) + echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}" + echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" + echo "secrets.ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }}" + xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.head.ref }}) echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT + env: + ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }} test: name: "Run all tests before merging" diff --git a/.github/workflows/scripts/get-xetabase-branch.sh b/.github/workflows/scripts/get-xetabase-branch.sh index e971f990..a1eb7e52 100644 --- a/.github/workflows/scripts/get-xetabase-branch.sh +++ b/.github/workflows/scripts/get-xetabase-branch.sh @@ -7,9 +7,9 @@ get_xetabase_branch() { # If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it if [[ $input_branch == TASK* ]]; then - if [ "$(git ls-remote https://github.com/zetta-genomics/opencga-enterprise.git "$input_branch" )" ] ; then - echo "$GIT_BRANCH"; - exit 0; + if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then + echo $input_branch; + return 0; fi fi From 80791e4ba74ce3265c70c24b1a5bf174503e205b Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 10 Sep 2024 09:10:16 +0200 Subject: [PATCH 18/21] cicd: Fix xetabase branch calculation #TASK-6807 --- .github/workflows/pull-request-approved.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index e4535a19..c2fe27a0 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -16,6 +16,8 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: '10' + ## This is important to avoid the error in the next step: "fatal: repository 'https://github.com/zetta-genomics/opencga-enterprise.git/' not found" + persist-credentials: false - id: get_xetabase_branch name: "Get current branch for Xetabase from target branch" run: | From 0a09df23c5d585621a78157aa415dfcec48691bf Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 10 Sep 2024 09:27:07 +0200 Subject: [PATCH 19/21] cicd: Fix xetabase branch calculation #TASK-6807 --- .github/workflows/pull-request-approved.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index c2fe27a0..d339f65b 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -24,7 +24,6 @@ jobs: chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}" echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" - echo "secrets.ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }}" xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.head.ref }}) echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT From 014f1ca23ae6386f7e3f5f6baf18eec90f68b31e Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 13 Sep 2024 11:56:18 +0200 Subject: [PATCH 20/21] Prepare release 3.2.1 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index bb85a556..dafe64c2 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 3.2.1-SNAPSHOT + 3.2.1 ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 238588ca..e071844f 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1-SNAPSHOT + 3.2.1 ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index f9cf2628..f2311443 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1-SNAPSHOT + 3.2.1 ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index c7c32361..a9a269a7 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1-SNAPSHOT + 3.2.1 ../pom.xml diff --git a/pom.xml b/pom.xml index 1a6c71bc..cd18672d 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1-SNAPSHOT + 3.2.1 pom Biodata @@ -38,7 +38,7 @@ - 5.2.1-SNAPSHOT + 5.2.1 2.14.3 4.4 From 8fbe11a7c5942534927c6ceff5566bfea868462c Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 3 Oct 2024 13:13:08 +0200 Subject: [PATCH 21/21] Port Patch 3.2.1 -> 4.0.0 XB 2.2.1 -> 3.0.0 #TASK-6780 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index dafe64c2..c425be02 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 3.2.1 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index e071844f..e56df070 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index f2311443..9d6b4ef7 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index a9a269a7..8b5d54a4 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1 + 4.0.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index cd18672d..dce46162 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1 + 4.0.0-SNAPSHOT pom Biodata