diff --git a/.gitignore b/.gitignore index 55ae73f..168622f 100644 --- a/.gitignore +++ b/.gitignore @@ -79,4 +79,4 @@ refget.egg-info/ *ipynb_checkpoints* *.egg-info* - +coverage.xml diff --git a/README.md b/README.md index 7d9660d..af64a0a 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,27 @@ # Refget -[![Build Status](https://travis-ci.com/refgenie/refget.svg?branch=master)](https://travis-ci.com/refgenie/refget) +![Run pytests](https://github.com/pepkit/looper/workflows/Run%20pytests/badge.svg) The refget package provides a Python interface to both remote and local use of the refget protocol. This package provides clients and functions for both refget sequences and refget sequence collections (seqcol). Documentation is hosted at [refgenie.org/refget](https://refgenie.org/refget/). + +## Testing + +### Local unit tests of refget package + +- `pytest` to test `refget` package, local unit tests + +### Compliance testing + +Under `/test_api` are compliance tests for a service implementing the sequence collections API. This will test your collection and comparison endpoints to make sure the comparison function is working. + +- `pytest test_api` to test API compliance +- `pytest test_api --api_root http://127.0.0.1:8100` to customize the API root URL to test + +1. Load the fasta files from the `test_fasta` folder into your API database. +2. Run `pytest test_api --api_root <API_ROOT_URL>`, pointing to the URL of the API you want to test + + diff --git a/README_seqcolapi.md b/README_seqcolapi.md new file mode 100644 index 0000000..dc2178e --- /dev/null +++ b/README_seqcolapi.md @@ -0,0 +1,105 @@ +# seqcolapi + +This repository contains: + +1. Sequence collections API software (the `seqcolapi` package). This package is based on the `refget` package. It simply provides a wrapper to implement the Sequence Collections API. +2. Configuration and GitHub Actions for a demo server instance ([servers subfolder](/servers)). 
+ +## Instructions + +### Run locally for development + +First, configure env vars: +- To run a local server with a **local database**: `source servers/localhost/dev_local.env` +- To run a local server with **the production database**: `source servers/seqcolapi.databio.org/production.env` + +Then, run the service: + +``` +uvicorn seqcolapi.main:app --reload --port 8100 +``` + +### Running with docker + +To build the Docker images, run the following from the root of this repository: + +First you build the general-purpose image: + +``` +docker build -f deployment/dockerhub/Dockerfile -t databio/seqcolapi seqcolapi +``` + +Next you build the wrapped image (this just wraps the config into the app): + +``` +docker build -f deployment/seqcolapi.databio.org/Dockerfile -t seqcolapi.databio.org deployment/seqcolapi.databio.org +``` + +To run in a container: + +``` +source deployment/seqcolapi.databio.org/production.env +docker run --rm -p 8000:80 --name seqcolapi seqcolapi.databio.org +``` + +``` +docker run --rm -p 8000:8000 --name sccon \ + --env "POSTGRES_PASSWORD" \ + --volume $CODE/seqcolapi.databio.org/config/seqcolapi.yaml:/config.yaml \ + scim +``` + +To deploy the container to Docker Hub: + +Use the GitHub Action in this repo, which deploys on release or through manual dispatch. + + +Left to do: +- [x] it already retrieves from a refget server. +- [x] let me insert stuff using only checksums. +- [ ] make it take 2 refget servers correctly. + + +## To load new data into seqcolapi.databio.org + +``` +cd analysis +source ../servers/localhost/dev_local.env +ipython3 +``` + +Now run `load_fasta.py` + +## Deploy to AWS ECS + +### Testing locally first + +Build the seqcolapi image + +``` +cd +docker build -t docker.io/databio/seqcolapi:latest . +``` + +``` +docker pull docker.io/databio/seqcolapi:latest +cd servers/seqcolapi.databio.org +docker build -t scim . 
+docker run \ + -e POSTGRES_HOST=$POSTGRES_HOST \ + -e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \ + --network=host \ + scim +``` + +### Deploying + +To upgrade the software: + +Use the config file located in `/servers/seqcolapi.databio.org`. This will use the image in docker.io://databio/seqcolapi, github repo: [refgenie/seqcolapi](https://github.com/refgenie/seqcolapi) as base, bundle it with the above config, and deploy to the shefflab ECS. + +1. Ensure the [refget](https://github.com/refgenie/refget/) package master branch is as you want it. +2. Deploy the updated [seqcolapi](https://github.com/refgenie/seqcolapi/) app to dockerhub (using manual dispatch, or deploy on GitHub release). +3. Finally, deploy the instance with manual dispatch using the included GitHub action. + + diff --git a/deployment/dockerhub/Dockerfile b/deployment/dockerhub/Dockerfile new file mode 100644 index 0000000..6b6e8b4 --- /dev/null +++ b/deployment/dockerhub/Dockerfile @@ -0,0 +1,11 @@ +FROM tiangolo/uvicorn-gunicorn:python3.11-slim +LABEL authors="Nathan Sheffield" +RUN pip install https://github.com/databio/yacman/archive/dev.zip +RUN pip install https://github.com/refgenie/refget/archive/seqcolapi.zip + +COPY requirements/requirements-seqcolapi.txt requirements/requirements-seqcolapi.txt +RUN pip install -r requirements/requirements-seqcolapi.txt --no-cache-dir + +COPY . 
/app/seqcolapi + +CMD ["uvicorn", "seqcolapi.main:app", "--host", "0.0.0.0", "--port", "80"] diff --git a/deployment/localhost/dev_local.env b/deployment/localhost/dev_local.env new file mode 100644 index 0000000..1d969c7 --- /dev/null +++ b/deployment/localhost/dev_local.env @@ -0,0 +1,9 @@ +export POSTGRES_HOST=`pass databio/seqcol/postgres_host` +export POSTGRES_DB=`pass databio/seqcol/postgres_db` +export POSTGRES_USER=`pass databio/seqcol/postgres_user` +export POSTGRES_PASSWORD=`pass databio/seqcol/postgres_password` +export POSTGRES_TABLE="seqcol" + +export SEQCOLAPI_PORT="8100" +export SERVER_ENV="dev" +export SEQCOLAPI_CONFIG="servers/localhost/seqcolapi.yaml" diff --git a/deployment/localhost/seqcolapi.yaml b/deployment/localhost/seqcolapi.yaml new file mode 100644 index 0000000..9ad1a99 --- /dev/null +++ b/deployment/localhost/seqcolapi.yaml @@ -0,0 +1,13 @@ +refget_provider_apis: https://www.ebi.ac.uk/ena/cram/sequence/ +schemas: +- https://schema.databio.org/refget/SeqColArraySetInherent.yaml +database: + host: $POSTGRES_HOST + user: $POSTGRES_USER + password: $POSTGRES_PASSWORD + port: 5432 + name: $POSTGRES_DB + table: seqcol +server: + host: 0.0.0.0 + port: 80 diff --git a/deployment/seqcolapi.databio.org/Dockerfile b/deployment/seqcolapi.databio.org/Dockerfile new file mode 100644 index 0000000..7fcc959 --- /dev/null +++ b/deployment/seqcolapi.databio.org/Dockerfile @@ -0,0 +1,3 @@ +FROM databio/seqcolapi +COPY seqcolapi.yaml /seqcolapi_config.yaml +ENV SEQCOLAPI_CONFIG /seqcolapi_config.yaml diff --git a/deployment/seqcolapi.databio.org/primary_task_def.json b/deployment/seqcolapi.databio.org/primary_task_def.json new file mode 100644 index 0000000..1f88f81 --- /dev/null +++ b/deployment/seqcolapi.databio.org/primary_task_def.json @@ -0,0 +1,105 @@ +{ + "ipcMode": null, + "executionRoleArn": "arn:aws:iam::235728444054:role/ecsTaskExecutionRole", + "containerDefinitions": [ + { + "dnsSearchDomains": null, + "environmentFiles": null, + 
"logConfiguration": null, + "entryPoint": null, + "portMappings": [ + { + "hostPort": 8105, + "protocol": "tcp", + "containerPort": 80 + } + ], + "command": null, + "linuxParameters": null, + "cpu": 0, + "environment": [], + "resourceRequirements": null, + "ulimits": null, + "dnsServers": null, + "mountPoints": [], + "workingDirectory": null, + "secrets": [ + { + "valueFrom": "SEQCOLAPI_POSTGRES_PASSWORD", + "name": "POSTGRES_PASSWORD" + }, + { + "valueFrom": "BEDBASE_POSTGRES_HOST", + "name": "POSTGRES_HOST" + } + ], + "dockerSecurityOptions": null, + "memory": 2048, + "memoryReservation": 512, + "volumesFrom": [], + "stopTimeout": null, + "image": "235728444054.dkr.ecr.us-east-1.amazonaws.com/my-ecr-repo:170afd5cf39d9799e926e1d0ebf40b9051fb731f", + "startTimeout": null, + "firelensConfiguration": null, + "dependsOn": null, + "disableNetworking": null, + "interactive": null, + "healthCheck": null, + "essential": true, + "links": null, + "hostname": null, + "extraHosts": null, + "pseudoTerminal": null, + "user": null, + "readonlyRootFilesystem": null, + "dockerLabels": null, + "systemControls": null, + "privileged": null, + "name": "seqcolapi" + } + ], + "placementConstraints": [], + "memory": null, + "taskRoleArn": "ecsTaskExecutionRole", + "compatibilities": [ + "EC2" + ], + "family": "seqcolapi-task", + "requiresAttributes": [ + { + "targetId": null, + "targetType": null, + "value": null, + "name": "com.amazonaws.ecs.capability.ecr-auth" + }, + { + "targetId": null, + "targetType": null, + "value": null, + "name": "com.amazonaws.ecs.capability.docker-remote-api.1.21" + }, + { + "targetId": null, + "targetType": null, + "value": null, + "name": "com.amazonaws.ecs.capability.task-iam-role" + }, + { + "targetId": null, + "targetType": null, + "value": null, + "name": "ecs.capability.execution-role-ecr-pull" + } + ], + "pidMode": null, + "requiresCompatibilities": [ + "EC2" + ], + "networkMode": "bridge", + "cpu": "128", + "revision": 1, + "status": "ACTIVE", + 
"inferenceAccelerators": null, + "proxyConfiguration": null, + "volumes": [] +} diff --git a/deployment/seqcolapi.databio.org/production.env b/deployment/seqcolapi.databio.org/production.env new file mode 100644 index 0000000..5ddb740 --- /dev/null +++ b/deployment/seqcolapi.databio.org/production.env @@ -0,0 +1,9 @@ +export POSTGRES_HOST=`pass databio/seqcol/postgres_host` +export POSTGRES_DB=`pass databio/seqcol/postgres_db` +export POSTGRES_USER=`pass databio/seqcol/postgres_user` +export POSTGRES_PASSWORD=`pass databio/seqcol/postgres_password` +export POSTGRES_TABLE="seqcol" + +export SEQCOLAPI_PORT="5432" +export SERVER_ENV="production" +export SEQCOLAPI_CONFIG="deployment/seqcolapi.databio.org/seqcolapi.yaml" diff --git a/deployment/seqcolapi.databio.org/seqcolapi.yaml b/deployment/seqcolapi.databio.org/seqcolapi.yaml new file mode 100644 index 0000000..a9464ed --- /dev/null +++ b/deployment/seqcolapi.databio.org/seqcolapi.yaml @@ -0,0 +1,13 @@ +refget_provider_apis: https://www.ebi.ac.uk/ena/cram/sequence/ +schemas: +- https://schema.databio.org/refget/SeqColArraySetInherent.yaml +database: + host: $POSTGRES_HOST + user: seqcol_admin + password: $POSTGRES_PASSWORD + port: 5432 + name: seqcol + table: seqcol +server: + host: 0.0.0.0 + port: 80 diff --git a/refget/__init__.py b/refget/__init__.py index 897c9fa..eead32f 100644 --- a/refget/__init__.py +++ b/refget/__init__.py @@ -10,3 +10,10 @@ from .seqcol import * from .utilities import * from .seqcol_client import * + +try: + # Requires optional dependencies, so we catch the ImportError + from .seqcol_router import seqcol_router +except ImportError: + seqcol_router = None + pass diff --git a/refget/examples.py b/refget/examples.py new file mode 100644 index 0000000..2704252 --- /dev/null +++ b/refget/examples.py @@ -0,0 +1,143 @@ +# Models +# Used for documentation examples in OpenAPI + +from fastapi import Path, Body + +example_digest = Path( + ..., + description="Sequence collection digest", + 
pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="a6748aa0f6a1e165f871dbed5e54ba62", +) + +example_digest_2 = Path( + ..., + description="Sequence collection digest", + pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="2786eb8a921aa97018c214f64b9960a0", +) + +example_digest_hg38 = Path( + ..., + description="Sequence collection digest", + pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="514c871928a74885ce981faa61ccbb1a", +) + +example_digest_hg38_primary = Path( + ..., + description="Sequence collection digest", + pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="c345e091cce0b1df78bfc124b03fba1c", +) + +example_sequence = Path( + ..., + description="Refget sequence digest", + pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="76f9f3315fa4b831e93c36cd88196480", +) + +example_hg38_sc = Body( + { + "lengths": [ + "248956422", + "242193529", + "198295559", + "190214555", + "181538259", + "170805979", + "159345973", + "145138636", + "138394717", + "133797422", + "135086622", + "133275309", + "114364328", + "107043718", + "101991189", + "90338345", + "83257441", + "80373285", + "58617616", + "64444167", + "46709983", + "50818468", + "16569", + "156040895", + "57227415", + ], + "names": [ + "chr1", + "chr2", + "chr3", + "chr4", + "chr5", + "chr6", + "chr7", + "chr8", + "chr9", + "chr10", + "chr11", + "chr12", + "chr13", + "chr14", + "chr15", + "chr16", + "chr17", + "chr18", + "chr19", + "chr20", + "chr21", + "chr22", + "chrM", + "chrX", + "chrY", + ], + "sequences": [ + "a004bc1b0bf05fc668cab6bbfd93d3eb", + "0ccf3a67666ac53f99fcad19768f2dde", + "bda7b228789169ae811dd8d676d517ca", + "88a6091e2d9a609f4ea7eaef937cd4c2", + "0f1725f15e8046a6a04e32de629b1e10", + "08c3702d62a2c476a081d3ccd15ea30c", + "cac9e313d08cdf40c9eeafe62b17879a", + "9a2ebb88dc34c2af023d50219248c815", + "41bbec590d36e711864dc6f030f0264b", + "6b420cbb22daea77d7cc930c0a00f812", + "0d4e0be5c4e5bc0f12912894f21a5dd8", + 
"e1507ba70028a65b3f5a81b594e6f0fe", + "7110500758388b169fe631b212b7e56c", + "f37e77fdbacb1a0f1be5e2bf25df343d", + "3f14ce1984dada290682eb1f564934ee", + "88169bd58f0c5f9fd083030d1357d908", + "0bbc162a7d963574b5989adab5651ac5", + "388e8c7cd11a23eebf84a02d5e442bb7", + "1c927775585df1cb09ec7c7dd1b32a6a", + "c37960f60eff5e2cfbde87e53d262efa", + "f0324d60ccf85288a26a47a7ca25a54a", + "f7479d5a2a3169e2e44d97d7f2a13db1", + "6ab1f3c8f4941e148463c40408c89e43", + "6bdaf93397b486a58fd60b55aa2e21ca", + "9bd609da53b41a50a724f2a0131ee9c1", + ], + } +) + +reclimit_ex = Path( + ..., + description="Recursion limit, the number of times to recurse to populate digests in the structure", + gt=-1, + lt=2, + examples=0, +) diff --git a/refget/seqcol.py b/refget/seqcol.py index ba6ff56..5b81f72 100644 --- a/refget/seqcol.py +++ b/refget/seqcol.py @@ -1,6 +1,5 @@ import henge import logging -import yacman from itertools import compress @@ -12,26 +11,6 @@ henge.ITEM_TYPE = "_item_type" -class SeqColConf(yacman.YAMLConfigManager): - """ - Simple configuration manager object for SeqColHenge. - """ - - def __init__( - self, - entries={}, - filepath=None, - yamldata=None, - writable=False, - wait_max=60, - skip_read_lock=False, - ): - filepath = yacman.select_config( - config_filepath=filepath, config_env_vars=["SEQCOLAPI_CONFIG"], config_name="seqcol" - ) - super(SeqColConf, self).__init__(entries, filepath, yamldata, writable) - - class SeqColHenge(henge.Henge): """ Extension of henge that accommodates collections of sequences. diff --git a/refget/seqcol_router.py b/refget/seqcol_router.py new file mode 100644 index 0000000..2087279 --- /dev/null +++ b/refget/seqcol_router.py @@ -0,0 +1,151 @@ +""" +This module contains the FastAPI router for the sequence collection API. +It is designed to be attached to a FastAPI app instance, and provides +endpoints for retrieving and comparing sequence collections. 
+ +To use, first import it, then attach it to the app, +then attach the schenge to the app state, like this: + +from refget import seqcol_router +app.include_router(seqcol_router, prefix="/seqcol") +app.state.schenge = schenge +""" + +import henge +import logging + +from fastapi import APIRouter, Response, HTTPException, Request, Depends +from fastapi.responses import JSONResponse +from typing import Union + +from .examples import * + +_LOGGER = logging.getLogger(__name__) + +seqcol_router = APIRouter() + + +async def get_schenge(request: Request): + """Dependency to get the schenge from the app state""" + return request.app.state.schenge + + +@seqcol_router.get("/test") +async def test(schenge=Depends(get_schenge)): + return str(schenge) + + +@seqcol_router.get( + "/sequence/{digest}", + summary="Retrieve raw sequence via refget protocol", + tags=["Refget endpoints"], +) +async def refget(request: Request, digest: str = example_sequence): + schenge = request.app.state.schenge + return Response(content=schenge.refget(digest)) + + +@seqcol_router.get( + "/collection/{digest}", + summary="Retrieve a sequence collection", + tags=["Retrieving sequence collections"], +) +async def collection( + schenge=Depends(get_schenge), + digest: str = example_digest, + level: Union[int, None] = None, + collated: bool = True, +): + print("Retrieving collection") + print(str(schenge)) + if level == None: + level = 2 + if level > 2: + raise HTTPException( + status_code=400, + detail="Error: recursion > 1 disabled. Use the /refget server to retrieve sequences.", + ) + try: + csc = schenge.retrieve(digest, reclimit=level - 1) + except henge.NotFoundException as e: + _LOGGER.debug(e) + raise HTTPException( + status_code=404, + detail="Error: collection not found. 
Check the digest and try again.", + ) + try: + if not collated: + if len(csc["lengths"]) > 10000: + raise HTTPException( + status_code=413, + detail="This server won't decollate collections with > 10000 sequences", + ) + else: + return JSONResponse(content=format_itemwise(csc)) + else: + return JSONResponse(content=csc) + except: + raise HTTPException( + status_code=404, + detail="Error: collection not found. Check the digest and try again.", + ) + + +@seqcol_router.get( + "/comparison/{digest1}/{digest2}", + summary="Compare two sequence collections hosted on the server", + tags=["Comparing sequence collections"], +) +async def compare_2_digests( + schenge=Depends(get_schenge), + digest1: str = example_digest_hg38, + digest2: str = example_digest_hg38_primary, +): + _LOGGER.info("Compare called") + result = {} + result["digests"] = {"a": digest1, "b": digest2} + try: + result.update(schenge.compare_digests(digest1, digest2)) + except henge.NotFoundException as e: + _LOGGER.debug(e) + raise HTTPException( + status_code=404, + detail="Error: collection not found. 
Check the digest and try again.", + ) + return JSONResponse(result) + + +@seqcol_router.post( + "/comparison/{digest1}", + summary="Compare a local sequence collection to one on the server", + tags=["Comparing sequence collections"], +) +async def compare_1_digest( + schenge=Depends(get_schenge), digest1: str = example_digest_hg38, B: dict = example_hg38_sc +): + _LOGGER.info(f"digest1: {digest1}") + _LOGGER.info(f"B: {B}") + A = schenge.retrieve(digest1, reclimit=1) + return JSONResponse(schenge.compat_all(A, B)) + + +@seqcol_router.get( + "/list-by-offset", + summary="List sequence collections on the server", + tags=["Listing sequence collections"], +) +async def list_collections_by_offset( + schenge=Depends(get_schenge), limit: int = 100, offset: int = 0 +): + return JSONResponse(schenge.list_by_offset(limit=limit, offset=offset)) + + +@seqcol_router.get( + "/list", + summary="List sequence collections on the server", + tags=["Listing sequence collections"], +) +async def list_collections_by_token( + schenge=Depends(get_schenge), page_size: int = 100, cursor: str = None +): + return JSONResponse(schenge.list(page_size=page_size, cursor=cursor)) diff --git a/refget/utilities.py b/refget/utilities.py index 8d2592b..c21e560 100644 --- a/refget/utilities.py +++ b/refget/utilities.py @@ -7,7 +7,7 @@ import pyfaidx from jsonschema import Draft7Validator -from typing import Optional, Callable +from typing import Optional, Callable, Union from yacman import load_yaml from .const import SeqCol @@ -23,18 +23,27 @@ def trunc512_digest(seq, offset=24) -> str: return hex_digest.decode() -def sha512t24u_digest(seq: str, offset: int = 24) -> str: +def sha512t24u_digest(seq: Union[str, bytes], offset: int = 24) -> str: """GA4GH digest function""" - digest = hashlib.sha512(seq.encode()).digest() + if isinstance(seq, str): + seq = seq.encode("utf-8") + digest = hashlib.sha512(seq).digest() + tdigest_b64us = base64.urlsafe_b64encode(digest[:offset]) + return 
tdigest_b64us.decode("ascii") + + +def sha512t24u_digest_bytes(seq: bytes, offset: int = 24) -> str: + """GA4GH digest function""" + digest = hashlib.sha512(seq).digest() tdigest_b64us = base64.urlsafe_b64encode(digest[:offset]) return tdigest_b64us.decode("ascii") -def canonical_str(item: dict) -> str: +def canonical_str(item: dict) -> bytes: """Convert a dict into a canonical string representation""" return json.dumps( item, separators=(",", ":"), ensure_ascii=False, allow_nan=False, sort_keys=True - ) + ).encode() def print_csc(csc: dict) -> str: @@ -99,7 +108,9 @@ def parse_fasta(fa_file_path: str) -> pyfaidx.Fasta: from gzip import open as gzopen from tempfile import NamedTemporaryFile - with gzopen(fa_file_path, "rt") as f_in, NamedTemporaryFile(mode="w+t", suffix=".fa") as f_out: + with gzopen(fa_file_path, "rt") as f_in, NamedTemporaryFile( + mode="w+t", suffix=".fa" + ) as f_out: f_out.writelines(f_in.read()) f_out.seek(0) return pyfaidx.Fasta(f_out.name) @@ -134,7 +145,7 @@ def chrom_sizes_to_seqcol( return CSC -def fasta_file_to_digest(fa_file_path: str, schema: dict=None) -> str: +def fasta_file_to_digest(fa_file_path: str, schema: dict = None) -> str: """Given a fasta, return a digest""" seqcol_obj = fasta_file_to_seqcol(fa_file_path) return seqcol_digest(seqcol_obj, schema) @@ -180,7 +191,9 @@ def fasta_obj_to_seqcol( return CSC -def build_sorted_name_length_pairs(obj: dict, digest_function: Callable[[str], str] = sha512t24u_digest): +def build_sorted_name_length_pairs( + obj: dict, digest_function: Callable[[str], str] = sha512t24u_digest +): """Builds the sorted_name_length_pairs attribute, which corresponds to the coordinate system""" sorted_name_length_pairs = [] for i in range(len(obj["names"])): diff --git a/seqcolapi/__init__.py b/seqcolapi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/seqcolapi/__main__.py b/seqcolapi/__main__.py new file mode 100644 index 0000000..2e4396a --- /dev/null +++ b/seqcolapi/__main__.py @@ 
-0,0 +1,9 @@ +import sys +from .main import main + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + print("Program canceled by user") + sys.exit(1) diff --git a/seqcolapi/_version.py b/seqcolapi/_version.py new file mode 100644 index 0000000..f102a9c --- /dev/null +++ b/seqcolapi/_version.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/seqcolapi/const.py b/seqcolapi/const.py new file mode 100644 index 0000000..ddd5504 --- /dev/null +++ b/seqcolapi/const.py @@ -0,0 +1,17 @@ +import os + +from refget._version import __version__ as refget_pkg_version +from platform import python_version + +from ._version import __version__ as seqcolapi_version + +PKG_NAME = "seqcolapi" +ALL_VERSIONS = { + "seqcolapi_version": seqcolapi_version, + "refget_pkg_version": refget_pkg_version, + "python_version": python_version(), + "seqcol_spec_version": "0.1.0", +} +STATIC_DIRNAME = "static" +STATIC_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), STATIC_DIRNAME) +TEMPLATES_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templates") diff --git a/seqcolapi/examples.py b/seqcolapi/examples.py new file mode 100644 index 0000000..2704252 --- /dev/null +++ b/seqcolapi/examples.py @@ -0,0 +1,143 @@ +# Models +# Used for documentation examples in OpenAPI + +from fastapi import Path, Body + +example_digest = Path( + ..., + description="Sequence collection digest", + pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="a6748aa0f6a1e165f871dbed5e54ba62", +) + +example_digest_2 = Path( + ..., + description="Sequence collection digest", + pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="2786eb8a921aa97018c214f64b9960a0", +) + +example_digest_hg38 = Path( + ..., + description="Sequence collection digest", + pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="514c871928a74885ce981faa61ccbb1a", +) + +example_digest_hg38_primary = Path( + ..., + description="Sequence collection digest", 
+ pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="c345e091cce0b1df78bfc124b03fba1c", +) + +example_sequence = Path( + ..., + description="Refget sequence digest", + pattern=r"^[-\w]+$", + max_length=64, + min_length=32, + examples="76f9f3315fa4b831e93c36cd88196480", +) + +example_hg38_sc = Body( + { + "lengths": [ + "248956422", + "242193529", + "198295559", + "190214555", + "181538259", + "170805979", + "159345973", + "145138636", + "138394717", + "133797422", + "135086622", + "133275309", + "114364328", + "107043718", + "101991189", + "90338345", + "83257441", + "80373285", + "58617616", + "64444167", + "46709983", + "50818468", + "16569", + "156040895", + "57227415", + ], + "names": [ + "chr1", + "chr2", + "chr3", + "chr4", + "chr5", + "chr6", + "chr7", + "chr8", + "chr9", + "chr10", + "chr11", + "chr12", + "chr13", + "chr14", + "chr15", + "chr16", + "chr17", + "chr18", + "chr19", + "chr20", + "chr21", + "chr22", + "chrM", + "chrX", + "chrY", + ], + "sequences": [ + "a004bc1b0bf05fc668cab6bbfd93d3eb", + "0ccf3a67666ac53f99fcad19768f2dde", + "bda7b228789169ae811dd8d676d517ca", + "88a6091e2d9a609f4ea7eaef937cd4c2", + "0f1725f15e8046a6a04e32de629b1e10", + "08c3702d62a2c476a081d3ccd15ea30c", + "cac9e313d08cdf40c9eeafe62b17879a", + "9a2ebb88dc34c2af023d50219248c815", + "41bbec590d36e711864dc6f030f0264b", + "6b420cbb22daea77d7cc930c0a00f812", + "0d4e0be5c4e5bc0f12912894f21a5dd8", + "e1507ba70028a65b3f5a81b594e6f0fe", + "7110500758388b169fe631b212b7e56c", + "f37e77fdbacb1a0f1be5e2bf25df343d", + "3f14ce1984dada290682eb1f564934ee", + "88169bd58f0c5f9fd083030d1357d908", + "0bbc162a7d963574b5989adab5651ac5", + "388e8c7cd11a23eebf84a02d5e442bb7", + "1c927775585df1cb09ec7c7dd1b32a6a", + "c37960f60eff5e2cfbde87e53d262efa", + "f0324d60ccf85288a26a47a7ca25a54a", + "f7479d5a2a3169e2e44d97d7f2a13db1", + "6ab1f3c8f4941e148463c40408c89e43", + "6bdaf93397b486a58fd60b55aa2e21ca", + "9bd609da53b41a50a724f2a0131ee9c1", + ], + } +) + +reclimit_ex = Path( + ..., + 
description="Recursion limit, the number of times to recurse to populate digests in the structure", + gt=-1, + lt=2, + examples=0, +) diff --git a/seqcolapi/main.py b/seqcolapi/main.py new file mode 100644 index 0000000..dedbabb --- /dev/null +++ b/seqcolapi/main.py @@ -0,0 +1,161 @@ +import henge +import json +import logging +import os +import sys +import uvicorn +import yacman + +from fastapi import Body, FastAPI, Response +from fastapi import HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field +from fastapi.responses import JSONResponse, FileResponse +from starlette.requests import Request +from starlette.templating import Jinja2Templates +from starlette.staticfiles import StaticFiles +from typing import Union + +from .const import * +from .scconf import RDBDict +from .examples import * + +from refget import SeqColHenge, format_itemwise +from yacman import select_config, FutureYAMLConfigManager as YAMLConfigManager + + +class SeqColConf(YAMLConfigManager): + pass + + +global _LOGGER + +_LOGGER = logging.getLogger(__name__) + +templates = Jinja2Templates(directory=TEMPLATES_PATH) + +for key, value in ALL_VERSIONS.items(): + _LOGGER.info(f"{key}: {value}") + + +app = FastAPI( + title="Sequence Collections API", + description="An API providing metadata such as names, lengths, and other values for collections of reference sequences", + version=seqcolapi_version, +) + +from refget import seqcol_router + +app.include_router(seqcol_router) + +origins = ["*"] + +app.add_middleware( # This is a public API, so we allow all origins + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.get("favicon.ico", include_in_schema=False) +async def favicon(): + return FileResponse(f"/static/favicon.ico") + + +@app.get("/", summary="Home page", tags=["General endpoints"]) +async def index(request: Request): + """ + Returns a landing page HTML with the server 
resources ready to download. No inputs required. + """ + templ_vars = {"request": request, "openapi_version": app.openapi()["openapi"]} + _LOGGER.debug("merged vars: {}".format(dict(templ_vars, **ALL_VERSIONS))) + return templates.TemplateResponse("index.html", dict(templ_vars, **ALL_VERSIONS)) + + +@app.get("/service-info", summary="GA4GH service info", tags=["General endpoints"]) +async def service_info(): + ret = { + "id": "org.databio.seqcolapi", + "name": "Sequence collections", + "type": { + "group": "org.ga4gh", + "artifact": "refget.seqcol", + "version": ALL_VERSIONS["seqcol_spec_version"], + }, + "description": "An API providing metadata such as names, lengths, and other values for collections of reference sequences", + "organization": {"name": "Databio Lab", "url": "https://databio.org"}, + "contactUrl": "https://github.com/refgenie/seqcol/issues", + "documentationUrl": "https://seqcolapi.databio.org", + "updatedAt": "2021-03-01T00:00:00Z", + "environment": "dev", + "version": ALL_VERSIONS["seqcolapi_version"], + "seqcol": {"schema": schenge.schemas, "sorted_name_length_pairs": True}, + } + return JSONResponse(content=ret) + + +# Mount statics after other routes for lower precedence +app.mount(f"/", StaticFiles(directory=STATIC_PATH), name=STATIC_DIRNAME) + + +def create_globals(scconf: yacman.YAMLConfigManager): + """ + Create global variables for the app to use. + """ + print(scconf) + _LOGGER.info(f"Connecting to database... 
{scconf.exp['database']['host']}") + global schenge + + pgdb = RDBDict( + db_name=scconf.exp["database"]["name"], + db_user=scconf.exp["database"]["user"], + db_password=scconf.exp["database"]["password"], + db_host=scconf.exp["database"]["host"], + db_port=scconf.exp["database"]["port"], + db_table=scconf.exp["database"]["table"], + ) + _LOGGER.info(f"Using schema: {scconf['schemas']}") + schenge = SeqColHenge( + database=pgdb, + schemas=scconf["schemas"], + ) + + return schenge + + +def main(injected_args=None): + # Entry point for running from console_scripts, installed package + parser = build_parser() + # parser = logmuse.add_logging_options(parser) + args = parser.parse_args() + if injected_args: + args.__dict__.update(injected_args) + if not args.command: + parser.print_help() + print("No subcommand given") + sys.exit(1) + + # _LOGGER = logmuse.logger_via_cli(args, make_root=True) + _LOGGER.info(f"args: {args}") + if "config" in args and args.config is not None: + scconf = SeqColConf.from_yaml_file(args.config) + create_globals(scconf) + app.state.schenge = schenge + port = args.port or scconf.exp["server"]["port"] + _LOGGER.info(f"Running on port {port}") + uvicorn.run(app, host=scconf.exp["server"]["host"], port=port) + else: + _LOGGER.error("Configure by passing -c SEQCOLAPI_CONFIG ") + + +if __name__ != "__main__": + # Entrypoint for running through uvicorn CLI (dev) + if os.environ.get("SEQCOLAPI_CONFIG") is not None: + _LOGGER.info(f"Loading config from SEQCOLAPI_CONFIG: {os.environ.get('SEQCOLAPI_CONFIG')}") + scconf = SeqColConf.from_yaml_file(os.environ.get("SEQCOLAPI_CONFIG")) + create_globals(scconf) + app.state.schenge = schenge + else: + _LOGGER.error("Configure by setting SEQCOLAPI_CONFIG env var") diff --git a/seqcolapi/requirements/requirements-seqcolapi.txt b/seqcolapi/requirements/requirements-seqcolapi.txt new file mode 100644 index 0000000..d9997e6 --- /dev/null +++ b/seqcolapi/requirements/requirements-seqcolapi.txt @@ -0,0 +1,10 @@ 
import logging
import os
import psycopg2

from collections.abc import Mapping
from psycopg2 import OperationalError, sql
from psycopg2.errors import UniqueViolation

import pipestat

_LOGGER = logging.getLogger(__name__)

# Use like:
# pgdb = RDBDict(...)    # Open connection
# pgdb["key"] = "value"  # Insert item
# pgdb["key"]            # Retrieve item
# pgdb.close()           # Close connection


def getenv(varname):
    """
    Return the value of an environment variable.

    Simple wrapper that makes the exception more informative when the
    variable is missing.

    :param str varname: name of the environment variable
    :return str: its value
    :raises Exception: if the variable is not set
    """
    try:
        return os.environ[varname]
    except KeyError:
        raise Exception(f"Environment variable {varname} not set.") from None


def _next_buffered_item(buf, load_next_page):
    """
    Advance a paged iteration buffer by one item.

    ``buf`` is the dict built by ``__iter__`` (keys: ``current_view_index``,
    ``len``, ``page_size``, ``page_index``, ``keys``). ``load_next_page`` is a
    zero-argument callable that bumps ``page_index`` and refills ``keys`` on
    that same dict.

    :raises StopIteration: once ``len`` items were produced, or when a freshly
        loaded page is empty (e.g. records were removed while iterating)
    """
    if buf["current_view_index"] >= buf["len"]:
        raise StopIteration

    # Position of the wanted item inside the currently buffered page
    idx = buf["current_view_index"] - buf["page_index"] * buf["page_size"]
    if idx >= len(buf["keys"]):
        # Current page is used up; the wanted item is first on the next page
        load_next_page()
        idx = 0
        if not buf["keys"]:
            raise StopIteration

    buf["current_view_index"] += 1
    return buf["keys"][idx]


class PipestatMapping(pipestat.PipestatManager):
    """A wrapper class to allow using a PipestatManager as a dict-like object."""

    def __getitem__(self, key):
        # Integer keys raise IndexError so that Python's legacy sequence
        # iteration protocol (which probes obj[0], obj[1], ...) terminates
        # immediately instead of looping forever.
        if isinstance(key, int):
            raise IndexError
        return self.retrieve(key)

    def __setitem__(self, key, value):
        return self.insert({key: value})

    def __len__(self):
        return self.count_records()

    def _next_page(self):
        """Load the next page of records into the buffer; return its first item (or None)."""
        self._buf["page_index"] += 1
        limit = self._buf["page_size"]
        offset = self._buf["page_index"] * limit
        # NOTE(review): assumes get_records(limit, offset) returns an
        # int-indexable sequence, as the original code did -- confirm against
        # the pipestat version in use.
        self._buf["keys"] = self.get_records(limit, offset)
        return self._buf["keys"][0] if self._buf["keys"] else None

    def __iter__(self):
        _LOGGER.debug("Iterating...")
        # The buffer must exist before _next_page() runs, because _next_page()
        # reads and writes self._buf. page_index starts at -1 so that the
        # first load fetches page 0.
        self._buf = {  # buffered iterator
            "current_view_index": 0,
            "len": len(self),
            "page_size": 100,
            "page_index": -1,
            "keys": [],
        }
        self._next_page()
        return self

    def __next__(self):
        return _next_buffered_item(self._buf, self._next_page)


class RDBDict(Mapping):
    """
    A Relational DataBase Dict.

    Simple database connection manager object that allows us to use a
    PostgresQL database as a simple key-value store to back Python
    dict-style access to database items.
    """

    def __init__(
        self,
        db_name: str = None,
        db_user: str = None,
        db_password: str = None,
        db_host: str = None,
        db_port: str = None,
        db_table: str = None,
    ):
        """
        Open a connection. Arguments left unset fall back to the POSTGRES_DB,
        POSTGRES_USER, POSTGRES_PASSWORD (all required), POSTGRES_HOST,
        POSTGRES_PORT and POSTGRES_TABLE environment variables.

        :raises Exception: if a required setting is missing or the connection
            cannot be established
        """
        self.connection = None
        self.db_name = db_name or getenv("POSTGRES_DB")
        self.db_user = db_user or getenv("POSTGRES_USER")
        self.db_host = db_host or os.environ.get("POSTGRES_HOST") or "localhost"
        self.db_port = db_port or os.environ.get("POSTGRES_PORT") or "5432"
        self.db_table = db_table or os.environ.get("POSTGRES_TABLE") or "seqcol"
        # The password is deliberately not stored on the instance
        db_password = db_password or getenv("POSTGRES_PASSWORD")

        try:
            self.connection = self.create_connection(
                self.db_name, self.db_user, db_password, self.db_host, self.db_port
            )
            if not self.connection:
                raise Exception("Connection failed")
        except Exception:
            # Log the (password-free) settings to help diagnose the failure
            _LOGGER.info(f"{self}")
            raise
        _LOGGER.info(self.connection)
        # Every statement commits on its own; __setitem__ depends on this so a
        # failed INSERT does not leave an aborted transaction behind.
        self.connection.autocommit = True

    def __repr__(self):
        return (
            "RDBD object\n"
            + "db_table: {}\n".format(self.db_table)
            + "db_name: {}\n".format(self.db_name)
            + "db_user: {}\n".format(self.db_user)
            + "db_host: {}\n".format(self.db_host)
            + "db_port: {}\n".format(self.db_port)
        )

    def init_table(self):
        """Create the key/value table if it does not exist yet."""
        # Wrap statements to prevent SQL injection attacks
        stmt = sql.SQL(
            """
            CREATE TABLE IF NOT EXISTS {table}(
            key TEXT PRIMARY KEY,
            value TEXT);
        """
        ).format(table=sql.Identifier(self.db_table))
        return self.execute_query(stmt, params=None)

    def insert(self, key, value):
        """Insert a new row; a duplicate key propagates UniqueViolation."""
        stmt = sql.SQL(
            """
            INSERT INTO {table}(key, value)
            VALUES (%(key)s, %(value)s);
        """
        ).format(table=sql.Identifier(self.db_table))
        params = {"key": key, "value": value}
        return self.execute_query(stmt, params)

    def update(self, key, value):
        """Overwrite the value stored under an existing key."""
        stmt = sql.SQL(
            """
            UPDATE {table} SET value=%(value)s WHERE key=%(key)s
        """
        ).format(table=sql.Identifier(self.db_table))
        params = {"key": key, "value": value}
        return self.execute_query(stmt, params)

    def __getitem__(self, key):
        # Integer keys raise IndexError so that Python's legacy sequence
        # iteration protocol (which probes obj[0], obj[1], ...) terminates
        # immediately instead of looping forever.
        if isinstance(key, int):
            raise IndexError
        stmt = sql.SQL(
            """
            SELECT value FROM {table} WHERE key=%(key)s
        """
        ).format(table=sql.Identifier(self.db_table))
        params = {"key": key}
        res = self.execute_read_query(stmt, params)
        if not res:
            # NOTE(review): a missing key yields None rather than KeyError, so
            # the inherited Mapping.__contains__ reports every key as present.
            # Kept as-is because callers may rely on the None return -- confirm.
            _LOGGER.info("Not found: {}".format(key))
        return res

    def __setitem__(self, key, value):
        # Insert first; fall back to an update when the key already exists
        try:
            return self.insert(key, value)
        except UniqueViolation:
            _LOGGER.info("Updating existing value for {}".format(key))
            return self.update(key, value)

    def __delitem__(self, key):
        stmt = sql.SQL(
            """
            DELETE FROM {table} WHERE key=%(key)s
        """
        ).format(table=sql.Identifier(self.db_table))
        params = {"key": key}
        res = self.execute_query(stmt, params)
        return res

    def create_connection(self, db_name, db_user, db_password, db_host, db_port):
        """Return a new psycopg2 connection, or None if connecting failed."""
        connection = None
        try:
            connection = psycopg2.connect(
                database=db_name,
                user=db_user,
                password=db_password,
                host=db_host,
                port=db_port,
            )
            _LOGGER.info("Connection to PostgreSQL DB successful")
        except OperationalError as e:
            _LOGGER.info("Error: {e}".format(e=str(e)))
        return connection

    def execute_read_query(self, query, params=None):
        """
        Run a query and return the first column of its first row.

        :return: the value, or None when the query produced no (truthy) row
        :raises Exception: on OperationalError or TypeError, chained to the cause
        """
        try:
            # The context manager closes the cursor even when the query fails
            with self.connection.cursor() as cursor:
                cursor.execute(query, params)
                result = cursor.fetchone()
        except OperationalError as e:
            _LOGGER.info("Error: {e}".format(e=str(e)))
            raise Exception("Read query failed: {e}".format(e=str(e))) from e
        except TypeError as e:
            _LOGGER.info("TypeError: {e}, item: {q}".format(e=str(e), q=query))
            raise Exception("Read query failed: {e}".format(e=str(e))) from e

        if result:
            return result[0]
        _LOGGER.debug(f"Query: {query}")
        _LOGGER.debug(f"Result: {result}")
        return None

    def execute_multi_query(self, query, params=None):
        """
        Run a query and return all of its rows (as given by cursor.fetchall()).

        :raises Exception: on OperationalError or TypeError, chained to the cause
        """
        try:
            with self.connection.cursor() as cursor:
                cursor.execute(query, params)
                return cursor.fetchall()
        except OperationalError as e:
            _LOGGER.info("Error: {e}".format(e=str(e)))
            raise Exception("Multi-row query failed: {e}".format(e=str(e))) from e
        except TypeError as e:
            _LOGGER.info("TypeError: {e}, item: {q}".format(e=str(e), q=query))
            raise Exception("Multi-row query failed: {e}".format(e=str(e))) from e

    def execute_query(self, query, params=None):
        """
        Run a statement that returns no rows.

        OperationalError is logged and swallowed (best effort, returns None);
        any other database error -- notably UniqueViolation, which
        __setitem__ relies on -- propagates to the caller.
        """
        try:
            with self.connection.cursor() as cursor:
                result = cursor.execute(query, params)
            _LOGGER.debug("Query executed successfully")
            return result
        except OperationalError as e:
            _LOGGER.info("Error: {e}".format(e=str(e)))

    def close(self):
        _LOGGER.info("Closing connection")
        return self.connection.close()

    def __del__(self):
        # getattr guards against a partially constructed instance
        if getattr(self, "connection", None):
            self.close()

    def __len__(self):
        stmt = sql.SQL(
            """
            SELECT COUNT(*) FROM {table}
        """
        ).format(table=sql.Identifier(self.db_table))
        _LOGGER.debug(stmt)
        res = self.execute_read_query(stmt)
        return res

    def get_paged_keys(self, limit=None, offset=None):
        """
        Return one page of keys as rows, ordered by key.

        Ordering makes LIMIT/OFFSET pages stable across calls; limit and
        offset are coerced to int and bound as query parameters rather than
        interpolated into the SQL text.

        :param int limit: maximum number of rows; falsy means no limit
        :param int offset: number of rows to skip; None means no offset
        :return list: rows as returned by cursor.fetchall()
        """
        clauses = [
            sql.SQL("SELECT key FROM {table} ORDER BY key").format(
                table=sql.Identifier(self.db_table)
            )
        ]
        params = {}
        if limit:
            clauses.append(sql.SQL("LIMIT %(limit)s"))
            params["limit"] = int(limit)
        if offset is not None:
            clauses.append(sql.SQL("OFFSET %(offset)s"))
            params["offset"] = int(offset)
        stmt = sql.SQL(" ").join(clauses)
        return self.execute_multi_query(stmt, params or None)

    def _next_page(self):
        """Load the next page of keys into the buffer; return its first row (or None)."""
        self._buf["page_index"] += 1
        limit = self._buf["page_size"]
        offset = self._buf["page_index"] * limit
        self._buf["keys"] = self.get_paged_keys(limit, offset)
        return self._buf["keys"][0] if self._buf["keys"] else None

    def __iter__(self):
        _LOGGER.debug("Iterating...")
        page_size = 10
        self._buf = {  # buffered iterator
            "current_view_index": 0,
            "len": len(self),
            "page_size": page_size,
            "page_index": 0,
            "keys": self.get_paged_keys(page_size, 0),
        }
        return self

    def __next__(self):
        # Items are the rows produced by get_paged_keys(), i.e. whatever
        # cursor.fetchall() returns for a single-column SELECT.
        # NOTE(review): consumers therefore receive rows, not bare key
        # strings -- confirm whether unwrapping row[0] is wanted here.
        return _next_buffered_item(self._buf, self._next_page)

    # Old, non-paged iterator:
    # def __iter__(self):
    #     self._current_idx = 0
    #     return self

    # def __next__(self):
    #     stmt = sql.SQL(
    #         """
    #         SELECT key,value FROM {table} LIMIT 1 OFFSET %(idx)s
    #     """
    #     ).format(table=sql.Identifier(self.db_table))
    #     res = self.execute_read_query(stmt, {"idx": self._current_idx})
    #     self._current_idx += 1
    #     if not res:
    #         _LOGGER.info("Not found: {}".format(self._current_idx))
    #         raise StopIteration
    #     return res


# We don't need the full SeqColHenge,
# which also has loading capability, and requires pyfaidx, which requires
# biopython, which requires numpy, which is huge and can't compile in the
# default fastapi container.
# So, I had written the below class which provides retrieve only.
# HOWEVER, switching from alpine to slim allows install of numpy;
# This inflates the container size from 262Mb to 350Mb; perhaps that's worth paying.
# So I can avoid duplicating this and just use the full SeqColHenge from seqcol
# class SeqColHenge(refget.RefGetClient):
#     def retrieve(self, druid, reclimit=None, raw=False):
#         try:
#             return super(SeqColHenge, self).retrieve(druid, reclimit, raw)
#         except henge.NotFoundException as e:
#             _LOGGER.debug(e)
#             try:
#                 return self.refget(druid)
#             except Exception as e:
#                 _LOGGER.debug(e)
#                 raise e
#             return henge.NotFoundException("{} not found in database, or in refget.".format(druid))
float:left; + width: 50%; +} + +.person img { + float:left; + height: 140px; + margin-right:10px; + margin-bottom:10px; + border: 1px solid #777777; +} + +.persontext { + margin:3px; +} + +.col2 { + float: left; + width: 50%; +} + +.col2r { + float: right; + width: 50%; +} + +.box h2 { + width:100%; + clear:both; + float:left; +} +.title { + color:#224477; + font-size:1.2em; + font-weight:bold; +} + +.publications li { + margin: 18px 0 0 0; +} + +.bullet { + margin-bottom: 3px; + vertical-align: middle; + display: inline-block; + > svg { + display: inline-block; + width: 24px; + height: 22px; + vertical-align: middle; + + path { + fill: #000; + } + } +} + +.quickbox { + float:right; text-align:right; border-left: 1px solid #CCC; padding:15px; + margin:0px 0px 0px 6px; +} +.quickbox ul { + list-style-type: none; + padding-top:7px; +} + +.iconlist, .iconlist ul { + list-style-type: none; + padding-top:0px; + margin-left: 6px; +} + +.iconlist li { + list-style-type: none; + padding:10px 1px 1px 1px; + margin: 0px; +} + +.compact { + font-size:0.8em; +} + +.compact_title { + color:#224477; + font-weight:normal; +} + +.category { + background-color:#335588; +/* background-color:#117799; */ + color:white; + font-size:14px; + font-style:normal; + font-weight:bold; + text-align:right; + padding:5px; + margin-bottom:4px; +} + +.thumbnail-left { + background-color:#EDF4F2; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-style: solid; + border-right-style: solid; + border-bottom-style: solid; + border-left-style: solid; + border-top-color: rgb(105, 105, 105); + border-right-color: rgb(105, 105, 105); + border-bottom-color: rgb(105, 105, 105); + border-left-color: rgb(105, 105, 105); + padding:3px; + margin-right:6px; + width:120px; + float:left; +} + +.thumbnail-right { + background-color:#555555; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + 
border-top-style: solid; + border-right-style: solid; + border-bottom-style: solid; + border-left-style: solid; + border-top-color: rgb(105, 105, 105); + border-right-color: rgb(105, 105, 105); + border-bottom-color: rgb(105, 105, 105); + border-left-color: rgb(105, 105, 105); + margin-left:3px; + padding:3px; + width:120px; + float:right; +} +.thumbnail-right img, .thumbnail-left img { + display: block; +} + +a { + color: #0d6efd; + text-decoration: none; +} + +footer ul { + display: flex; +} + +footer ul li { + padding-left: 8px; + padding-right: 8px; + list-style: none; + display: flex; + flex-wrap: wrap; +} \ No newline at end of file diff --git a/seqcolapi/static/css/style.css b/seqcolapi/static/css/style.css new file mode 100644 index 0000000..cf37b98 --- /dev/null +++ b/seqcolapi/static/css/style.css @@ -0,0 +1,157 @@ +html, +body { + height: 100%; +} + +#vertical { + padding: 5px; +} + +#main { + overflow: auto; + padding-bottom: 40px; + padding-top: 20px; + /* must be same height as the footer */ +} + +#footer { + position: relative; + margin-top: -40px; + /* negative value of footer height */ + height: 40px; + padding-top: 5px; + padding-left: 10px; +} + +.divider-15 { + /* adds spacing 15px */ + width:90%; + min-height:1px; + margin-top:15px; + margin-bottom:15px; + margin-left:15px; + display:inline-block; + position:relative; +} + +.divider-vertical { + margin-top:15px; + /*margin-bottom:15px;*/ +} + +span { + margin-left: 5px; +} + +div.col-auto { + padding-left: 10px; + padding-right: 0px; +} + +label.col-form-label { + padding-bottom: 0px; + padding-top: 0px; +} + +.nav-item { + color: white; + font-size: 1rem; + } + +.nav-link { + color: white; + } + +.left_footer { + width: 40%; + float: left; + text-align:left; + display: inline-block; +} + +.right_footer { + width: 40%; + float: right; + text-align:right; + display: inline-block; +} + +.center_footer { + width: 20%; + overflow: hidden; + display: inline-block; + text-align: center; +} + 
+.wrapper { + padding-left: 30px; + padding-right: 30px; + clear: both; + position: relative; + overflow: hidden; +} + +div#navbar_links { + display: flex; +} + +div.col-12#genome-asset { + margin: auto; + max-width: 1400px; +} + +div.col-12#txt { + margin: auto; + max-width: 1400px; +} + +.asset_name { + width: 200px; +} + +.asset_desc { + width: 500px; + min-width: 250px; +} + +.actions { + min-width: 180px; + width: 180px; +} + +.size { + width: 150px; + min-width: 150px; +} + +.checksum { + width: 280px; + min-width: 280px; +} + +.container { + max-width: none; +} + +th.footer { + font-size:12px; + padding: 3px; + font-weight: bold; + text-align: center; + background-color: #464c56; + color: white; + min-width: 90px; +} + +td.footer { + font-size:12px; + padding: 3px; + text-align: center; + color: #464c56; + font-weight: bold; + font-family: Courier New; +} + +code.footer { + color:#464c56; +} diff --git a/seqcolapi/static/favicon.ico b/seqcolapi/static/favicon.ico new file mode 100644 index 0000000..117bca4 Binary files /dev/null and b/seqcolapi/static/favicon.ico differ diff --git a/seqcolapi/static/links_demo.html b/seqcolapi/static/links_demo.html new file mode 100644 index 0000000..a192233 --- /dev/null +++ b/seqcolapi/static/links_demo.html @@ -0,0 +1,269 @@ + + + + + + + + Sequence Collections API + + + + + + +
+ +

Sequence collection comparison matrix

+ +The square matrix below lists sequence collections on the x and y axes. At the intersection is a link to the /comparison endpoint that compares those two genomes. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
demo0
demo1
demo2
demo3
demo4
demo5
demo6
demo0=CompareCompareCompareCompareCompareCompare
demo1Compare=CompareCompareCompareCompareCompare
demo2CompareCompare=CompareCompareCompareCompare
demo3CompareCompareCompare=CompareCompareCompare
demo4CompareCompareCompareCompare=CompareCompare
demo5CompareCompareCompareCompareCompare=Compare
demo6CompareCompareCompareCompareCompareCompare=
+ +
+ +
+
+ seqcolapi + seqcol + + Python + +
+ Sheffield Computational Biology Lab +
+
+
+ + + + + + + \ No newline at end of file diff --git a/seqcolapi/static/links_ref.html b/seqcolapi/static/links_ref.html new file mode 100644 index 0000000..b14c71d --- /dev/null +++ b/seqcolapi/static/links_ref.html @@ -0,0 +1,317 @@ + + + + + + + + Sequence Collections API + + + + + + +
+ +

Sequence collection comparison matrix

+ +The square matrix below lists sequence collections on the x and y axes. At the intersection is a link to the /comparison endpoint that compares those two genomes. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Ensembl GRCh38 primary assembly
Ensembl GRCh38 toplevel assembly
UCSC hg38
NCBI GCA 000001405.28
chm13v2
chm13v1.1
Refgenie hg38
Refgenie hg38 primary
Ensembl GRCh38 primary assembly=CompareCompareCompareCompareCompareCompareCompare
Ensembl GRCh38 toplevel assemblyCompare=CompareCompareCompareCompareCompareCompare
UCSC hg38CompareCompare=CompareCompareCompareCompareCompare
NCBI GCA 000001405.28CompareCompareCompare=CompareCompareCompareCompare
chm13v2CompareCompareCompareCompare=CompareCompareCompare
chm13v1.1CompareCompareCompareCompareCompare=CompareCompare
Refgenie hg38CompareCompareCompareCompareCompareCompare=Compare
Refgenie hg38 primaryCompareCompareCompareCompareCompareCompareCompare=
+ +
+ +
+
+ seqcolapi + + Python +
+ Sheffield Computational Biology Lab +
+
+
+ + + + + + + \ No newline at end of file diff --git a/seqcolapi/static/logo_databio_long.svg b/seqcolapi/static/logo_databio_long.svg new file mode 100644 index 0000000..d046f99 --- /dev/null +++ b/seqcolapi/static/logo_databio_long.svg @@ -0,0 +1,223 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/seqcolapi/static/seqcol_list.html b/seqcolapi/static/seqcol_list.html new file mode 100644 index 0000000..8a71e8f --- /dev/null +++ b/seqcolapi/static/seqcol_list.html @@ -0,0 +1,454 @@ + + + + + + + + Sequence Collections API + + + + + + +
+ +

Sequence collection list

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AssemblySequence collection digest
T2T-CHM13v2.0cT0L9BSX-_X5Wv_lbpJRXAFOEPj0Dvav
HG02257.alt.pat.f1_v2MFxJDHkVdTBlPvUFRbYWDZYxmycvHSRp
HG02257.pri.mat.f1_v2PU_Crd5eY96THL1Jkf1oJ0ABf3hcQfPv
HG02559.alt.pat.f1_v2OCV0NxSbsWc-Xh3nPSQ0GrnAmYpRGFAL
HG02559.pri.mat.f1_v2-UZ3ln8PXKtOgREFMkdncoMzTSV-Eaj5
HG02486.alt.pat.f1_v28Qlx8ZKwKL4U5s100-rjUZCA7UcWxpJr
HG02486.pri.mat.f1_v2ZXK7B5SQ2iX_esmgYtwaOBtgTr3dwd7L
HG01891.pri.mat.f1_v2vUoInwW05o32k_Cq6-xkenrzLWF9oZ4N
HG01891.alt.pat.f1_v2Zu_hdPQWU5DTGGRPa3cf_0TVNjdspvj-
HG01258.pri.mat.f1_v2cH3KUtgjMAxCl6Tkn5aCPPl_Yqyo-mkl
HG03516.alt.pat.f1_v2mSSLkiEl3Y7Bub48I1Ng-iVLCZm3_wli
HG03516.pri.mat.f1_v2S3kEaQ_QvrAYuZs2R8Un6hg3f_1ibteZ
HG01123.pri.mat.f1_v2.1Ld18iSJB2oh7kTRGpFJD5bupqnHaXt-r
HG01258.alt.pat.f1_v2SQuRKoVbeO2JdcGRSO_WxmZsmAxbqSdb
HG01361.pri.mat.f1_v2uc3BXswbHLIuPEuBa-pY7MAT8qNNFHhY
HG01123.alt.pat.f1_v2.1RfpuPSsslqdCmPGat5yqoQF10qZXA2OJ
HG01361.alt.pat.f1_v24YXbbLQwvkukFrZDm-NUQpZggJ81HeoO
HG01358.pri.mat.f1_v2.1CxRryMc8SwVpuan6rL3Nw5-qnTU1JYCo
HG02622.pri.mat.f1_v2xCb3Ha1fm7Uz6dwvN5JXqdPLRVrMTTm9
HG02622.alt.pat.f1_v2s8IHAP13yprMBvOzZaOkDkCP6wks008y
HG02717.pri.mat.f1_v2rQTvFXRJg1fktSlSFGfkf1tGN6jV57n8
HG02630.alt.pat.f1_v2G45RS0KtkiRThy4lp_tzRzoTNe__YZ6M
HG02630.pri.mat.f1_v2JiBQCxx8g1OqKfuQdxW5H4-XNIsJ_E1B
HG01358.alt.pat.f1_v2.1f4NOc-aMUFU2JCm5tGQYAsBnzS9iTXwW
HG02717.alt.pat.f1_v2TppX7IsQ-f5TlUcLhqklj-Ya9KlytKIB
HG02572.alt.pat.f1_v2UIshAuHmM4BsVnAxjJEs3D7ZJV7Tx9uK
HG02572.pri.mat.f1_v2-xS7eWQFw9s3HxA-TQsHKtU-2qCc9J--
HG02886.pri.mat.f1_v25goiErAN_XrYjB-6MT9Og4QXZ0gsHca7
HG02886.alt.pat.f1_v2vTYYcCahSkIu5PME4JyLstVt7eOXekXD
HG01175.alt.pat.f1_v2zhDl_7kiYDY49LpM5_g3SJ99gkAn3hGj
HG01106.alt.pat.f1_v2b2veuoCX_cwS1bHf0E133npWU66K5X5Y
HG01175.pri.mat.f1_v2L2qjWuBkmwM1eDgVDCFuOjyqleUZ7X4U
HG00741.pri.mat.f1_v2sPeVwLWKqdHlatwcYf7u-OmcBJNmG_32
HG00741.alt.pat.f1_v2xZ0Xf-pTJSKc0oGUDV3zL3_TcpO5nl1d
HG01106.pri.mat.f1_v273AP6Fn5d7P7RB04axUk-24ssrnJb4Ab
HG00438.pri.mat.f1_v2-9M0kYfdvscDEVL13kKp87c8NCU2B5Hh
HG02148.alt.pat.f1_v2mmZHq6GxnHALdsbider2E5Jp-AhVxWVt
HG02148.pri.mat.f1_v2cq2ryytoR7YEu8UUHtRnNr5KDA4NErUQ
HG01952.pri.mat.f1_v2jcQVpDWreZwQAVQicTW4dNPQ8Fz3eYjh
HG01952.alt.pat.f1_v25Pum9BofAqDuL43GMQ6EqLwQVFQsTTSZ
HG00673.pri.mat.f1_v2fMW6DR6vGda_MfSmeScSip0XPmauTcLO
HG00621.alt.pat.f1_v27Z-1F3kSi-eXduHZIG5R3rZAGvJ5Depi
HG00673.alt.pat.f1_v24HylbyjZIYZP7XlgQ3N8jw4xJ8pgl7bb
HG00438.alt.pat.f1_v2Garq3gUkWDFi6vkyYkdQn_p5tUVu3_hm
HG00621.pri.mat.f1_v2BjbE7ZEeWbNVZeYqeyBXYScwlBVimZBB
HG01071.pri.mat.f1_v2x0CrFmo9JcS1QVIm1GY4XWjpl9p8WHRl
HG01928.pri.mat.f1_v2p2xs5S6BOA1LiQKMSMJ3snTQVIF0NtjW
HG01928.alt.pat.f1_v2emHHDrJgEwvhf03dJP-Pk8vXHK_KHeOr
HG00735.alt.pat.f1_v2ny1PsG4UOLGUzECAX2ZTxRaYaQGLiZZi
HG01071.alt.pat.f1_v2soFuT59nkwqdZ3i9FZ6GsPaC8v0UqpZU
HG00735.pri.mat.f1_v2BbH1ZrRSfhBhVHhYLBYq8FNiaSeJHryC
HG03579.pri.mat.f1_v2Hmdch7xzoR_8uyaCHQwvmGWn40e4c58U
HG03579.alt.pat.f1_v2cYrHK5KVoZeyjdS3Mo1srvDYoP44DtWn
HG01978.alt.pat.f1_v2FBe70obDKdaLQbFC6rigZBlrkjjhOJ6o
HG03453.pri.mat.f1_v2LXAPWhSKfrwQkCZsoGPy1W3XSGwwZJR1
HG01978.pri.mat.f1_v2r6ieZ0QGT0NLHuqtkpQQyCXiXXRBwExd
HG03540.pri.mat.f1_v2Kc-Q1c-dlAmYoTS9a5C-wnXXNTJZEL7Y
HG03453.alt.pat.f1_v2P0njMo_ngJB7KvnLAPjBPnGXoigeXAVA
HG03540.alt.pat.f1_v2y7kmkFlydCwj48nHNFJ76TMzNjfYPFno
HG03486.alt.pat.f1_v2yK-YzwV_nIfvoov5f5yCfX2WK18booij
NA18906.pri.mat.f1_v2JLxVTGB1J1NsHwQc9Ph1G1uvJoF0vcmX
NA19240.alt.pat.f1_v2EsxyPLersYlRzzZ-ccHQ5mJK0_sU2eUn
NA19240.pri.mat.f1_v2rMLfj_2guPmN7lVgtRCiSS2_baCN1whg
NA18906.alt.pat.f1_v2BqfaoAPziDGbqTeaTlLzQD_cf6frizzK
HG03486.pri.mat.f1_v2BuZj05B1DuUyRND9eYKf4LhrOMDBtyOk
HG02818.alt.pat.f1_v2WtvqOdQqFvqVdAzs8ql5-d8vwwtEOXUD
HG02818.pri.mat.f1_v2iB8zea9dHlENH43vvkCtcxZlJ4wT1Uz1
HG01243.alt.pat.f1_v2yFCTQeIXhkSUukadRWjWvOVRsz_OyTGq
HG02080.alt.pat.f1_v2tt6XTgH0zmAtOqbVHzM28MomIrnW4SuA
HG02723.pri.mat.f1_v2NcoX-bKBNYbA2o6EwJeQk1IH8ZtZZgps
HG02723.alt.pat.f1_v2Vdn0RtSTthV_5EImofY-lSeO3a3bwIQX
HG02080.pri.mat.f1_v2XVWtB0SreQEBLIfzbOjbKG5PEb1asj6k
HG01109.pri.mat.f1_v2PhKsGQm9pHaNLkimAYQPsornvIZpqvnH
HG01243.pri.mat.f1_v203eDIlAb6h7tiYvcU2f43-A-RmJzN632
NA20129.alt.pat.f1_v2tzg-YjJ70yBfIo5ijfN3LgsPAfc2fH-y
NA20129.pri.mat.f1_v2eYQOqMmclnUP_SYW5KnMCKcfCOtY2Jfn
HG01109.alt.pat.f1_v2t2DLI454ZtI0hxz3ublokln-RrQtZfLO
NA21309.pri.mat.f1_v2tJW0O7A3G4KGYoJ9OGwA0G2hLRm9MHQC
NA21309.alt.pat.f1_v2Sz7UMHSkMT6X29fRukTjWiKb2NPjkfpz
HG02109.pri.mat.f1_v2kfe4S1NWXJrzdkn7-PEmO14qQ_97iPcT
HG03492.alt.pat.f1_v2Lt9FTvjBEThMyHAm-_st-2ygIlqTUcSK
HG03492.pri.mat.f1_v2SXDV7XYbyfGfknp07q5u4Foz6P7j19pn
HG02055.alt.pat.f1_v2av3aqrEvSnd3t2EM0m6jXTtjC0ap8hGm
HG02109.alt.pat.f1_v2fEQuZ-F9oVaqzhXzl-6Ziq4Zj8XUpXQf
HG02055.pri.mat.f1_v2j1V8g5DkGVx7R_5W7y8M_yxMUwqgOXeF
HG03098.alt.pat.f1_v20PSK6PxA62LbmS0DRQb2VeH2UW2byEOl
HG03098.pri.mat.f1_v2tX5MdWuQMBqkg7lEbdozJNyseAKOdRqi
HG005.alt.pat.f1_v2MC8U95jiJqlpU7FkwtTWBILApRLW11tv
HG00733.alt.pat.f1_v2OdOkD0fYg_ryMFWq5fgNjxR4DxsdgAMo
HG005.pri.mat.f1_v2HhM9RLhBL4ggspYnU7_MI_QrLQZoUTqJ
HG00733.pri.mat.f1_v2tFKFtJwX3jVBaQcYgyLEOOt82rlJqou8
HG02145.pri.mat.f1_v2VgBPdkMRNxWi6UR2VXQDrYNJYtNn0Uf4
HG02145.alt.pat.f1_v2f0oF1fypWOJulxmykodrzqek3LEVHtTv
HG002.alt.pat.f1_v2rnrRAhWZmbGjP1CK-L9I1eHNfKJNqkoH
HG002.pri.mat.f1_v2tQLJhZVyD-ehl1lkqTMv-sieRJcejmvk
HG002.pat.cur.202110056BCEz05tbMxTTl6k4x9gFmgkNaXECShd
HG002.mat.cur.20211005XjCjW7CHIqVb6lTzxhAVL-vaMohzgXAt
+ +
+ +
+
+ seqcolapi + seqcol + + Python + +
+ Sheffield Computational Biology Lab +
+
+
+ + + + + + + \ No newline at end of file diff --git a/seqcolapi/static/seqcol_logo.svg b/seqcolapi/static/seqcol_logo.svg new file mode 100644 index 0000000..44202df --- /dev/null +++ b/seqcolapi/static/seqcol_logo.svg @@ -0,0 +1,95 @@ + + + + diff --git a/seqcolapi/static/seqcol_logo_old.svg b/seqcolapi/static/seqcol_logo_old.svg new file mode 100644 index 0000000..bcac5aa --- /dev/null +++ b/seqcolapi/static/seqcol_logo_old.svg @@ -0,0 +1,164 @@ + +image/svg+xml + \ No newline at end of file diff --git a/seqcolapi/templates/base.html b/seqcolapi/templates/base.html new file mode 100644 index 0000000..07a2ce8 --- /dev/null +++ b/seqcolapi/templates/base.html @@ -0,0 +1,58 @@ + + + + {% block head %} + + + + {% block title %}Sequence Collections API{% endblock %} + {% endblock %} + + + {% block nav %} + + {% endblock %} +
+ {% block content %} + No content + {% endblock %} +
+ {% block footer %} +
+
+ seqcolapi {{seqcolapi_version}} + seqcol {{seqcol_version}} + + Python {{python_version}} + +
+ Sheffield Computational Biology Lab +
+
+
+ {% endblock %} + + + + + + diff --git a/seqcolapi/templates/comparison_matrix.html b/seqcolapi/templates/comparison_matrix.html new file mode 100644 index 0000000..1ff72d0 --- /dev/null +++ b/seqcolapi/templates/comparison_matrix.html @@ -0,0 +1,47 @@ +{% extends "base.html" %} +{% block content %} +

Sequence collection comparison matrix

+ +The square matrix below lists sequence collections on the x and y axes. At the intersection is a link to the /comparison endpoint that compares those two genomes. + + + + + + + {% for x in results -%} + + {% endfor %} + +{%- for x in results -%} + + +{% for y in results %} + {% if x == y %} + + {% else %} + + {% endif %} +{%- endfor %} + +{% endfor %} + +
{{ x }}
{{ x }}=Compare
+{% endblock %} \ No newline at end of file diff --git a/seqcolapi/templates/index.html b/seqcolapi/templates/index.html new file mode 100644 index 0000000..aa38984 --- /dev/null +++ b/seqcolapi/templates/index.html @@ -0,0 +1,22 @@ +{% extends "base.html" %} +{% block title %}Sequence Collections API Demo{% endblock %} +{% block content %} +
+

Sequence Collections API Demo

+

+ Welcome to the Sequence Collections API. This server hosts an implementation of the GA4GH Sequence Collections specification (draft).

+ +

View the complete API documentation or try these examples:

+ +

Retrieving a sequence collection

+ + See demos here: + + +
+{% endblock %} diff --git a/seqcolapi/templates/seqcol_list.html b/seqcolapi/templates/seqcol_list.html new file mode 100644 index 0000000..4f116cd --- /dev/null +++ b/seqcolapi/templates/seqcol_list.html @@ -0,0 +1,18 @@ +{% extends "base.html" %} +{% block content %} +

Sequence collection list

+ + + + + + +{%- for x in results -%} + + + + +{% endfor %} + +
AssemblySequence collection digest
{{ x["assembly"] }}{{ x["seqcol_digest"] }}
+{% endblock %} \ No newline at end of file diff --git a/test_api/__init__.py b/test_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test_api/collection/demo0_collection.json b/test_api/collection/demo0_collection.json new file mode 100644 index 0000000..8bc1cac --- /dev/null +++ b/test_api/collection/demo0_collection.json @@ -0,0 +1,22 @@ +{ + "lengths": [ + 8, + 4, + 4 + ], + "names": [ + "chrX", + "chr1", + "chr2" + ], + "sequences": [ + "SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw", + "SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj", + "SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6" + ], + "sorted_name_length_pairs": [ + "ESV_rcaJ-zgrfBr6sJ7J9kqldWX2gG_K", + "I0_AywNhq4XLlcc1vZexw5cHygCK_bLh", + "V2tENgwhEWrKc8UNgFzrMwx7H3JgoIuq" + ] +} diff --git a/test_api/collection/demo1_collection.json b/test_api/collection/demo1_collection.json new file mode 100644 index 0000000..1d50798 --- /dev/null +++ b/test_api/collection/demo1_collection.json @@ -0,0 +1,18 @@ +{ + "lengths": [ + 4, + 4 + ], + "names": [ + "chr1", + "chr2" + ], + "sequences": [ + "SQ.aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2", + "SQ.ORLd3OQy8whca09ypkTExMc_ByFalnnO" + ], + "sorted_name_length_pairs": [ + "I0_AywNhq4XLlcc1vZexw5cHygCK_bLh", + "V2tENgwhEWrKc8UNgFzrMwx7H3JgoIuq" + ] +} \ No newline at end of file diff --git a/test_api/collection/demo2_collection.json b/test_api/collection/demo2_collection.json new file mode 100644 index 0000000..79a681a --- /dev/null +++ b/test_api/collection/demo2_collection.json @@ -0,0 +1,22 @@ +{ + "lengths": [ + 4, + 4, + 8 + ], + "names": [ + "chr1", + "chr2", + "chrX" + ], + "sequences": [ + "SQ.aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2", + "SQ.ORLd3OQy8whca09ypkTExMc_ByFalnnO", + "SQ.733WIVstq8gdCl6w8NhKc5sGdN6SZ5-I" + ], + "sorted_name_length_pairs": [ + "ESV_rcaJ-zgrfBr6sJ7J9kqldWX2gG_K", + "I0_AywNhq4XLlcc1vZexw5cHygCK_bLh", + "V2tENgwhEWrKc8UNgFzrMwx7H3JgoIuq" + ] +} \ No newline at end of file diff --git a/test_api/collection/demo3_collection.json 
b/test_api/collection/demo3_collection.json new file mode 100644 index 0000000..4735a12 --- /dev/null +++ b/test_api/collection/demo3_collection.json @@ -0,0 +1,22 @@ +{ + "lengths": [ + 4, + 4, + 8 + ], + "names": [ + "1", + "2", + "X" + ], + "sequences": [ + "SQ.aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2", + "SQ.ORLd3OQy8whca09ypkTExMc_ByFalnnO", + "SQ.733WIVstq8gdCl6w8NhKc5sGdN6SZ5-I" + ], + "sorted_name_length_pairs": [ + "FD2SCYHfvC0UE3PYFnavbAR3I76wtI5x", + "rkhUjVQn9nSaQ-FVrlxVBlc7nqifKvUk", + "zbEAQ7f3J3xyW-jWvzXeLKFc1qS-zh0h" + ] + } \ No newline at end of file diff --git a/test_api/collection/demo4_collection.json b/test_api/collection/demo4_collection.json new file mode 100644 index 0000000..1430b15 --- /dev/null +++ b/test_api/collection/demo4_collection.json @@ -0,0 +1,22 @@ +{ + "lengths": [ + 4, + 4, + 8 + ], + "names": [ + "chr2", + "chr1", + "chrX" + ], + "sequences": [ + "SQ.aKF498dAxcJAqme6QYQ7EZ07-fiw8Kw2", + "SQ.ORLd3OQy8whca09ypkTExMc_ByFalnnO", + "SQ.733WIVstq8gdCl6w8NhKc5sGdN6SZ5-I" + ], + "sorted_name_length_pairs": [ + "ESV_rcaJ-zgrfBr6sJ7J9kqldWX2gG_K", + "I0_AywNhq4XLlcc1vZexw5cHygCK_bLh", + "V2tENgwhEWrKc8UNgFzrMwx7H3JgoIuq" + ] +} \ No newline at end of file diff --git a/test_api/collection/demo5_collection.json b/test_api/collection/demo5_collection.json new file mode 100644 index 0000000..a4bcb7b --- /dev/null +++ b/test_api/collection/demo5_collection.json @@ -0,0 +1,22 @@ +{ + "lengths": [ + 4, + 4, + 8 + ], + "names": [ + "chr1", + "chr2", + "chrX" + ], + "sequences": [ + "SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj", + "SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6", + "SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw" + ], + "sorted_name_length_pairs": [ + "ESV_rcaJ-zgrfBr6sJ7J9kqldWX2gG_K", + "I0_AywNhq4XLlcc1vZexw5cHygCK_bLh", + "V2tENgwhEWrKc8UNgFzrMwx7H3JgoIuq" + ] + } \ No newline at end of file diff --git a/test_api/collection/demo6_collection.json b/test_api/collection/demo6_collection.json new file mode 100644 index 0000000..b19cd1f --- /dev/null +++ 
b/test_api/collection/demo6_collection.json @@ -0,0 +1,22 @@ +{ + "lengths": [ + 8, + 4, + 4 + ], + "names": [ + "chrX", + "chr1", + "chr2" + ], + "sequences": [ + "SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw", + "SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj", + "SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6" + ], + "sorted_name_length_pairs": [ + "ESV_rcaJ-zgrfBr6sJ7J9kqldWX2gG_K", + "I0_AywNhq4XLlcc1vZexw5cHygCK_bLh", + "V2tENgwhEWrKc8UNgFzrMwx7H3JgoIuq" + ] + } \ No newline at end of file diff --git a/test_api/comparison/0-vs-1-comparison.json b/test_api/comparison/0-vs-1-comparison.json new file mode 100644 index 0000000..b3a3086 --- /dev/null +++ b/test_api/comparison/0-vs-1-comparison.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "fLf5M0BOIPIqcfbE6R8oYwxsy-PnoV32", + "b": "5TK2xeaOB3WmheaDFTzE6G2zdStHNQCM" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "b": { + "lengths": 2, + "names": 2, + "sequences": 2, + "sorted_name_length_pairs": 2 + }, + "a_and_b": { + "lengths": 2, + "names": 2, + "sequences": 0, + "sorted_name_length_pairs": 2 + }, + "a_and_b_same_order": { + "lengths": true, + "names": true, + "sequences": null, + "sorted_name_length_pairs": true + } + } +} \ No newline at end of file diff --git a/test_api/comparison/0-vs-2-comparison.json b/test_api/comparison/0-vs-2-comparison.json new file mode 100644 index 0000000..f34c9e3 --- /dev/null +++ b/test_api/comparison/0-vs-2-comparison.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "fLf5M0BOIPIqcfbE6R8oYwxsy-PnoV32", + "b": "V7QPcpp5S1wYnPBmMGiO53LXRxr9tTQa" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + 
}, + "b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b": { + "lengths": 3, + "names": 3, + "sequences": 0, + "sorted_name_length_pairs": 3 + }, + "a_and_b_same_order": { + "lengths": false, + "names": false, + "sequences": null, + "sorted_name_length_pairs": true + } + } +} \ No newline at end of file diff --git a/test_api/comparison/1-vs-2-comparison.json b/test_api/comparison/1-vs-2-comparison.json new file mode 100644 index 0000000..7db299f --- /dev/null +++ b/test_api/comparison/1-vs-2-comparison.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "5TK2xeaOB3WmheaDFTzE6G2zdStHNQCM", + "b": "V7QPcpp5S1wYnPBmMGiO53LXRxr9tTQa" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 2, + "names": 2, + "sequences": 2, + "sorted_name_length_pairs": 2 + }, + "b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b": { + "lengths": 2, + "names": 2, + "sequences": 2, + "sorted_name_length_pairs": 2 + }, + "a_and_b_same_order": { + "lengths": true, + "names": true, + "sequences": true, + "sorted_name_length_pairs": true + } + } +} \ No newline at end of file diff --git a/test_api/comparison/2-vs-3-comparison.json b/test_api/comparison/2-vs-3-comparison.json new file mode 100644 index 0000000..07e28a0 --- /dev/null +++ b/test_api/comparison/2-vs-3-comparison.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "V7QPcpp5S1wYnPBmMGiO53LXRxr9tTQa", + "b": "_G-IybZWJOcCWYagm6qmjQTaRsKhNRT2" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b": { + 
"lengths": 3, + "names": 0, + "sequences": 3, + "sorted_name_length_pairs": 0 + }, + "a_and_b_same_order": { + "lengths": true, + "names": null, + "sequences": true, + "sorted_name_length_pairs": null + } + } +} \ No newline at end of file diff --git a/test_api/comparison/5-vs-6-comparison.json b/test_api/comparison/5-vs-6-comparison.json new file mode 100644 index 0000000..90aafaa --- /dev/null +++ b/test_api/comparison/5-vs-6-comparison.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "JPd9Y-hwnhGD7HPe3yka4Qtx2YsIL8tW", + "b": "fLf5M0BOIPIqcfbE6R8oYwxsy-PnoV32" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b_same_order": { + "lengths": false, + "names": false, + "sequences": false, + "sorted_name_length_pairs": true + } + } +} \ No newline at end of file diff --git a/test_api/comparison/compare_different_names.json b/test_api/comparison/compare_different_names.json new file mode 100644 index 0000000..236c507 --- /dev/null +++ b/test_api/comparison/compare_different_names.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "fLf5M0BOIPIqcfbE6R8oYwxsy-PnoV32", + "b": "TKB7n_14iKSFjljBA-TSVjeYpxPQe0-k" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b": { + "lengths": 3, + "names": 0, + "sequences": 3, + "sorted_name_length_pairs": 0 + }, + 
"a_and_b_same_order": { + "lengths": true, + "names": null, + "sequences": true, + "sorted_name_length_pairs": null + } + } +} \ No newline at end of file diff --git a/test_api/comparison/compare_different_order.json b/test_api/comparison/compare_different_order.json new file mode 100644 index 0000000..0f28b06 --- /dev/null +++ b/test_api/comparison/compare_different_order.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "fLf5M0BOIPIqcfbE6R8oYwxsy-PnoV32", + "b": "JPd9Y-hwnhGD7HPe3yka4Qtx2YsIL8tW" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b_same_order": { + "lengths": false, + "names": false, + "sequences": false, + "sorted_name_length_pairs": true + } + } +} \ No newline at end of file diff --git a/test_api/comparison/compare_pair_swap.json b/test_api/comparison/compare_pair_swap.json new file mode 100644 index 0000000..0f95343 --- /dev/null +++ b/test_api/comparison/compare_pair_swap.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "fLf5M0BOIPIqcfbE6R8oYwxsy-PnoV32", + "b": "E6zGtGuc8wKYmCMw5gaLW3ppyXsoO6p4" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 1 + }, + "a_and_b_same_order": { + "lengths": true, + "names": false, + "sequences": true, + 
"sorted_name_length_pairs": true + } + } +} \ No newline at end of file diff --git a/test_api/comparison/compare_subset.json b/test_api/comparison/compare_subset.json new file mode 100644 index 0000000..3063daa --- /dev/null +++ b/test_api/comparison/compare_subset.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "fLf5M0BOIPIqcfbE6R8oYwxsy-PnoV32", + "b": "8aA37TYgiVohRqfRhXEeklIAXf2Rs8jw" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "b": { + "lengths": 2, + "names": 2, + "sequences": 2, + "sorted_name_length_pairs": 2 + }, + "a_and_b": { + "lengths": 2, + "names": 2, + "sequences": 2, + "sorted_name_length_pairs": 2 + }, + "a_and_b_same_order": { + "lengths": null, + "names": true, + "sequences": true, + "sorted_name_length_pairs": true + } + } +} \ No newline at end of file diff --git a/test_api/comparison/compare_swap_wo_coords.json b/test_api/comparison/compare_swap_wo_coords.json new file mode 100644 index 0000000..1339b13 --- /dev/null +++ b/test_api/comparison/compare_swap_wo_coords.json @@ -0,0 +1,42 @@ +{ + "digests": { + "a": "fLf5M0BOIPIqcfbE6R8oYwxsy-PnoV32", + "b": "EkMSPx-_MdAzj2tWGfdFSVsuv03OznPn" + }, + "attributes": { + "a_only": [], + "b_only": [], + "a_and_b": [ + "lengths", + "names", + "sequences", + "sorted_name_length_pairs" + ] + }, + "array_elements": { + "a": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b": { + "lengths": 3, + "names": 3, + "sequences": 3, + "sorted_name_length_pairs": 3 + }, + "a_and_b_same_order": { + "lengths": true, + "names": false, + "sequences": true, + "sorted_name_length_pairs": true + } + } +} \ No newline at end of file diff --git a/test_api/conftest.py 
b/test_api/conftest.py new file mode 100644 index 0000000..e5fb69e --- /dev/null +++ b/test_api/conftest.py @@ -0,0 +1,57 @@ +import pytest +import requests + + +def pytest_addoption(parser): + """ + Add an option to specify the API root + """ + parser.addoption("--api_root", action="store", default="http://0.0.0.0:8100") + + +@pytest.fixture() +def api_root(pytestconfig): + """ + Get the API root from the command line argument, --api_root + """ + return pytestconfig.getoption("api_root") + + +def check_server_is_running(api_root): + """ + Check if a server is responding at the given API root: True only for an HTTP 200, False on any error + """ + try: + print(f"Checking if service is running at {api_root}") + res = requests.get(f"{api_root}/") + # Only a 200 counts as running (no assert: those are stripped under `python -O`) + return res.status_code == 200 + except Exception: # not a bare except: let KeyboardInterrupt/SystemExit propagate + return False + + +REQ_SERVICE_MARK = "require_service" + + +def pytest_configure(config): + """ + Register a custom marker for tests that require a server. + You can add this marker to a test with `@pytest.mark.require_service`, + and it will be skipped if the server is not running. 
+ """ + config.addinivalue_line( + "markers", f"{REQ_SERVICE_MARK}: test to only run when API root is available" + ) + + +def pytest_collection_modifyitems(config, items): + """ + Skip tests marked with `@pytest.mark.require_service` if the server is not running + """ + api_root = config.getoption("api_root") + skip_missing_service = pytest.mark.skip(reason="need API to run") + if not check_server_is_running(api_root): + print("Skipping tests that require a server to be running...") + for item in items: + if REQ_SERVICE_MARK in item.keywords: + item.add_marker(skip_missing_service) diff --git a/test_api/test_compliance.py b/test_api/test_compliance.py new file mode 100644 index 0000000..97f884b --- /dev/null +++ b/test_api/test_compliance.py @@ -0,0 +1,115 @@ +# Draft of a compliance suite for the API + +import json +import pytest +import requests + +# Collection endpoints +DEMO_FILES = [ + "demo0.fa", + "demo1.fa.gz", + "demo2.fa", + "demo3.fa", + "demo4.fa", + "demo5.fa.gz", + "demo6.fa", +] + +API_TEST_DIR = "test_api" + +COLLECTION_TESTS = [ + (DEMO_FILES[0], f"{API_TEST_DIR}/collection/demo0_collection.json"), + (DEMO_FILES[1], f"{API_TEST_DIR}/collection/demo1_collection.json"), + (DEMO_FILES[2], f"{API_TEST_DIR}/collection/demo2_collection.json"), + (DEMO_FILES[3], f"{API_TEST_DIR}/collection/demo3_collection.json"), + (DEMO_FILES[4], f"{API_TEST_DIR}/collection/demo4_collection.json"), + (DEMO_FILES[5], f"{API_TEST_DIR}/collection/demo5_collection.json"), + (DEMO_FILES[6], f"{API_TEST_DIR}/collection/demo6_collection.json"), +] + +COMPARISON_TESTS = [ + f"{API_TEST_DIR}/comparison/compare_subset.json", # subset + f"{API_TEST_DIR}/comparison/compare_different_names.json", # same sequences, different names + f"{API_TEST_DIR}/comparison/compare_different_order.json", # same sequences, name order switch, but equivalent coordinate system + f"{API_TEST_DIR}/comparison/compare_pair_swap.json", # swapped name-length-pairs + 
f"{API_TEST_DIR}/comparison/compare_swap_wo_coords.json", # swapped name-length-pairs, but no coord system change +] + +# This is optional, so we could turn off for a compliance test +TEST_SORTED_NAME_LENGTH_PAIRS = True + +# api_root = "http://0.0.0.0:8100" +demo_root = "demo_fasta" # was an absolute path into a developer's home dir; assumes the repo-root demo_fasta/ folder (TODO confirm) +demo_file = "demo0.fa" +response_file = "tests/demo0_collection.json" +import refget + + +def read_url(url): + """Fetch `url` and parse its UTF-8 body as YAML; HTTP errors propagate to the caller.""" + import yaml + + print("Reading URL: {}".format(url)) + from urllib.request import urlopen + from urllib.error import HTTPError + + # No try/except here: catching HTTPError only to re-raise it was a no-op. + # The context manager closes the connection once the body has been read. + with urlopen(url) as response: + data = response.read() # a `bytes` object + text = data.decode("utf-8") + print(text) + return yaml.safe_load(text) + + +def check_collection(api_root, demo_file, response_file): + + # Need schema to make sure we eliminate inherent attributes correctly + schema_path = "https://schema.databio.org/refget/SeqColArraySetInherent.yaml" + + schema = read_url(schema_path) + print(f"Loading fasta file at '{demo_root}/{demo_file}'") + digest = refget.fasta_file_to_digest(f"{demo_root}/{demo_file}", schema=schema) + print(f"Checking digest: {digest}") + res = requests.get(f"{api_root}/collection/{digest}") + server_answer = json.loads(res.content) + with open(response_file) as fp: + correct_answer = json.load(fp) + + assert ( + server_answer["sequences"] == correct_answer["sequences"] + ), f"Collection endpoint failed: sequence mismatch for {demo_file}" + assert ( + server_answer["names"] == correct_answer["names"] + ), f"Collection endpoint failed: names mismatch for {demo_file}" + assert ( + server_answer["lengths"] == correct_answer["lengths"] + ), f"Collection endpoint failed: lengths mismatch for {demo_file}" + if TEST_SORTED_NAME_LENGTH_PAIRS: + assert ( + server_answer["sorted_name_length_pairs"] == correct_answer["sorted_name_length_pairs"] + ), f"Collection endpoint failed: sorted_name_length_pairs mismatch for {demo_file}" + + +def 
check_comparison(api_root, response_file): + with open(response_file) as fp: + correct_answer = json.load(fp) + res = requests.get( + f"{api_root}/comparison/{correct_answer['digests']['a']}/{correct_answer['digests']['b']}" + ) + server_answer = json.loads(res.content) + assert server_answer == correct_answer, "Comparison endpoint failed" + + +@pytest.mark.require_service +class TestAPI: + + @pytest.mark.parametrize("test_values", COLLECTION_TESTS) + def test_collection_endpoint(self, api_root, test_values): + # print("Service unavailable: ", SERVICE_UNAVAILABLE) + check_collection(api_root, *test_values) + + @pytest.mark.parametrize("response_file", COMPARISON_TESTS) + def test_comparison_endpoint(self, api_root, response_file): + # print("Service unavailable: ", SERVICE_UNAVAILABLE) + check_comparison(api_root, response_file) diff --git a/test_fasta/base.fa b/test_fasta/base.fa new file mode 100644 index 0000000..dd08063 --- /dev/null +++ b/test_fasta/base.fa @@ -0,0 +1,6 @@ +>chrX +TTGGGGAA +>chr1 +GGAA +>chr2 +GCGC diff --git a/test_fasta/base.fa.checksums b/test_fasta/base.fa.checksums new file mode 100644 index 0000000..35a2aba --- /dev/null +++ b/test_fasta/base.fa.checksums @@ -0,0 +1,3 @@ +chrX 8 SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 +chr1 4 SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 +chr2 4 SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6 92c6a56c9e9459d8a42b96f7884710bc diff --git a/test_fasta/different_names.fa b/test_fasta/different_names.fa new file mode 100644 index 0000000..0358591 --- /dev/null +++ b/test_fasta/different_names.fa @@ -0,0 +1,6 @@ +>X +TTGGGGAA +>1 +GGAA +>2 +GCGC diff --git a/test_fasta/different_names.fa.checksums b/test_fasta/different_names.fa.checksums new file mode 100644 index 0000000..59e84c7 --- /dev/null +++ b/test_fasta/different_names.fa.checksums @@ -0,0 +1,3 @@ +X 8 SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 +1 4 SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 
31fc6ca291a32fb9df82b85e5f077e31 +2 4 SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6 92c6a56c9e9459d8a42b96f7884710bc diff --git a/test_fasta/different_order.fa b/test_fasta/different_order.fa new file mode 100644 index 0000000..2a462d4 --- /dev/null +++ b/test_fasta/different_order.fa @@ -0,0 +1,6 @@ +>chr1 +GGAA +>chr2 +GCGC +>chrX +TTGGGGAA diff --git a/test_fasta/different_order.fa.checksums b/test_fasta/different_order.fa.checksums new file mode 100644 index 0000000..b9fccde --- /dev/null +++ b/test_fasta/different_order.fa.checksums @@ -0,0 +1,3 @@ +chr1 4 SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 +chr2 4 SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6 92c6a56c9e9459d8a42b96f7884710bc +chrX 8 SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 diff --git a/test_fasta/pair_swap.fa b/test_fasta/pair_swap.fa new file mode 100644 index 0000000..51ceaad --- /dev/null +++ b/test_fasta/pair_swap.fa @@ -0,0 +1,6 @@ +>chr2 +TTGGGGAA +>chr1 +GGAA +>chrX +GCGC diff --git a/test_fasta/pair_swap.fa.checksums b/test_fasta/pair_swap.fa.checksums new file mode 100644 index 0000000..8886cb9 --- /dev/null +++ b/test_fasta/pair_swap.fa.checksums @@ -0,0 +1,3 @@ +chr2 8 SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 +chr1 4 SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 +chrX 4 SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6 92c6a56c9e9459d8a42b96f7884710bc diff --git a/test_fasta/subset.fa b/test_fasta/subset.fa new file mode 100644 index 0000000..e036031 --- /dev/null +++ b/test_fasta/subset.fa @@ -0,0 +1,4 @@ +>chrX +TTGGGGAA +>chr1 +GGAA diff --git a/test_fasta/subset.fa.checksums b/test_fasta/subset.fa.checksums new file mode 100644 index 0000000..e5f4c86 --- /dev/null +++ b/test_fasta/subset.fa.checksums @@ -0,0 +1,2 @@ +chrX 8 SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 +chr1 4 SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 diff --git a/test_fasta/swap_coords.fa.checksums 
b/test_fasta/swap_coords.fa.checksums new file mode 100644 index 0000000..36b8f8f --- /dev/null +++ b/test_fasta/swap_coords.fa.checksums @@ -0,0 +1,3 @@ +chrX 8 SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 +chr2 4 SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 +chr1 4 SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6 92c6a56c9e9459d8a42b96f7884710bc diff --git a/test_fasta/swap_wo_coords.fa b/test_fasta/swap_wo_coords.fa new file mode 100644 index 0000000..979307d --- /dev/null +++ b/test_fasta/swap_wo_coords.fa @@ -0,0 +1,6 @@ +>chrX +TTGGGGAA +>chr2 +GGAA +>chr1 +GCGC diff --git a/test_fasta/swap_wo_coords.fa.checksums b/test_fasta/swap_wo_coords.fa.checksums new file mode 100644 index 0000000..36b8f8f --- /dev/null +++ b/test_fasta/swap_wo_coords.fa.checksums @@ -0,0 +1,3 @@ +chrX 8 SQ.iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 +chr2 4 SQ.YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 +chr1 4 SQ.AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6 92c6a56c9e9459d8a42b96f7884710bc