Skip to content

Commit

Permalink
Merge pull request #206 from CanDIG/daisieh/vault
Browse files Browse the repository at this point in the history
Vault stores s3 secrets
  • Loading branch information
daisieh authored Jun 11, 2022
2 parents 6b5be82 + 490808e commit f289068
Show file tree
Hide file tree
Showing 12 changed files with 112 additions and 107 deletions.
12 changes: 0 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,6 @@ COPY . /app/htsget_server

WORKDIR /app/htsget_server

# copy env vars into config.ini
ARG opa_secret
ARG opa_url
ARG candig_auth
ARG minio_url
ARG minio_bucket_name
RUN sed -i s@\<CANDIG_OPA_SECRET\>@${opa_secret}@ config.ini \
&& sed -i s@\<OPA_URL\>@${opa_url}@ config.ini \
&& sed -i s@\<CANDIG_AUTHORIZATION\>@${candig_auth}@ config.ini \
&& sed -i s@\<MINIO_URL\>@${minio_url}@ config.ini \
&& sed -i s@\<MINIO_BUCKET_NAME\>@${minio_bucket_name}@ config.ini

RUN touch initial_setup && pip install --no-cache-dir -r requirements.txt

RUN sqlite3 data/files.db -init data/files.sql
Expand Down
7 changes: 1 addition & 6 deletions config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,8 @@ BufSize = 1000000
LocalFilesPath = ./data/files
DBPath = sqlite:///./data/files.db

[minio]
EndPoint = <MINIO_URL>
AccessKey = <MINIO_ACCESS_KEY>
SecretKey = <MINIO_SECRET_KEY>
BucketName = <MINIO_BUCKET_NAME>

[authz]
CANDIG_AUTHORIZATION = <CANDIG_AUTHORIZATION>
CANDIG_OPA_SECRET = <CANDIG_OPA_SECRET>
CANDIG_OPA_URL = <OPA_URL>
CANDIG_VAULT_URL = <VAULT_URL>
10 changes: 5 additions & 5 deletions data/files.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ CREATE TABLE access_method (
PRIMARY KEY (id),
FOREIGN KEY(drs_object_id) REFERENCES drs_object (id)
);
INSERT INTO access_method VALUES(1,'NA18537.vcf.gz.tbi','file','','','file:///app/htsget_server/data/files/NA18537.vcf.gz.tbi','[]');
INSERT INTO access_method VALUES(2,'NA18537.vcf.gz','file','','','file:///app/htsget_server/data/files/NA18537.vcf.gz','[]');
INSERT INTO access_method VALUES(3,'NA20787.vcf.gz.tbi','s3','NA20787.vcf.gz.tbi','','','[]');
INSERT INTO access_method VALUES(4,'NA20787.vcf.gz','s3','NA20787.vcf.gz','','','[]');
INSERT INTO access_method VALUES(1,'NA18537.vcf.gz.tbi','s3','docker.localhost:9000/samples/NA18537.vcf.gz.tbi','','','[]');
INSERT INTO access_method VALUES(2,'NA18537.vcf.gz','s3','docker.localhost:9000/samples/NA18537.vcf.gz','','','[]');
INSERT INTO access_method VALUES(3,'NA20787.vcf.gz.tbi','s3','docker.localhost:9000/samples/NA20787.vcf.gz.tbi','','','[]');
INSERT INTO access_method VALUES(4,'NA20787.vcf.gz','s3','docker.localhost:9000/samples/NA20787.vcf.gz','','','[]');
CREATE TABLE content_object (
id INTEGER NOT NULL,
drs_object_id INTEGER,
Expand All @@ -60,5 +60,5 @@ CREATE TABLE dataset_association (
FOREIGN KEY(dataset_id) REFERENCES dataset (id),
FOREIGN KEY(drs_object_id) REFERENCES drs_object (id)
);
INSERT INTO dataset_association VALUES('controlled4','NA20787');
INSERT INTO dataset_association VALUES('controlled4','NA18537');
COMMIT;
31 changes: 20 additions & 11 deletions entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,26 @@

set -Euo pipefail

export VAULT_S3_TOKEN=$(cat /run/secrets/vault-s3-token)

if [[ -f "initial_setup" ]]; then
if [[ -f "/run/secrets/cert.pem" ]]; then
cat /run/secrets/cert.pem >> /usr/local/lib/python3.7/site-packages/certifi/cacert.pem
fi

ACCESS=$(cat /run/secrets/access)
sed -i s@\<MINIO_ACCESS_KEY\>@$ACCESS@ config.ini

SECRET=$(cat /run/secrets/secret)
sed -i s@\<MINIO_SECRET_KEY\>@$SECRET@ config.ini
rm initial_setup
if [[ -f "/run/secrets/cert.pem" ]]; then
cat /run/secrets/cert.pem >> /usr/local/lib/python3.7/site-packages/certifi/cacert.pem
fi

sed -i s@\<CANDIG_OPA_SECRET\>@$OPA_SECRET@ config.ini
sed -i s@\<OPA_URL\>@$OPA_URL@ config.ini
sed -i s@\<VAULT_URL\>@$VAULT_URL@ config.ini
sed -i s@\<CANDIG_AUTHORIZATION\>@$CANDIG_AUTH@ config.ini

# substitute the Vault token into renew_token.sh, then add a crontab entry that runs it every three hours
sed -i s@\<VAULT_S3_TOKEN\>@$VAULT_S3_TOKEN@ renew_token.sh
crontab -l > cron_bkp
echo "0 */3 * * * bash /app/htsget_server/renew_token.sh" >> cron_bkp
crontab cron_bkp
rm cron_bkp

rm initial_setup
fi

python3 htsget_server/server.py $@
python3 htsget_server/server.py $@
3 changes: 2 additions & 1 deletion htsget_server/authz.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import requests
import json
import os
from config import AUTHZ, TEST_KEY, CANDIG_OPA_SITE_ADMIN_KEY
from config import AUTHZ, TEST_KEY, CANDIG_OPA_SITE_ADMIN_KEY, VAULT_S3_TOKEN
from flask import Flask
import drs_operations
import re


app = Flask(__name__)
Expand Down
27 changes: 3 additions & 24 deletions htsget_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,19 @@
config.read('./config.ini')

AUTHZ = config['authz']
CANDIG_OPA_SITE_ADMIN_KEY = os.environ.get("CANDIG_OPA_SITE_ADMIN_KEY")
if CANDIG_OPA_SITE_ADMIN_KEY is None:
CANDIG_OPA_SITE_ADMIN_KEY = "site_admin"
CANDIG_OPA_SITE_ADMIN_KEY = os.getenv("CANDIG_OPA_SITE_ADMIN_KEY", "site_admin")

DB_PATH = config['paths']['DBPath']
LOCAL_FILE_PATH = config['paths']['LocalFilesPath']

MINIO = config['minio']
MINIO_END_POINT = MINIO['EndPoint']
MINIO_ACCESS_KEY = MINIO['AccessKey']
MINIO_SECRET_KEY = MINIO['SecretKey']
MINIO_BUCKET_NAME = MINIO['BucketName']

CHUNK_SIZE = int(config['DEFAULT']['ChunkSize'])

PORT = config['DEFAULT']['Port']

TEST_KEY = os.environ.get("HTSGET_TEST_KEY")
if TEST_KEY is None:
TEST_KEY = "testtesttest"
TEST_KEY = os.getenv("HTSGET_TEST_KEY", "testtesttest")

USE_MINIO_SANDBOX = False
if os.environ.get("USE_MINIO_SANDBOX") == "True":
USE_MINIO_SANDBOX = True

def get_minio_client():
if USE_MINIO_SANDBOX:
return Minio(
"play.min.io:9000",
access_key="Q3AM3UQ867SPQQA43P2F",
secret_key="zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"
), "testhtsget"
return Minio(
MINIO_END_POINT,
access_key=MINIO_ACCESS_KEY,
secret_key=MINIO_SECRET_KEY
), MINIO_BUCKET_NAME
VAULT_S3_TOKEN = os.getenv("VAULT_S3_TOKEN", "none")
2 changes: 1 addition & 1 deletion htsget_server/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def create_drs_object(obj):
if 'region' in method:
new_method.region = method['region']
if 'access_id' in method:
new_method.access_id = method['access_id']
new_method.access_id = method['access_id'].replace("/",";")
if 'access_url' in method:
new_method.url = method['access_url']['url']
if 'headers' in method['access_url']:
Expand Down
2 changes: 1 addition & 1 deletion htsget_server/drs_openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ components:
- s3
access_id:
type: string
description: The filename of the file
description: The location of the file, specified as endpoint/bucket/filename.
region:
type: string
description: >-
Expand Down
65 changes: 37 additions & 28 deletions htsget_server/drs_operations.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from minio import Minio
import connexion
import database
from pathlib import Path
from config import LOCAL_FILE_PATH, get_minio_client
from config import AUTHZ, VAULT_S3_TOKEN
from flask import request
import os
import re
import authz
import requests

# API endpoints
def get_service_info():
Expand Down Expand Up @@ -38,38 +39,46 @@ def list_objects():


def get_access_url(object_id, access_id):
client, bucket = get_minio_client()
try:
result = client.stat_object(bucket_name=bucket, object_name=access_id)
url = client.presigned_get_object(bucket_name=bucket, object_name=access_id)
except Exception as e:
return {"message": str(e)}, 500
return {"url": url}, 200
id_parse = re.match(r"(https*:\/\/)*(.+?)[;\/](.+?)[;\/](.+)$", access_id)
if id_parse is not None:
endpoint = id_parse.group(2)
bucket = id_parse.group(3)
object_name = id_parse.group(4)
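# Endpoints on play.min.io are treated as the sandbox: use the hard-coded sandbox credentials and the testhtsget bucket instead of asking Vault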
if "play.min.io" in endpoint:
client = Minio(
"play.min.io:9000",
access_key="Q3AM3UQ867SPQQA43P2F",
secret_key="zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"
)
bucket = "testhtsget"
else:
response = requests.get(
AUTHZ['CANDIG_VAULT_URL'] + f"/v1/aws/{endpoint}/{bucket}",
headers={"Authorization": f"Bearer {VAULT_S3_TOKEN}"}
)
if response.status_code == 200:
client = Minio(
endpoint,
access_key=response.json()["data"]["access"],
secret_key=response.json()["data"]["secret"]
)
else:
return {"message": f"Vault error: {response.text}"}, response.status_code
try:
result = client.stat_object(bucket_name=bucket, object_name=object_name)
url = client.presigned_get_object(bucket_name=bucket, object_name=object_name)
except Exception as e:
return {"message": str(e)}, 500
return {"url": url}, 200
else:
return {"message": f"Malformed access_id {access_id}: should be in the form endpoint/bucket/item"}, 400


def post_object():
if not authz.is_site_admin(request):
return {"message": "User is not authorized to POST"}, 403
client, bucket = get_minio_client()
new_object = database.create_drs_object(connexion.request.json)
if "access_methods" in new_object:
for method in new_object['access_methods']:
if 'access_id' in method and method['access_id'] != "":
# check to see if it's already there; otherwise, upload it
(url_obj, status_code) = get_access_url(new_object['id'], method['access_id'])
if status_code != 200:
try:
#create the minio bucket/object/etc
if 'NoSuchBucket' in url_obj['message']:
if 'region' in method:
client.make_bucket(bucket, location=method['region'])
else:
client.make_bucket(bucket)
file = Path(LOCAL_FILE_PATH).joinpath(new_object['id'])
with Path.open(file, "rb") as fp:
result = client.put_object(bucket, new_object['id'], fp, file.stat().st_size)
except Exception as e:
return {"message": str(e)}, 500
return new_object, 200


Expand Down
6 changes: 3 additions & 3 deletions htsget_server/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def _get_data(id_, reference_name=None, start=None, end=None, class_="body", for
file_name = f"{id_}.{format_}"

# get a file and index from drs, based on the id_
gen_obj = _get_genomic_obj(id_)
gen_obj = _get_genomic_obj(request, id_)
if gen_obj is not None:
file_in = gen_obj["file"]
ntf = tempfile.NamedTemporaryFile(prefix='htsget', suffix=format_,
Expand Down Expand Up @@ -226,7 +226,7 @@ def _get_urls(file_type, id, reference_name=None, start=None, end=None, _class=N
if file_type not in ["variant", "read"]:
raise ValueError("File type must be 'variant' or 'read'")

gen_obj = _get_genomic_obj(id)
gen_obj = _get_genomic_obj(request, id)
if gen_obj is not None:
if _class == "header":
urls = [{"url": f"{request.url_root}/htsget/v1/{file_type}s/data/{id}?class=header",
Expand Down Expand Up @@ -281,7 +281,7 @@ def _get_index(position, file_in):
# We need to query DRS to get the bundling object, which should contain links to
# two contents objects. We can instantiate them into temp files and pass those
# file handles back.
def _get_genomic_obj(object_id):
def _get_genomic_obj(request, object_id):
index_file = None
variant_file = None
read_file = None
Expand Down
4 changes: 4 additions & 0 deletions renew_token.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ask Vault to renew the token this service authenticates with
# (auth/token/renew-self). The token placeholder in the X-Vault-Token header
# is filled in by entrypoint.sh during initial setup, which also schedules
# this script in cron.
curl --request POST \
--header 'X-Vault-Token: <VAULT_S3_TOKEN>' \
--header 'Content-Type: application/json; charset=utf-8' \
--data '{}' \
"http://docker.localhost:8200/v1/auth/token/renew-self"
50 changes: 35 additions & 15 deletions tests/test_htsget_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import pytest
import requests
from pysam import AlignmentFile, VariantFile
from minio import Minio
from pathlib import Path

# assumes that we are running pytest from the repo directory
sys.path.insert(0,os.path.abspath("htsget_server"))
Expand All @@ -23,8 +25,30 @@ def test_post_objects(drs_objects):
url = f"{HOST}/ga4gh/drs/v1/objects/{obj['id']}"
response = requests.request("GET", url, headers={"Test_Key": TEST_KEY})
if response.status_code == 200:
response = requests.request("DELETE", url, headers={"Test_Key": TEST_KEY})
assert response.status_code == 200
response = requests.request("DELETE", url, headers={"Test_Key": TEST_KEY})
assert response.status_code == 200
if "access_methods" in obj and obj["access_methods"][0]["type"] == "s3":
method = obj["access_methods"][0]
client = Minio(
"play.min.io:9000",
access_key="Q3AM3UQ867SPQQA43P2F",
secret_key="zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"
)
bucket = "testhtsget"
try:
# create the sandbox bucket if it does not exist yet, then upload the local test file as the object
if not client.bucket_exists(bucket):
if 'region' in method:
client.make_bucket(bucket, location=method['region'])
else:
client.make_bucket(bucket)
file = Path(LOCAL_FILE_PATH).joinpath(obj['id'])
with Path.open(file, "rb") as fp:
result = client.put_object(bucket, obj['id'], fp, file.stat().st_size)
except Exception as e:
print(str(e))
assert False
return {"message": str(e)}, 500
url = f"{HOST}/ga4gh/drs/v1/objects"
response = requests.request("POST", url, json=obj, headers={"Test_Key": TEST_KEY})
print(f"POST {obj['name']}: {response.json()}")
Expand Down Expand Up @@ -156,11 +180,9 @@ def drs_objects():
{
"access_methods": [
{
"access_url": {
"headers": [],
"url": f"file://{CWD}/data/files/NA18537.vcf.gz.tbi"
},
"type": "file"
"access_id": "play.min.io:9000/testhtsget/NA18537.vcf.gz.tbi",
"type": "s3",
"region": "us-east-1"
}
],
"aliases": [],
Expand All @@ -178,11 +200,9 @@ def drs_objects():
{
"access_methods": [
{
"access_url": {
"headers": [],
"url": f"file://{CWD}/data/files/NA18537.vcf.gz"
},
"type": "file"
"access_id": "play.min.io:9000/testhtsget/NA18537.vcf.gz",
"type": "s3",
"region": "us-east-1"
}
],
"aliases": [],
Expand Down Expand Up @@ -229,7 +249,7 @@ def drs_objects():
{
"access_methods": [
{
"access_id": "NA20787.vcf.gz.tbi",
"access_id": "play.min.io:9000/testhtsget/NA20787.vcf.gz.tbi",
"type": "s3"
}
],
Expand All @@ -248,7 +268,7 @@ def drs_objects():
{
"access_methods": [
{
"access_id": "NA20787.vcf.gz",
"access_id": "play.min.io:9000/testhtsget/NA20787.vcf.gz",
"type": "s3"
}
],
Expand Down Expand Up @@ -296,7 +316,7 @@ def drs_objects():
{
"access_methods": [
{
"access_id": "NA02102.bam.bai",
"access_id": "play.min.io:9000/testhtsget/NA02102.bam.bai",
"type": "s3"
}
],
Expand Down

0 comments on commit f289068

Please sign in to comment.