Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use gz #32

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 126 additions & 77 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,46 +15,69 @@

UPLOAD_FOLDER = './uploads'
# The form fields for the files that the user must upload to the server.
UPLOAD_FILES = ['in_fasta', 'in_gff']
UPLOAD_FILES = {'in_fasta': None, 'in_gff': None}

# Route for submitting data on the production site.
@app.route('/', methods=['GET', 'POST'])
def submit():
form = SubmitJob(request.form)
# Validate user input according to the validation rules defined in forms.py.
if request.method == 'POST' and form.validate():
if (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename) and request.form[
'position']:
flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error')
# Get list of file names, and filter out empty files
downstream_fasta_files = [file for file in request.files.getlist('downstream_fasta') if file.filename]
upstream_fasta_files = [file for file in request.files.getlist('upstream_fasta') if file.filename]
in_fasta_files = [file for file in request.files.getlist('in_fasta') if file.filename]
in_gff_files = [file for file in request.files.getlist('in_gff') if file.filename]
if (downstream_fasta_files or upstream_fasta_files) and request.form['position']:
flash(f"Error: You must provide either the position, or the upstream and downstream sequences. Not all.", 'error')
return redirect(url_for('submit'))
if (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename):
if not (request.files['downstream_fasta'].filename and request.files['upstream_fasta'].filename):
if (downstream_fasta_files or upstream_fasta_files):
if not (downstream_fasta_files and upstream_fasta_files):
flash("Error: Must enter both upstream and downstream", 'error')
return redirect(url_for('submit'))
# Verify position is a number
positions = None
try:
if request.form['position']:
int(request.form['position'])
except ValueError:
flash("Error: Position must be an integer, like -1, 0, 1.", 'error')
return redirect(url_for('submit'))
# Verify that the name of the uploaded file is different
if request.form['position'] and (request.files['in_fasta'].filename == request.files['in_gff'].filename):
flash("Error: ref_fasta and ref_gff must be different files", 'error')
position_str = request.form['position']
if position_str:
positions = process_position(position_str)
# print("Positions:", positions)
except ValueError as e:
flash(str(e), 'error')
return redirect(url_for('submit'))
elif (request.files['downstream_fasta'].filename and request.files['upstream_fasta'].filename):
filenames = [request.files['in_fasta'].filename,
request.files['in_gff'].filename,
request.files['downstream_fasta'].filename,
request.files['upstream_fasta'].filename]
if len(filenames) != len(set(filenames)):
flash("Error: ref_fasta, ref_gff, downstream_fasta, upstream_fasta must be different files", 'error')
# Verify that all uploaded file names are distinct, using set()
# The user uploaded both in_fasta and in_gff
if in_fasta_files and in_gff_files:
all_in_filenames = []
in_fasta_filenames = [file.filename for file in in_fasta_files]
in_gff_filenames = [file.filename for file in in_gff_files]
all_in_filenames.extend(in_fasta_filenames)
all_in_filenames.extend(in_gff_filenames)
# When position is provided
if request.form['position'] and (len(all_in_filenames) != len(set(all_in_filenames))):
flash("Error: in_fasta and in_gff must be different files", 'error')
return redirect(url_for('submit'))

if not (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename) and not \
request.form['position']:
# When position is not provided, use upstream_fasta and downstream_fasta
elif downstream_fasta_files and upstream_fasta_files:
all_stream_fasta_filenames = []
downstream_fasta_filenames = [file.filename for file in downstream_fasta_files]
upstream_fasta_filenames = [file.filename for file in upstream_fasta_files]
all_stream_fasta_filenames.extend(downstream_fasta_filenames)
all_stream_fasta_filenames.extend(upstream_fasta_filenames)
if len(all_stream_fasta_filenames) != len(set(all_stream_fasta_filenames)):
flash("Error: downstream_fasta, upstream_fasta must be different files", 'error')
return redirect(url_for('submit'))
# Check that file names are unique across all uploaded files
all_filenames = []
all_filenames.extend(all_in_filenames)
all_filenames.extend(all_stream_fasta_filenames)
if len(all_filenames) != len(set(all_filenames)):
flash("Error: in_fasta, in_gff, downstream_fasta, upstream_fasta must be different files", 'error')
return redirect(url_for('submit'))
if not (downstream_fasta_files or upstream_fasta_files) and not request.form['position']:
flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error')
return redirect(url_for('submit'))

else:
# User Submits Job #
# (1) Create unique ID for each submission
Expand All @@ -68,20 +91,23 @@ def submit():

# (3) Upload files from user device to server
# Verify all files are present before uploading
for files in UPLOAD_FILES:
for files in UPLOAD_FILES.keys():
verified = verify_uploads(files)
if not verified:
return redirect(url_for('submit'))

# Upload Files to UPLOAD_DIR/timestamp/
# Upload files to UPLOAD_DIR/timestamp/, and convert each path to the form ./uploads/$timestamp/item
if verified:
for files in UPLOAD_FILES:
upload(target_dir, files)
for key in UPLOAD_FILES.keys():
UPLOAD_FILES[key] = format_paths(upload(target_dir, key), target_dir)
# Default the upstream/downstream FASTA entries to None
for file_key in ['upstream_fasta', 'downstream_fasta']:
UPLOAD_FILES[file_key] = None
UPLOAD_FILES[file_key] = None

if not request.form['position'] and request.files['upstream_fasta'].filename and request.files[
'downstream_fasta'].filename:
upload(target_dir, 'upstream_fasta')
upload(target_dir, 'downstream_fasta')
if not request.form['position']:
UPLOAD_FILES['upstream_fasta'] = format_paths(upload(target_dir, 'upstream_fasta'), target_dir)
UPLOAD_FILES['downstream_fasta'] = format_paths(upload(target_dir, 'downstream_fasta'), target_dir)

# (4) Send the job to the backend
# Connect to the Redis server and initialize a queue
Expand All @@ -93,13 +119,13 @@ def submit():
timestamp,
request.form['email'],
request.form['chrom'],
request.files['upstream_fasta'].filename,
request.files['downstream_fasta'].filename,
request.form['position'],
UPLOAD_FILES['upstream_fasta'],
UPLOAD_FILES['downstream_fasta'], #request.files['upstream_fasta'].filename,
positions, #request.form['position'],
request.form['ref_fasta'],
request.form['ref_gff'],
request.files['in_fasta'].filename,
request.files['in_gff'].filename),
UPLOAD_FILES['in_fasta'],
UPLOAD_FILES['in_gff']),
result_ttl=-1,
job_timeout=3000
)
Expand All @@ -125,34 +151,59 @@ def submit_test():
# Validate user input based on test site rule
form = Testjob(request.form)
if request.method == 'POST' and form.validate():
if (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename) and request.form[
'position']:
downstream_fasta_files = [file for file in request.files.getlist('downstream_fasta') if file.filename]
upstream_fasta_files = [file for file in request.files.getlist('upstream_fasta') if file.filename]
in_fasta_files = [file for file in request.files.getlist('in_fasta') if file.filename]
in_gff_files = [file for file in request.files.getlist('in_gff') if file.filename]
if (downstream_fasta_files or upstream_fasta_files) and request.form['position']:
flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error')
return redirect(url_for('submit_test'))
if (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename):
if not (request.files['downstream_fasta'].filename and request.files['upstream_fasta'].filename):
return redirect(url_for('submit'))
if (downstream_fasta_files or upstream_fasta_files):
if not (downstream_fasta_files and upstream_fasta_files):
flash("Error: Must enter both upstream and downstream", 'error')
return redirect(url_for('submit_test'))
return redirect(url_for('submit'))
# Verify position is a number
positions = None
try:
if request.form['position']:
int(request.form['position'])
except ValueError:
flash("Error: Position must be an integer, like -1, 0, 1.", 'error')
return redirect(url_for('submit_test'))
# Verify that the name of the uploaded file is not empty and different
if request.form['position'] and (request.files['in_fasta'].filename and request.files['in_gff'].filename) \
and (request.files['in_fasta'].filename == request.files['in_gff'].filename):
flash("Error: ref_fasta and ref_gff must be different files", 'error')
return redirect(url_for('submit_test'))
elif (request.files['downstream_fasta'].filename and request.files['upstream_fasta'].filename and request.files['in_fasta'].filename and request.files['in_gff'].filename):
filenames_test = [request.files['in_fasta'].filename,
request.files['in_gff'].filename,
request.files['downstream_fasta'].filename,
request.files['upstream_fasta'].filename]
if len(filenames_test) != len(set(filenames_test)):
flash("Error: ref_fasta, ref_gff, downstream_fasta, upstream_fasta must be different files", 'error')
return redirect(url_for('submit_test'))
position_str = request.form['position']
if position_str:
positions = process_position(position_str)
# print("Positions:", positions)
except ValueError as e:
flash(str(e), 'error')
return redirect(url_for('submit'))
# Verify that the uploaded file names are distinct when the user uploads files (non-empty filenames)
# Verify that all uploaded file names are distinct, using set()
if in_fasta_files and in_gff_files:
all_in_filenames = []
in_fasta_filenames = [file.filename for file in in_fasta_files]
in_gff_filenames = [file.filename for file in in_gff_files]
all_in_filenames.extend(in_fasta_filenames)
all_in_filenames.extend(in_gff_filenames)
# When position is provided
if request.form['position'] and (len(all_in_filenames) != len(set(all_in_filenames))):
flash("Error: in_fasta and in_gff must be different files", 'error')
return redirect(url_for('submit'))
# When position is not provided, use upstream_fasta and downstream_fasta
elif downstream_fasta_files and upstream_fasta_files:
all_stream_fasta_filenames = []
downstream_fasta_filenames = [file.filename for file in downstream_fasta_files]
upstream_fasta_filenames = [file.filename for file in upstream_fasta_files]
all_stream_fasta_filenames.extend(downstream_fasta_filenames)
all_stream_fasta_filenames.extend(upstream_fasta_filenames)
if len(all_stream_fasta_filenames) != len(set(all_stream_fasta_filenames)):
flash("Error: downstream_fasta, upstream_fasta must be different files", 'error')
return redirect(url_for('submit'))
# Check that file names are unique across all uploaded files
all_filenames = []
all_filenames.extend(all_in_filenames)
all_filenames.extend(all_stream_fasta_filenames)
if len(all_filenames) != len(set(all_filenames)):
flash("Error: in_fasta, in_gff, downstream_fasta, upstream_fasta must be different files", 'error')
return redirect(url_for('submit'))
if not (downstream_fasta_files or upstream_fasta_files) and not request.form['position']:
flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error')
return redirect(url_for('submit'))
else:
# User Submits Job #
# (1) Create unique ID for each submission
Expand All @@ -162,7 +213,6 @@ def submit_test():
if not os.path.isfile('database.db'):
db_create()


# (3) Upload files from user device to server
# Verify all files are present before uploading
for files in UPLOAD_FILES:
Expand All @@ -172,18 +222,17 @@ def submit_test():

# Choose to upload new files or use local files
if verified:
uploaded_files = {}
for file_key in UPLOAD_FILES:
uploaded_files[file_key] = upload_test(target_dir, file_key, DEFAULT_FILES)
for file_key in UPLOAD_FILES.keys():
UPLOAD_FILES[file_key] = format_paths(upload_test(target_dir, file_key, DEFAULT_FILES), target_dir)
# Default the upstream/downstream FASTA entries to None
for file_key in ['upstream_fasta', 'downstream_fasta']:
uploaded_files['upstream_fasta'] = None
uploaded_files['downstream_fasta'] = None
UPLOAD_FILES['upstream_fasta'] = None
UPLOAD_FILES['downstream_fasta'] = None

# Uploaded upstream/downstream files when position is not provided
if not request.form['position']:
for file_key in ['upstream_fasta', 'downstream_fasta']:
uploaded_files[file_key] = upload_test(target_dir, file_key, DEFAULT_FILES)
UPLOAD_FILES[file_key] = format_paths(upload_test(target_dir, file_key, DEFAULT_FILES), target_dir)

# Replace Ref Sequence with local path if test files detected
if request.form['ref_fasta'] == 'test-ref.fa':
Expand All @@ -193,9 +242,9 @@ def submit_test():
if request.form['ref_gff'] == 'test-ref.gtf':
uploaded_files['ref_gff'] = DEFAULT_FILES['ref_gff']
else:
uploaded_files['ref_gff'] = request.form['ref_gff']
UPLOAD_FILES['ref_gff'] = request.form['ref_gff']

db_test_submit(request, uploaded_files, timestamp)
db_test_submit(request, UPLOAD_FILES, timestamp)

# (4) Send job to the backend
# Use the redis queue as same as production site
Expand All @@ -207,13 +256,13 @@ def submit_test():
timestamp,
request.form['email'], # ys4680@nyu.edu
request.form['chrom'], # 1
uploaded_files['upstream_fasta'], # by default
uploaded_files['downstream_fasta'],
request.form['position'],
uploaded_files['ref_fasta'], # by default
uploaded_files['ref_gff'], # by default
uploaded_files['in_fasta'], # by default
uploaded_files['in_gff'] # by default
UPLOAD_FILES['upstream_fasta'], # by default
UPLOAD_FILES['downstream_fasta'],
positions,
UPLOAD_FILES['ref_fasta'], # by default
UPLOAD_FILES['ref_gff'], # by default
UPLOAD_FILES['in_fasta'], # by default
UPLOAD_FILES['in_gff'] # by default
),
result_ttl=-1,
job_timeout=3000
Expand Down
Loading