From 9aaf3cb1ce751380ff45b896b20a7d0733a6f06e Mon Sep 17 00:00:00 2001 From: Yuwei Sun Date: Thu, 8 Aug 2024 16:46:59 -0400 Subject: [PATCH 1/2] update with sequential processing --- app.py | 217 +++++++----- forms.py | 30 +- job.py | 122 ++++--- reform.py | 959 ++++++++++++++++++++++++++++++------------------------ run.sh | 21 +- 5 files changed, 774 insertions(+), 575 deletions(-) diff --git a/app.py b/app.py index 8730353..7a6494d 100644 --- a/app.py +++ b/app.py @@ -15,7 +15,7 @@ UPLOAD_FOLDER = './uploads' # The type of file that needs to be uploaded to the server by user. -UPLOAD_FILES = ['in_fasta', 'in_gff'] +UPLOAD_FILES = {'in_fasta': None, 'in_gff': None} # Route for submitting data on the production site. @app.route('/', methods=['GET', 'POST']) @@ -23,38 +23,61 @@ def submit(): form = SubmitJob(request.form) # Validate user input according to the validation rules defined in forms.py. if request.method == 'POST' and form.validate(): - if (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename) and request.form[ - 'position']: - flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error') + # Get list of file names, and filter out empty files + downstream_fasta_files = [file for file in request.files.getlist('downstream_fasta') if file.filename] + upstream_fasta_files = [file for file in request.files.getlist('upstream_fasta') if file.filename] + in_fasta_files = [file for file in request.files.getlist('in_fasta') if file.filename] + in_gff_files = [file for file in request.files.getlist('in_gff') if file.filename] + if (downstream_fasta_files or upstream_fasta_files) and request.form['position']: + flash(f"Error: You must provide either the position, or the upstream and downstream sequences. 
Not all.", 'error') return redirect(url_for('submit')) - if (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename): - if not (request.files['downstream_fasta'].filename and request.files['upstream_fasta'].filename): + if (downstream_fasta_files or upstream_fasta_files): + if not (downstream_fasta_files and upstream_fasta_files): flash("Error: Must enter both upstream and downstream", 'error') return redirect(url_for('submit')) # Verify position is a number + positions = None try: - if request.form['position']: - int(request.form['position']) - except ValueError: - flash("Error: Position must be an integer, like -1, 0, 1.", 'error') - return redirect(url_for('submit')) - # Verify that the name of the uploaded file is different - if request.form['position'] and (request.files['in_fasta'].filename == request.files['in_gff'].filename): - flash("Error: ref_fasta and ref_gff must be different files", 'error') + position_str = request.form['position'] + if position_str: + positions = process_position(position_str) + # print("Positions:", positions) + except ValueError as e: + flash(str(e), 'error') return redirect(url_for('submit')) - elif (request.files['downstream_fasta'].filename and request.files['upstream_fasta'].filename): - filenames = [request.files['in_fasta'].filename, - request.files['in_gff'].filename, - request.files['downstream_fasta'].filename, - request.files['upstream_fasta'].filename] - if len(filenames) != len(set(filenames)): - flash("Error: ref_fasta, ref_gff, downstream_fasta, upstream_fasta must be different files", 'error') + # Verify that the name of the uploaded file is different by set() + # User upload in_fasta and in_gff + if in_fasta_files and in_gff_files: + all_in_filenames = [] + in_fasta_filenames = [file.filename for file in in_fasta_files] + in_gff_filenames = [file.filename for file in in_gff_files] + all_in_filenames.extend(in_fasta_filenames) + all_in_filenames.extend(in_gff_filenames) + # When position provide + if request.form['position'] and (len(all_in_filenames) != len(set(all_in_filenames))): + flash("Error: in_fasta and in_gff must be different files", 'error') return redirect(url_for('submit')) - - if not (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename) and not \ - request.form['position']: + # When position not provide, use upstream_fasta and downstream_fasta + elif downstream_fasta_files and upstream_fasta_files: + all_stream_fasta_filenames = [] + downstream_fasta_filenames = [file.filename for file in downstream_fasta_files] + upstream_fasta_filenames = [file.filename for file in upstream_fasta_files] + all_stream_fasta_filenames.extend(downstream_fasta_filenames) + all_stream_fasta_filenames.extend(upstream_fasta_filenames) + if len(all_stream_fasta_filenames) != len(set(all_stream_fasta_filenames)): + flash("Error: downstream_fasta, upstream_fasta must be different files", 'error') + return redirect(url_for('submit')) + # Check all uniqueness for all files + all_filenames = [] + all_filenames.extend(all_in_filenames) + all_filenames.extend(all_stream_fasta_filenames) + if len(all_filenames) != len(set(all_filenames)): + flash("Error: in_fasta, in_gff, downstream_fasta, upstream_fasta must be different files", 'error') + return redirect(url_for('submit')) + if not (downstream_fasta_files or upstream_fasta_files) and not request.form['position']: flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error') return 
redirect(url_for('submit')) + else: # User Submits Job # # (1) Create unique ID for each submission @@ -68,20 +91,23 @@ def submit(): # (3) Upload files from user device to server # Verify all files are present before uploading - for files in UPLOAD_FILES: + for files in UPLOAD_FILES.keys(): verified = verify_uploads(files) if not verified: return redirect(url_for('submit')) - # Upload Files to UPLOAD_DIR/timestamp/ + # Upload Files to UPLOAD_DIR/timestamp/, and convert format to./uploads/$timestamp/item if verified: - for files in UPLOAD_FILES: - upload(target_dir, files) + for key in UPLOAD_FILES.keys(): + UPLOAD_FILES[key] = format_paths(upload(target_dir, key), target_dir) + # Set defualt None to up/down stream fasta + for file_key in ['upstream_fasta', 'downstream_fasta']: + UPLOAD_FILES[file_key] = None + UPLOAD_FILES[file_key] = None - if not request.form['position'] and request.files['upstream_fasta'].filename and request.files[ - 'downstream_fasta'].filename: - upload(target_dir, 'upstream_fasta') - upload(target_dir, 'downstream_fasta') + if not request.form['position']: + UPLOAD_FILES['upstream_fasta'] = format_paths(upload(target_dir, 'upstream_fasta'), target_dir) + UPLOAD_FILES['downstream_fasta'] = format_paths(upload(target_dir, 'downstream_fasta'), target_dir) # (4) Send the job to the backend # Connect to the Redis server and intial a queue @@ -93,13 +119,13 @@ def submit(): timestamp, request.form['email'], request.form['chrom'], - request.files['upstream_fasta'].filename, - request.files['downstream_fasta'].filename, - request.form['position'], + UPLOAD_FILES['upstream_fasta'], + UPLOAD_FILES['downstream_fasta'], #request.files['upstream_fasta'].filename, + positions, #request.form['position'], request.form['ref_fasta'], request.form['ref_gff'], - request.files['in_fasta'].filename, - request.files['in_gff'].filename), + UPLOAD_FILES['in_fasta'], + UPLOAD_FILES['in_gff']), result_ttl=-1, job_timeout=3000 ) @@ -117,42 +143,67 @@ def submit_test(): DEFAULT_FILES = { 'ref_fasta': './staticData/ref/Mus_musculus.GRCm38.dna.toplevel.fa', 'ref_gff': './staticData/ref/Mus_musculus.GRCm38.88.gff3', - 'in_fasta': './staticData/inserted/test-in.fa', - 'in_gff': './staticData/inserted/test-in.gtf', - 'upstream_fasta': './staticData/up-down-seq/test-up.fa', - 'downstream_fasta': './staticData/up-down-seq/test-down.fa' + 'in_fasta': ['./staticData/inserted/test-in.fa'], + 'in_gff': ['./staticData/inserted/test-in.gtf'], + 'upstream_fasta': ['./staticData/up-down-seq/test-up.fa'], + 'downstream_fasta': ['./staticData/up-down-seq/test-down.fa'] } # Validate user input based on test site rule form = Testjob(request.form) if request.method == 'POST' and form.validate(): - if (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename) and request.form[ - 'position']: + downstream_fasta_files = [file for file in request.files.getlist('downstream_fasta') if file.filename] + upstream_fasta_files = [file for file in request.files.getlist('upstream_fasta') if file.filename] + in_fasta_files = [file for file in request.files.getlist('in_fasta') if file.filename] + in_gff_files = [file for file in request.files.getlist('in_gff') if file.filename] + if (downstream_fasta_files or upstream_fasta_files) and request.form['position']: flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error') - return redirect(url_for('submit_test')) - if (request.files['downstream_fasta'].filename or request.files['upstream_fasta'].filename): 
- if not (request.files['downstream_fasta'].filename and request.files['upstream_fasta'].filename): + return redirect(url_for('submit')) + if (downstream_fasta_files or upstream_fasta_files): + if not (downstream_fasta_files and upstream_fasta_files): flash("Error: Must enter both upstream and downstream", 'error') - return redirect(url_for('submit_test')) + return redirect(url_for('submit')) # Verify position is a number + positions = None try: - if request.form['position']: - int(request.form['position']) - except ValueError: - flash("Error: Position must be an integer, like -1, 0, 1.", 'error') - return redirect(url_for('submit_test')) - # Verify that the name of the uploaded file is not empty and different - if request.form['position'] and (request.files['in_fasta'].filename and request.files['in_gff'].filename) \ - and (request.files['in_fasta'].filename == request.files['in_gff'].filename): - flash("Error: ref_fasta and ref_gff must be different files", 'error') - return redirect(url_for('submit_test')) - elif (request.files['downstream_fasta'].filename and request.files['upstream_fasta'].filename and request.files['in_fasta'].filename and request.files['in_gff'].filename): - filenames_test = [request.files['in_fasta'].filename, - request.files['in_gff'].filename, - request.files['downstream_fasta'].filename, - request.files['upstream_fasta'].filename] - if len(filenames_test) != len(set(filenames_test)): - flash("Error: ref_fasta, ref_gff, downstream_fasta, upstream_fasta must be different files", 'error') - return redirect(url_for('submit_test')) + position_str = request.form['position'] + if position_str: + positions = process_position(position_str) + # print("Positions:", positions) + except ValueError as e: + flash(str(e), 'error') + return redirect(url_for('submit')) + # Verify that the name of the uploaded file is different when user upload (filename is not empty ) + # Verify that the name of the uploaded file is different by set() + if in_fasta_files and in_gff_files: + all_in_filenames = [] + in_fasta_filenames = [file.filename for file in in_fasta_files] + in_gff_filenames = [file.filename for file in in_gff_files] + all_in_filenames.extend(in_fasta_filenames) + all_in_filenames.extend(in_gff_filenames) + # When position provide + if request.form['position'] and (len(all_in_filenames) != len(set(all_in_filenames))): + flash("Error: in_fasta and in_gff must be different files", 'error') + return redirect(url_for('submit')) + # When position not provide, use upstream_fasta and downstream_fasta + elif downstream_fasta_files and upstream_fasta_files: + all_stream_fasta_filenames = [] + downstream_fasta_filenames = [file.filename for file in downstream_fasta_files] + upstream_fasta_filenames = [file.filename for file in upstream_fasta_files] + all_stream_fasta_filenames.extend(downstream_fasta_filenames) + all_stream_fasta_filenames.extend(upstream_fasta_filenames) + if len(all_stream_fasta_filenames) != len(set(all_stream_fasta_filenames)): + flash("Error: downstream_fasta, upstream_fasta must be different files", 'error') + return redirect(url_for('submit')) + # Check all uniqueness for all files + all_filenames = [] + all_filenames.extend(all_in_filenames) + all_filenames.extend(all_stream_fasta_filenames) + if len(all_filenames) != len(set(all_filenames)): + flash("Error: in_fasta, in_gff, downstream_fasta, upstream_fasta must be different files", 'error') + return redirect(url_for('submit')) + if not (downstream_fasta_files or upstream_fasta_files) and not 
request.form['position']: + flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error') + return redirect(url_for('submit')) else: # User Submits Job # # (1) Create unique ID for each submission @@ -162,7 +213,6 @@ def submit_test(): if not os.path.isfile('database.db'): db_create() - # (3) Upload files from user device to server # Verify all files are present before uploading for files in UPLOAD_FILES: @@ -172,30 +222,29 @@ def submit_test(): # Choose to upload new files or use local files if verified: - uploaded_files = {} - for file_key in UPLOAD_FILES: - uploaded_files[file_key] = upload_test(target_dir, file_key, DEFAULT_FILES) + for file_key in UPLOAD_FILES.keys(): + UPLOAD_FILES[file_key] = format_paths(upload_test(target_dir, file_key, DEFAULT_FILES), target_dir) # Set defualt None to up/down stream fasta for file_key in ['upstream_fasta', 'downstream_fasta']: - uploaded_files['upstream_fasta'] = None - uploaded_files['downstream_fasta'] = None + UPLOAD_FILES['upstream_fasta'] = None + UPLOAD_FILES['downstream_fasta'] = None # Uploaded upstream/downstream files when position is not provided if not request.form['position']: for file_key in ['upstream_fasta', 'downstream_fasta']: - uploaded_files[file_key] = upload_test(target_dir, file_key, DEFAULT_FILES) + UPLOAD_FILES[file_key] = format_paths(upload_test(target_dir, file_key, DEFAULT_FILES), target_dir) # Replace Ref Sequence with local path if example ftp detected if request.form['ref_fasta'] == 'ftp://ftp.ensembl.org/pub/release-88/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.toplevel.fa.gz': - uploaded_files['ref_fasta'] = DEFAULT_FILES['ref_fasta'] + UPLOAD_FILES['ref_fasta'] = DEFAULT_FILES['ref_fasta'] else: - uploaded_files['ref_fasta'] = request.form['ref_fasta'] + UPLOAD_FILES['ref_fasta'] = request.form['ref_fasta'] if request.form['ref_gff'] == 'ftp://ftp.ensembl.org/pub/release-88/gff3/mus_musculus/Mus_musculus.GRCm38.88.gff3.gz': - uploaded_files['ref_gff'] = DEFAULT_FILES['ref_gff'] + UPLOAD_FILES['ref_gff'] = DEFAULT_FILES['ref_gff'] else: - uploaded_files['ref_gff'] = request.form['ref_gff'] + UPLOAD_FILES['ref_gff'] = request.form['ref_gff'] - db_test_submit(request, uploaded_files, timestamp) + db_test_submit(request, UPLOAD_FILES, timestamp) # (4) Send job to the backend # Use the redis queue as same as production site @@ -207,13 +256,13 @@ def submit_test(): timestamp, request.form['email'], # ys4680@nyu.edu request.form['chrom'], # 1 - uploaded_files['upstream_fasta'], # by default - uploaded_files['downstream_fasta'], - request.form['position'], - uploaded_files['ref_fasta'], # by default - uploaded_files['ref_gff'], # by default - uploaded_files['in_fasta'], # by default - uploaded_files['in_gff'] # by default + UPLOAD_FILES['upstream_fasta'], # by default + UPLOAD_FILES['downstream_fasta'], + positions, + UPLOAD_FILES['ref_fasta'], # by default + UPLOAD_FILES['ref_gff'], # by default + UPLOAD_FILES['in_fasta'], # by default + UPLOAD_FILES['in_gff'] # by default ), result_ttl=-1, job_timeout=3000 diff --git a/forms.py b/forms.py index be4783c..07fbbcc 100644 --- a/forms.py +++ b/forms.py @@ -1,6 +1,6 @@ from wtforms import Form, StringField, FileField from wtforms.validators import URL, Optional, InputRequired, Email -from flask_wtf.file import FileRequired, FileAllowed +from flask_wtf.file import FileRequired, FileAllowed, MultipleFileField ALLOWED_EXTENSIONS = {'fa', 'gff', 'gff3', 'gtf', 'fasta', 'fna', 'tar', 'gz'} @@ -28,31 +28,31 @@ class SubmitJob(Form): 
"of chromosome. Note: Position is 0-based", validators=[Optional()]) # OR - upstream_fasta = FileField('Upstream Sequence', - description="FASTA file with upstream sequence.", + upstream_fasta = MultipleFileField('Upstream Sequence', + description="FASTA files with upstream sequence.", validators=[ Optional() ]) - downstream_fasta = FileField('Downstream Sequence', - description="FASTA file with downstream sequence.", + downstream_fasta = MultipleFileField('Downstream Sequence', + description="FASTA files with downstream sequence.", validators=[ Optional() ]) # Uploads - in_fasta = FileField('Inserted Sequence (FASTA)', - description="New sequence to be inserted into reference genome.", + in_fasta = MultipleFileField('Inserted Sequence (FASTA)', + description="New sequences to be inserted into reference genome.", validators=[ Optional(), FileAllowed([ALLOWED_EXTENSIONS], 'Invalid File Type'), FileRequired() ]) - in_gff = FileField('Inserted Reference (gff3 or gtf)', - description="GFF file describing new FASTA sequence to be inserted.", + in_gff = MultipleFileField('Inserted Reference (gff3 or gtf)', + description="GFF files describing new FASTA sequence to be inserted.", validators=[ Optional(), FileAllowed([ALLOWED_EXTENSIONS], 'Invalid File Type'), - InputRequired() + FileRequired() ]) # Downloads ref_fasta = StringField('Reference Sequence (FASTA)', @@ -100,31 +100,31 @@ class Testjob(Form): "of chromosome. Note: Position is 0-based", validators=[Optional()]) # OR - upstream_fasta = FileField('Upstream Sequence', + upstream_fasta = MultipleFileField('Upstream Sequence', description="FASTA file with upstream sequence. If no file is selected, the system will use 'test-up.fa' as a default.", validators=[ Optional() ]) - downstream_fasta = FileField('Downstream Sequence', + downstream_fasta = MultipleFileField('Downstream Sequence', description="FASTA file with downstream sequence. If no file is selected, the system will use 'test-down.fa' as a default.", validators=[ Optional() ]) # Uploads - in_fasta = FileField('Inserted Sequence (FASTA)', + in_fasta = MultipleFileField('Inserted Sequence (FASTA)', description="Please upload the new sequence to be inserted into the reference genome. If no file is selected, the system will use 'test-in.fa' as a default.", validators=[ Optional(), FileAllowed([ALLOWED_EXTENSIONS], 'Invalid File Type'), # FileRequired() ]) - in_gff = FileField('Inserted Reference (gff3 or gtf)', + in_gff = MultipleFileField('Inserted Reference (gff3 or gtf)', description="Please upload the GFF file describing the new FASTA sequence to be inserted. 
If no file is selected, the system will use 'test-in.gff' as a default.", validators=[ Optional(), FileAllowed([ALLOWED_EXTENSIONS], 'Invalid File Type'), - # InputRequired() + # FileRequired() ]) # Downloads ref_fasta = StringField('Reference Sequence (FASTA)', diff --git a/job.py b/job.py index af7899a..b1c6390 100644 --- a/job.py +++ b/job.py @@ -23,15 +23,21 @@ def redisjob(target_dir, timestamp, email, chrom, upstream_fasta, downstream_fasta, position, ref_fastaURL, ref_gffURL, in_fasta, in_gff): + # Convert list of insert sequences to string + in_fasta_str = ','.join(in_fasta) + in_gff_str = ','.join(in_gff) if position: + command = "bash ./run.sh {} {} {} {} {} {} {} {} {}".format(target_dir, timestamp, email, chrom, - ref_fastaURL, ref_gffURL, in_fasta, - in_gff, position) + ref_fastaURL, ref_gffURL, in_fasta_str, + in_gff_str, position) else: + upstream_fasta_str = ','.join(upstream_fasta) + downstream_fasta_str = ','.join(downstream_fasta) command = "bash ./run.sh {} {} {} {} {} {} {} {} {} {}".format(target_dir, timestamp, email, chrom, - ref_fastaURL, ref_gffURL, in_fasta, - in_gff, upstream_fasta, - downstream_fasta) + ref_fastaURL, ref_gffURL, in_fasta_str, + in_gff_str, upstream_fasta_str, + downstream_fasta_str) try: #subprocess.run([command]) os.system(command) @@ -81,62 +87,75 @@ def allowed_file(filename): # Verify that the file is uploaded to the form and it is allowed def verify_uploads(file): - fileObj = request.files[file] - - if fileObj.filename == '': - flash('No ' + file + ' file selected for uploading', 'error') - return False - if fileObj and allowed_file(fileObj.filename): - return True - else: - flash('Invalid File Type for ' + file, 'error') - return False + fileObj_lists = request.files.getlist(file) + for uploaded_file in fileObj_lists: + if uploaded_file.filename == '': + flash('No ' + file + ' file selected for uploading', 'error') + return False + if uploaded_file and allowed_file(uploaded_file.filename): + continue + else: + flash('Invalid File Type for ' + file, 'error') + return False + return True # verify_uploads() for test site def verify_test_uploads(file): - fileObj = request.files[file] - # Not require user to upload files - if fileObj.filename == '': - # flash('No ' + file + ' file selected for uploading', 'error') - # return False - return True # If no file is uploaded then the default file is used - if fileObj and allowed_file(fileObj.filename): - return True - else: - flash('Invalid File Type for ' + file, 'error') - return False + fileObj_lists = request.files.getlist(file) + for uploaded_file in fileObj_lists: + # Not require user to upload files + if uploaded_file.filename == '': + # flash('No ' + file + ' file selected for uploading', 'error') + # return False + continue # If no file is uploaded then the default file is used + if uploaded_file and allowed_file(uploaded_file.filename): + continue + else: + flash('Invalid File Type for ' + file, 'error') + return False + return True # Upload files to uploads/timestamp def upload(target_dir, file): - fileObj = request.files[file] + fileObj_lists = request.files.getlist(file) # make the directory based on timestamp os.system('mkdir -p ' + target_dir) # save the file - fileObj.save(os.path.join(target_dir, - secure_filename(fileObj.filename))) + filenames = [] + for fileObj in fileObj_lists: + filename = secure_filename(fileObj.filename) + fileObj.save(os.path.join(target_dir, filename)) + filenames.append(filename) + return filenames # upload file function for test site, return by filename 
 def upload_test(target_dir, file_key, default_files):
     # if file is empty (indicated use default file), fileObj set to None
-    fileObj = request.files[file_key] if file_key in request.files else None
+    fileObj_lists = request.files.getlist(file_key) if file_key in request.files else []
     os.makedirs(target_dir, exist_ok=True) # dirs for upload files
     # User provided new files in form
-    if fileObj:
-        # save the uploaded file
-        filename = secure_filename(fileObj.filename)
-        file_path = os.path.join(target_dir, filename)
-        fileObj.save(file_path)
-        return fileObj.filename
+    filenames = []
+    if fileObj_lists:
+        for fileObj in fileObj_lists:
+            # save the uploaded file
+            filename = secure_filename(fileObj.filename)
+            file_path = os.path.join(target_dir, filename)
+            fileObj.save(file_path)
+            filenames.append(fileObj.filename)
     else:
-        # Use default file if no file was uploaded
-        src = os.path.abspath(default_files[file_key])
-        dst = os.path.join(target_dir, os.path.basename(src))
-        # Create soft link in uploads/timestamp
-        if not os.path.exists(dst):
-            os.symlink(src, dst)
-        return os.path.basename(src)
+        # default_files[file_key] is a list of default file paths
+        src_files = default_files[file_key]
+        for src in src_files:
+            # Use default file if no file was uploaded
+            src = os.path.abspath(src)
+            dst = os.path.join(target_dir, os.path.basename(src))
+            # Create soft link in uploads/timestamp
+            if not os.path.exists(dst):
+                os.symlink(src, dst)
+            filenames.append(os.path.basename(src))
+    return filenames
 
 
 def download(target_dir, URL):
@@ -284,3 +303,20 @@ def db_update(timestamp, set_id, set_value):
     db.execute("UPDATE submissions SET " + set_id + "=? where timestamp=? ", (set_value, timestamp))
     db.commit()
     db.close()
+
+def process_position(position_str):
+    try:
+        int(position_str)
+        return position_str  # a single integer; return the original string
+    except ValueError:
+        try:
+            position_list = [pos.strip() for pos in position_str.split(',')]
+            for pos in position_list:
+                int(pos)
+            return position_str
+        except ValueError:
+            raise ValueError("Position must be an integer or a comma-separated list of integers.")
+
+# Format file path into: ./uploads/$timestamp/item
+def format_paths(file_list, target_dir):
+    return [os.path.join(target_dir, filename) for filename in file_list]
\ No newline at end of file
diff --git a/reform.py b/reform.py
index 659bb3c..9288bb2 100644
--- a/reform.py
+++ b/reform.py
@@ -1,446 +1,559 @@
 import argparse
 import re
 import os
+import gzip
+import tempfile
 from Bio import SeqIO
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
 from Bio.Alphabet import generic_dna
 
-
 def main():
-    ## Retrieve command line arguments
-    in_arg = get_input_args()
-
-    ## Retrieve ouptut directory
-    output_dir = in_arg.output_dir
-
-    ## Read the new fasta (to be inserted into the ref genome)
-    record = list(SeqIO.parse(in_arg.in_fasta, "fasta"))[0]
-
-    ## Generate index of sequences from ref reference fasta
-    chrom_seqs = SeqIO.index(in_arg.ref_fasta, 'fasta')
-
-    ## Obtain the sequence of the chromosome we want to modify
-    seq = chrom_seqs[in_arg.chrom]
-    seq_str = str(seq.seq)
-
-    ## Get the position to insert the new sequence
-    positions = get_position(in_arg.position, in_arg.upstream_fasta, in_arg.downstream_fasta, in_arg.chrom, seq_str)
-    position = positions['position']
-    down_position = positions['down_position']
-    if position != down_position:
-        print("Removing nucleotides from position {} - {}".format(position, down_position))
-    print("Proceeding to insert sequence '{}' from {} at position {} on 
chromsome {}" - .format(record.description, in_arg.in_fasta, position, in_arg.chrom)) - - ## Build the new chromosome sequence with the inserted_seq - ## If the chromosome sequence length is in the header, replace it with new length - new_seq = seq_str[:position] + str(record.seq) + seq_str[down_position:] - chrom_length = str(len(seq_str)) - new_length = str(len(new_seq)) - new_record = SeqRecord( - Seq(new_seq, generic_dna), - id=seq.id, - description=seq.description.replace(chrom_length, new_length) - ) - - ## Create new fasta file with modified chromosome - ref_basename = os.path.basename(in_arg.ref_fasta) - ref_name = os.path.splitext(ref_basename)[0] - new_fasta = output_dir + ref_name + '_reformed.fa' - with open(new_fasta, "w") as f: - for s in chrom_seqs: - if s == seq.id: - SeqIO.write([new_record], f, "fasta") - else: - SeqIO.write([chrom_seqs[s]], f, "fasta") - - print("New fasta file created: ", new_fasta) - print("Preparing to create new annotation file") - - ## Read in new GFF features from in_gff - in_gff_lines = get_in_gff_lines(in_arg.in_gff) - - ## Create new gff file - annotation_basename = os.path.basename(in_arg.ref_gff) - (annotation_name, annotation_ext) = os.path.splitext(annotation_basename) - new_gff_name = output_dir + annotation_name + '_reformed' + annotation_ext - new_gff = create_new_gff(new_gff_name, in_arg.ref_gff, in_gff_lines, position, down_position, seq.id, - len(str(record.seq))) - print("New {} file created: {} ".format(annotation_ext.upper(), new_gff.name)) + ## Retrieve command line arguments and number of iterations + in_arg, iterations = get_input_args() + + ## Retrieve ouptut directory + output_dir = in_arg.output_dir + + # TODO: modify the postion from up.fa and down.fa + + ## List for previous postion and modification length + prev_modifications = [] + + ## Path for the files generated in sequential processing, mainly managed by tempfile. + prev_fasta_path = None + prev_gff_path = None + + ## Sequential processing + for index in range(iterations): + ## Read the new fasta (to be inserted into the ref genome) + record = list(SeqIO.parse(in_arg.in_fasta[index], "fasta"))[0] + + ## Generate index of sequences from ref reference fasta + if prev_fasta_path: + chrom_seqs = index_fasta(prev_fasta_path) + os.remove(prev_fasta_path) + else: + chrom_seqs = index_fasta(in_arg.ref_fasta) + + ## Obtain the sequence of the chromosome we want to modify + seq = chrom_seqs[in_arg.chrom] + seq_str = str(seq.seq) + + ## Get the position to insert the new sequence + positions = get_position(index, in_arg.position, in_arg.upstream_fasta, in_arg.downstream_fasta, in_arg.chrom, seq_str, prev_modifications) + position = positions['position'] + down_position = positions['down_position'] + + ## Save current modification which include position(index) and length changed. 
+ if position == down_position: + length_changed = len(str(record.seq)) + else: + length_changed = len(str(record.seq)) - (down_position - position - 1) + prev_modifications.append((position,length_changed)) + + if position != down_position: + print("Removing nucleotides from position {} - {}".format(position, down_position)) + print("Proceeding to insert sequence '{}' from {} at position {} on chromsome {}" + .format(record.description, in_arg.in_fasta[index], position, in_arg.chrom)) + + ## Build the new chromosome sequence with the inserted_seq + ## If the chromosome sequence length is in the header, replace it with new length + new_seq = seq_str[:position] + str(record.seq) + seq_str[down_position:] + chrom_length = str(len(seq_str)) + new_length = str(len(new_seq)) + new_record = SeqRecord( + Seq(new_seq, generic_dna), + id=seq.id, + description=seq.description.replace(chrom_length, new_length) + ) + + ## Create new fasta file with modified chromosome + if index < iterations - 1: + new_fasta_file = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.fa') + new_fasta_file.close() + new_fasta = new_fasta_file.name + prev_fasta_path = new_fasta + else: + ref_basename, _ = get_ref_basename(in_arg.ref_fasta) + ref_name = ref_basename + new_fasta = output_dir + ref_name + '_reformed.fa' + with open(new_fasta, "w") as f: + for s in chrom_seqs: + if s == seq.id: + SeqIO.write([new_record], f, "fasta") + else: + SeqIO.write([chrom_seqs[s]], f, "fasta") + print("New fasta file created: ", new_fasta) + + print("Preparing to create new annotation file") + + ## Read in new GFF features from in_gff + in_gff_lines = get_in_gff_lines(in_arg.in_gff[index]) + + ## Create a temp file for gff, if index is not equal to last iteration + annotation_name, annotation_ext = get_ref_basename(in_arg.ref_gff) + print(in_arg.ref_gff) + if index < iterations - 1: + temp_gff = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix=annotation_ext) + temp_gff_name = temp_gff.name + temp_gff.close() + if prev_gff_path: + new_gff_path = create_new_gff(temp_gff_name, prev_gff_path, in_gff_lines, position, down_position, seq.id, len(str(record.seq))) + os.remove(prev_gff_path) + else: + new_gff_path = create_new_gff(temp_gff_name, in_arg.ref_gff, in_gff_lines, position, down_position, seq.id, len(str(record.seq))) + else: + new_gff_name = annotation_name + '_reformed' + annotation_ext + if prev_gff_path: + new_gff_path = create_new_gff(new_gff_name, prev_gff_path, in_gff_lines, position, down_position, seq.id, len(str(record.seq))) + else: + new_gff_path = create_new_gff(new_gff_name, in_arg.ref_gff, in_gff_lines, position, down_position, seq.id, len(str(record.seq))) + prev_gff_path = new_gff_path + print("New {} file created: {} ".format(annotation_ext.upper(), prev_gff_path)) + + +def index_fasta(fasta_path): + ''' + Process incoming FASTA files, supporting both decompressed and uncompressed + formats. It takes a file path (fasta_path), decompresses the file into a + temporary file if it is a compressed file ending in .gz, and then indexes + the temporary file. Finally, the indexing result is returned. 
+ ''' + if fasta_path.endswith('.gz'): + with gzip.open(fasta_path, 'rt') as f: + # Create a tempfile to store uncompressde content + with tempfile.NamedTemporaryFile(delete=False, mode='w') as tmp_f: + tmp_f.write(f.read()) + tmp_f_path = tmp_f.name + chrom_seqs = SeqIO.index(tmp_f_path, 'fasta') + # remove temp file + os.remove(tmp_f_path) + else: + chrom_seqs = SeqIO.index(fasta_path, 'fasta') + return chrom_seqs + +def get_ref_basename(filepath): + ''' + Takes a filepath and returns the filename and extension minus the .gz. + ''' + base = os.path.basename(filepath) + if base.endswith('.gz'): + base = base[:-3] # remove .gz + name, ext = os.path.splitext(base) + return name, ext def modify_gff_line(elements, start=None, end=None, comment=None): - ''' - Modifies an existing GFF line and returns the modified line. Currently, you can - override the start position, end position, and comment column. - - gff_out: an open file handle for writing new gff lines to - elements: a list containing each column (9 in total) of a single feature line in a gff file - start: if provided, overwrite the start position in elements with this start position - end: if provided, overwrite the end position in elements with this end position - comment: if provided, overwrite the comments column in elements with this comment - ''' - if start == None: - start = int(elements[3]) - if end == None: - end = int(elements[4]) - if comment == None: - comment = elements[8] - if not comment.endswith('\n'): - comment += '\n' - - ## Return the modified line - return ("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(elements[0], elements[1], elements[2], start, end, elements[5], - elements[6], elements[7], comment)) - - + ''' + Modifies an existing GFF line and returns the modified line. Currently, you can + override the start position, end position, and comment column. + + gff_out: an open file handle for writing new gff lines to + elements: a list containing each column (9 in total) of a single feature line in a gff file + start: if provided, overwrite the start position in elements with this start position + end: if provided, overwrite the end position in elements with this end position + comment: if provided, overwrite the comments column in elements with this comment + ''' + if start == None: + start = int(elements[3]) + if end == None: + end = int(elements[4]) + if comment == None: + comment = elements[8] + if not comment.endswith('\n'): + comment += '\n' + + ## Return the modified line + return("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(elements[0], elements[1], elements[2], start, end, elements[5], elements[6], elements[7], comment)) + def get_in_gff_lines(in_gff): - ''' - Takes a gff file and returns a list of lists where - each parent list item is a single line of the gff file - and the child elements are the columns of the line - ''' - with open(in_gff, "r") as f: - in_gff_lines = [] - for line in f: - line_elements = line.split('\t') - # Skip comment lines - if line.startswith("#"): - continue - # Ensure lines have 9 columns - if len(line_elements) != 9: - print("** ERROR: in_gff file does not have 9 columns, it has", len(line_elements)) - print(line_elements) - exit() - in_gff_lines.append(line_elements) - return in_gff_lines - - -def get_position(position, upstream, downstream, chrom, seq_str): - ''' - Determine the position in seq_str to insert the new sequence given - the position, upstream, downstream, and chrom arguments. - Note that either position, or upstream AND downstream sequences must - be provided. 
- ''' - if position is not None and position >= -1: - print("Checking position validity") - if position > len(seq_str): - print("** ERROR: Position greater than length of chromosome.") - print("Chromosome: {}\Chromosome length: {}\nPosition: \n{}".format(chrom, len(seq_str), position)) - exit() - elif position == -1: - position = len(seq_str) - # At the moment we don't accept down position as a param - # so set down_position = position - down_position = position - print("Position valid") - else: - print("No valid position specified, checking for upstream and downstream sequence") - if upstream is not None and downstream is not None: - seq_str = seq_str.upper() - upstream_fasta = list(SeqIO.parse(upstream, "fasta")) - upstream_seq = str(upstream_fasta[0].seq).upper() - downstream_fasta = list(SeqIO.parse(downstream, "fasta")) - downstream_seq = str(downstream_fasta[0].seq).upper() - # Ensure the upstream and downstream target sequences exists once in the selected chromosome, else die - upstream_seq_count = seq_str.count(upstream_seq) - downstream_seq_count = seq_str.count(downstream_seq) - if upstream_seq_count == 1 and downstream_seq_count == 1: - ## Obtain the starting position of the left_strand - index = seq_str.find(upstream_seq) - position = index + len(upstream_seq) - down_position = seq_str.find(downstream_seq) - else: - print( - "** ERROR: The upstream and downstream target sequences must be present and unique in the specified chromosome.") - print("Chromosome: {}\n".format(chrom)) - print("Upstream sequence found {} times".format(upstream_seq_count)) - print("Downstream sequence found {} times".format(downstream_seq_count)) - exit() - else: - print("** ERROR: You must specify a valid position or upstream and downstream sequences.") - exit() - if down_position < position: - print("** ERROR: Upstream and Downstream sequences must not overlap. Exiting.") - exit() - return {'position': position, 'down_position': down_position} - + ''' + Takes a gff file and returns a list of lists where + each parent list item is a single line of the gff file + and the child elements are the columns of the line + ''' + with open(in_gff, "r") as f: + in_gff_lines = [] + for line in f: + line_elements = line.split('\t') + # Skip comment lines + if line.startswith("#"): + continue + # Ensure lines have 9 columns + if len(line_elements) != 9: + print("** ERROR: in_gff file does not have 9 columns, it has", len(line_elements)) + print(line_elements) + exit() + in_gff_lines.append(line_elements) + return in_gff_lines + +def get_position(index, positions, upstream, downstream, chrom, seq_str, prev_modifications): + ''' + Determine the position in seq_str to insert the new sequence given + the position, upstream, downstream, and chrom arguments. + Note that either position, or upstream AND downstream sequences must + be provided. 
+ ''' + if positions and index < len(positions) and positions[index] >= -1: + print("Checking position validity") + position = positions[index] + ## Update current postion based on previous modification + for pos, lc in prev_modifications: + ## Also ignore when postion == -1 + if position >= pos: + position += lc + if position > len(seq_str): + print("** ERROR: Position greater than length of chromosome.") + print("Chromosome: {}\Chromosome length: {}\nPosition: \n{}".format(chrom, len(seq_str), position)) + exit() + elif position == -1: + position = len(seq_str) + # At the moment we don't accept down position as a param + # so set down_position = position + down_position = position + print("Position valid") + else: + print("No valid position specified, checking for upstream and downstream sequence") + if index < len(upstream) and index < len(downstream): + seq_str = seq_str.upper() + upstream_fasta = list(SeqIO.parse(upstream[index], "fasta")) + upstream_seq = str(upstream_fasta[0].seq).upper() + downstream_fasta = list(SeqIO.parse(downstream[index], "fasta")) + downstream_seq = str(downstream_fasta[0].seq).upper() + # Ensure the upstream and downstream target sequences exists once in the selected chromosome, else die + upstream_seq_count = seq_str.count(upstream_seq) + downstream_seq_count = seq_str.count(downstream_seq) + ### TODO: Update postion based on previous modifications + if upstream_seq_count == 1 and downstream_seq_count == 1: + ## Obtain the starting position of the left_strand + new_index = seq_str.find(upstream_seq) + position = new_index + len(upstream_seq) + down_position = seq_str.find(downstream_seq) + else: + print("** ERROR: The upstream and downstream target sequences must be present and unique in the specified chromosome.") + print("Chromosome: {}\n".format(chrom)) + print("Upstream sequence found {} times".format(upstream_seq_count)) + print("Downstream sequence found {} times".format(downstream_seq_count)) + exit() + else: + print("** ERROR: You must specify a valid position or upstream and downstream sequences.") + exit() + if down_position < position: + print("** ERROR: Upstream and Downstream sequences must not overlap. 
Exiting.") + exit() + return {'position': position, 'down_position': down_position} def write_in_gff_lines(gff_out, in_gff_lines, position, split_features): - for l in in_gff_lines: - new_gff_line = modify_gff_line( - l, start=int(l[3]) + position, end=int(l[4]) + position) - gff_out.write(new_gff_line) - - ## If insertion caused any existing features to be split, add - ## the split features now immediately after adding the new features - for sf in split_features: - modified_line = modify_gff_line( - sf[0], start=sf[1], end=sf[2], comment=sf[3]) - gff_out.write(modified_line) - - ## Return True after writing the new GFF lines - return True - + for l in in_gff_lines: + new_gff_line = modify_gff_line( + l, start = int(l[3]) + position, end = int(l[4]) + position) + gff_out.write(new_gff_line) + + ## If insertion caused any existing features to be split, add + ## the split features now immediately after adding the new features + for sf in split_features: + modified_line = modify_gff_line( + sf[0], start = sf[1], end = sf[2], comment = sf[3]) + gff_out.write(modified_line) + + ## Return True after writing the new GFF lines + return True def create_new_gff(new_gff_name, ref_gff, in_gff_lines, position, down_position, chrom_id, new_seq_length): - ''' - Goes line by line through a gff file to remove existing features - (or parts of existing features) and/or insert new features. - In the process of adding new features, existing features - may be cut-off on either end or split into two. - This attempts to handle all cases. - new_gff_name: the name of the new gff file to create - ref_gff: the reference gff file to modify - in_gff_lines: a list of lists where each nested list is a list of - columns (in gff format) associated with each new feature to insert - position: start position of removal of existing sequence - down_position: end position of removal of existing sequence - chrom_id: the ID of the chromosome to modify - new_seq_length: the length of the new sequence being added to the chromosome - ''' - gff_out = open(new_gff_name, "w") - in_gff_lines_appended = False - split_features = [] - last_seen_chrom_id = None - gff_ext = new_gff_name.split('.')[-1] - with open(ref_gff, "r") as f: - for line in f: - line_elements = line.split('\t') - if line.startswith("#"): - if line_elements[0] == "##sequence-region" and line_elements[1] == chrom_id: - ## Edit the length of the chromosome - original_length = int(line_elements[3]) - new_length = original_length - (down_position - position) + new_seq_length - line = line.replace(str(original_length), str(new_length)) - gff_out.write(line) - else: - gff_chrom_id = line_elements[0] - gff_feat_start = int(line_elements[3]) - gff_feat_end = int(line_elements[4]) - gff_feat_type = line_elements[2] - gff_feat_strand = line_elements[6] - gff_comments = line_elements[8].strip() - - # If we've seen at least one chromosome - # and the last chromosome seen was the chromosome of interest (i.e. 
chrom_id) - # and now we're on a new chromosome in the original gff file - # and the in_gff_lines have not yet been appended: - # we assume they need to be appended to the end of the chromosome - # so append them before proceeding - if (last_seen_chrom_id is not None - and last_seen_chrom_id == chrom_id - and gff_chrom_id != last_seen_chrom_id - and not in_gff_lines_appended): - in_gff_lines_appended = write_in_gff_lines( - gff_out, in_gff_lines, position, split_features) - - last_seen_chrom_id = gff_chrom_id - - # If this is not the chromosome of interest - # Or if the current feature ends before any - # modification (which occurs at position) - # Then simply write the feature as is (no modification) - # (remember gff co-ordinates are 1 based and position - # is 0 based, therefor check less than *or equal to*) - if gff_chrom_id != chrom_id or gff_feat_end <= position: - gff_out.write(line) - - # Modify chromosome feature length (if feature is - # "chromosome" or "region") - elif gff_feat_type in ['chromosome', 'region']: - original_length = gff_feat_end - new_length = original_length - (down_position - position) + new_seq_length - line = line.replace(str(original_length), str(new_length)) - gff_out.write(line) - - # Split feature into 2 if feature starts before position - # and ends after down_position - elif gff_feat_start <= position and gff_feat_end > down_position: - print("Feature split") - # Which side of the feature depends on the strand (we add this as a comment) - (x, y) = ("5", "3") if gff_feat_strand == "+" else ("3", "5") - - new_comment = format_comment( - "original feature split by inserted sequence, this is the {} prime end".format(x), - gff_ext - ) - # Modified feature ends at 'position' - modified_line = modify_gff_line( - line_elements, - end=position, - comment=gff_comments + new_comment - ) - gff_out.write(modified_line) - - # The downstream flank(s) will be added immediately after the - # in_gff (new) features are written. 
- # First, attempt to rename IDs (to indicate split) - renamed_id_attributes = rename_id(line) - new_comment = format_comment( - "original feature split by inserted sequence, this is the {} prime end".format(y), - gff_ext - ) - split_features.append( - ( - line_elements, - position + new_seq_length + 1, - gff_feat_end + new_seq_length - (down_position - position), - renamed_id_attributes + new_comment - ) - ) - - # Change end position of feature to cut off point (position) if the - # feature ends within the deletion (between position & down_position) - elif gff_feat_start <= position and gff_feat_end <= down_position: - # Which side of the feature depends on the strand (we add this as a comment) - x = "3" if gff_feat_strand == "+" else "5" - print("Feature cut off - {} prime side of feature cut off ({} strand)" - .format(x, gff_feat_strand)) - new_comment = format_comment( - "{} prime side of feature cut-off by inserted sequence".format(x), - gff_ext - ) - modified_line = modify_gff_line( - line_elements, - end=position, - comment=gff_comments + new_comment - ) - gff_out.write(modified_line) - - # Skip this feature if it falls entirely within the deletion - elif gff_feat_start > position and gff_feat_end <= down_position: - print("Skip feature (this feature was removed from sequence)") - continue - - else: - if not in_gff_lines_appended: - in_gff_lines_appended = write_in_gff_lines( - gff_out, in_gff_lines, position, split_features) - - # Change start position of feature to after cutoff point if - # the feature starts within the deletion - if (gff_feat_start > position - and gff_feat_start <= down_position - and gff_feat_end > down_position): - x = "5" if gff_feat_strand == "+" else "3" - print("Feature cut off - {} prime side of feature cut off ({} strand)" - .format(x, gff_feat_strand)) - new_comment = format_comment( - "{} prime side of feature cut-off by inserted sequence".format(x), - gff_ext - ) - modified_line = modify_gff_line( - line_elements, - start=position + new_seq_length + 1, - end=gff_feat_end + new_seq_length - (down_position - position), - comment=gff_comments + new_comment - ) - gff_out.write(modified_line) - - # Offset all downstream feature positions by offset length - elif gff_feat_start > down_position: - offset_length = new_seq_length - (down_position - position) - modified_line = modify_gff_line( - line_elements, - start=gff_feat_start + offset_length, - end=gff_feat_end + offset_length - ) - gff_out.write(modified_line) - - else: - print("** Error: Unknown case for GFF modification. Exiting " + str(line_elements)) - exit() - - # If we've iterated over the entire original gff - # (i.e. out of the for loop which iterates over it) - # and still haven't written in_gff_lines - # and the last chromosome seen was the chromosome of interest (i.e. chrom_id): - # we assume they need to be appended to the end of the genome - # (i.e. end of the last chromosome) - # so append them now - if (last_seen_chrom_id is not None - and last_seen_chrom_id == chrom_id - and not in_gff_lines_appended): - in_gff_lines_appended = write_in_gff_lines( - gff_out, in_gff_lines, position, split_features) - - # Checking to ensure in_gff_lines written - if not in_gff_lines_appended: - print("** Error: Something went wrong, in_gff not added to reference gff. Exiting") - exit() - - gff_out.close() - - return gff_out - + ''' + Goes line by line through a gff file to remove existing features + (or parts of existing features) and/or insert new features. 
+ In the process of adding new features, existing features + may be cut-off on either end or split into two. + This attempts to handle all cases. + new_gff_name: the name of the new gff file to create + ref_gff: the reference gff file to modify + in_gff_lines: a list of lists where each nested list is a list of + columns (in gff format) associated with each new feature to insert + position: start position of removal of existing sequence + down_position: end position of removal of existing sequence + chrom_id: the ID of the chromosome to modify + new_seq_length: the length of the new sequence being added to the chromosome + ''' + with open(new_gff_name, "w") as gff_out: + in_gff_lines_appended = False + split_features = [] + last_seen_chrom_id = None + gff_ext = new_gff_name.split('.')[-1] + ref_gff_path = ref_gff + if ref_gff.endswith('.gz'): + with gzip.open(ref_gff, 'rt') as f: + # Create a tempfile to store uncompressde content + with tempfile.NamedTemporaryFile(delete=False, mode='w') as tmp_f: + tmp_f.write(f.read()) + ref_gff_path = tmp_f.name + with open(ref_gff_path, "r") as f: + for line in f: + line_elements = line.split('\t') + if line.startswith("#"): + if line_elements[0] == "##sequence-region" and line_elements[1] == chrom_id: + ## Edit the length of the chromosome + original_length = int(line_elements[3]) + new_length = original_length - (down_position - position) + new_seq_length + line = line.replace(str(original_length), str(new_length)) + gff_out.write(line) + else: + gff_chrom_id = line_elements[0] + gff_feat_start = int(line_elements[3]) + gff_feat_end = int(line_elements[4]) + gff_feat_type = line_elements[2] + gff_feat_strand = line_elements[6] + gff_comments = line_elements[8].strip() + + # If we've seen at least one chromosome + # and the last chromosome seen was the chromosome of interest (i.e. 
chrom_id) + # and now we're on a new chromosome in the original gff file + # and the in_gff_lines have not yet been appended: + # we assume they need to be appended to the end of the chromosome + # so append them before proceeding + if (last_seen_chrom_id is not None + and last_seen_chrom_id == chrom_id + and gff_chrom_id != last_seen_chrom_id + and not in_gff_lines_appended): + in_gff_lines_appended = write_in_gff_lines( + gff_out, in_gff_lines, position, split_features) + + last_seen_chrom_id = gff_chrom_id + + # If this is not the chromosome of interest + # Or if the current feature ends before any + # modification (which occurs at position) + # Then simply write the feature as is (no modification) + # (remember gff co-ordinates are 1 based and position + # is 0 based, therefor check less than *or equal to*) + if gff_chrom_id != chrom_id or gff_feat_end <= position: + gff_out.write(line) + + # Modify chromosome feature length (if feature is + # "chromosome" or "region") + elif gff_feat_type in ['chromosome', 'region']: + original_length = gff_feat_end + new_length = original_length - (down_position - position) + new_seq_length + line = line.replace(str(original_length), str(new_length)) + gff_out.write(line) + + # Split feature into 2 if feature starts before position + # and ends after down_position + elif gff_feat_start <= position and gff_feat_end > down_position: + print("Feature split") + # Which side of the feature depends on the strand (we add this as a comment) + (x, y) = ("5", "3") if gff_feat_strand == "+" else ("3", "5") + + new_comment = format_comment( + "original feature split by inserted sequence, this is the {} prime end".format(x), + gff_ext + ) + # Modified feature ends at 'position' + modified_line = modify_gff_line( + line_elements, + end = position, + comment = gff_comments + new_comment + ) + gff_out.write(modified_line) + + # The downstream flank(s) will be added immediately after the + # in_gff (new) features are written. 
+ # First, attempt to rename IDs (to indicate split) + renamed_id_attributes = rename_id(line) + new_comment = format_comment( + "original feature split by inserted sequence, this is the {} prime end".format(y), + gff_ext + ) + split_features.append( + ( + line_elements, + position + new_seq_length + 1, + gff_feat_end + new_seq_length - (down_position - position), + renamed_id_attributes + new_comment + ) + ) + + # Change end position of feature to cut off point (position) if the + # feature ends within the deletion (between position & down_position) + elif gff_feat_start <= position and gff_feat_end <= down_position: + # Which side of the feature depends on the strand (we add this as a comment) + x = "3" if gff_feat_strand == "+" else "5" + print("Feature cut off - {} prime side of feature cut off ({} strand)" + .format(x, gff_feat_strand)) + new_comment = format_comment( + "{} prime side of feature cut-off by inserted sequence".format(x), + gff_ext + ) + modified_line = modify_gff_line( + line_elements, + end = position, + comment = gff_comments + new_comment + ) + gff_out.write(modified_line) + + # Skip this feature if it falls entirely within the deletion + elif gff_feat_start > position and gff_feat_end <= down_position: + print("Skip feature (this feature was removed from sequence)") + continue + + else: + if not in_gff_lines_appended: + in_gff_lines_appended = write_in_gff_lines( + gff_out, in_gff_lines, position, split_features) + + # Change start position of feature to after cutoff point if + # the feature starts within the deletion + if (gff_feat_start > position + and gff_feat_start <= down_position + and gff_feat_end > down_position): + x = "5" if gff_feat_strand == "+" else "3" + print("Feature cut off - {} prime side of feature cut off ({} strand)" + .format(x, gff_feat_strand)) + new_comment = format_comment( + "{} prime side of feature cut-off by inserted sequence".format(x), + gff_ext + ) + modified_line = modify_gff_line( + line_elements, + start = position + new_seq_length + 1, + end = gff_feat_end + new_seq_length - (down_position - position), + comment = gff_comments + new_comment + ) + gff_out.write(modified_line) + + # Offset all downstream feature positions by offset length + elif gff_feat_start > down_position: + offset_length = new_seq_length - (down_position - position) + modified_line = modify_gff_line( + line_elements, + start = gff_feat_start + offset_length, + end = gff_feat_end + offset_length + ) + gff_out.write(modified_line) + + else: + print("** Error: Unknown case for GFF modification. Exiting " + str(line_elements)) + exit() + + # If we've iterated over the entire original gff + # (i.e. out of the for loop which iterates over it) + # and still haven't written in_gff_lines + # and the last chromosome seen was the chromosome of interest (i.e. chrom_id): + # we assume they need to be appended to the end of the genome + # (i.e. end of the last chromosome) + # so append them now + if (last_seen_chrom_id is not None + and last_seen_chrom_id == chrom_id + and not in_gff_lines_appended): + in_gff_lines_appended = write_in_gff_lines( + gff_out, in_gff_lines, position, split_features) + + # Checking to ensure in_gff_lines written + if not in_gff_lines_appended: + print("** Error: Something went wrong, in_gff not added to reference gff. 
Exiting") + exit() + # Remove temp gtf file + if ref_gff.endswith('.gz'): + os.remove(ref_gff_path) + return new_gff_name def format_comment(comment, ext): - ''' - Format comment according to ext (GFF or GTF) and return - ''' - if ext.lower() == 'gtf': - new_comment = 'reform_comment "{}";'.format(comment) - elif ext.lower().startswith('gff'): - new_comment = "; reform_comment={}".format(comment) - else: - print("** Error: Unrecognized extension {} in format_comment(). Exiting".format(ext)) - exit() - return new_comment - - + ''' + Format comment according to ext (GFF or GTF) and return + ''' + if ext.lower() == 'gtf': + new_comment = 'reform_comment "{}";'.format(comment) + elif ext.lower().startswith('gff'): + new_comment = "; reform_comment={}".format(comment) + else: + print("** Error: Unrecognized extension {} in format_comment(). Exiting".format(ext)) + exit() + return new_comment + def rename_id(line): - ''' - Given a gff or gtf line, this function will append the string "_split" - to the end of the ID/gene_id attribute - ''' - attributes = line.split('\t')[8].strip() - elements = attributes.split(';') - if elements[0].startswith("ID="): - print("Renaming split feature {} --> {}_split".format(elements[0], elements[0])) - return ("{}_split;{}".format(elements[0], ';'.join(elements[1:]))) - elif elements[0].startswith("gene_id "): - gene_id = re.match(r'gene_id \"(.+)\"', elements[0])[1] - print('Renaming split feature {} --> {}_split'.format(gene_id, gene_id)) - return ('gene _id "{}_split";{}'.format(gene_id, ';'.join(elements[1:]))) - - else: - print("This feature will not be renamed because it does not has an ID/gene_id attribute:\n", line) - return attributes - - + ''' + Given a gff or gtf line, this function will append the string "_split" + to the end of the ID/gene_id attribute + ''' + attributes = line.split('\t')[8].strip() + elements = attributes.split(';') + if elements[0].startswith("ID="): + print("Renaming split feature {} --> {}_split".format(elements[0], elements[0])) + return ("{}_split;{}".format(elements[0], ';'.join(elements[1:]))) + elif elements[0].startswith("gene_id "): + gene_id = re.match(r'gene_id \"(.+)\"', elements[0])[1] + print('Renaming split feature {} --> {}_split'.format(gene_id, gene_id)) + return ('gene _id "{}_split";{}'.format(gene_id, ';'.join(elements[1:]))) + + else: + print("This feature will not be renamed because it does not has an ID/gene_id attribute:\n", line) + return attributes + def get_input_args(): - parser = argparse.ArgumentParser() - - parser.add_argument('--chrom', type=str, required=True, - help="Chromosome name (String)") - parser.add_argument('--in_fasta', type=str, required=True, - help="Path to new sequence to be inserted into reference genome in fasta format") - parser.add_argument('--in_gff', type=str, required=True, - help="Path to GFF file describing new fasta sequence to be inserted") - parser.add_argument('--upstream_fasta', type=str, default=None, - help="Path to Fasta file with upstream sequence. Either position, or upstream AND downstream sequence must be provided.") - parser.add_argument('--downstream_fasta', type=str, default=None, - help="Path to Fasta file with downstream sequence. Either position, or upstream AND downstream sequence must be provided.") - parser.add_argument('--position', type=int, default=None, - help="Position at which to insert new sequence. Note: Position is 0-based. 
 def get_input_args():
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument('--chrom', type=str, required=True,
-                        help="Chromosome name (String)")
-    parser.add_argument('--in_fasta', type=str, required=True,
-                        help="Path to new sequence to be inserted into reference genome in fasta format")
-    parser.add_argument('--in_gff', type=str, required=True,
-                        help="Path to GFF file describing new fasta sequence to be inserted")
-    parser.add_argument('--upstream_fasta', type=str, default=None,
-                        help="Path to Fasta file with upstream sequence. Either position, or upstream AND downstream sequence must be provided.")
-    parser.add_argument('--downstream_fasta', type=str, default=None,
-                        help="Path to Fasta file with downstream sequence. Either position, or upstream AND downstream sequence must be provided.")
-    parser.add_argument('--position', type=int, default=None,
-                        help="Position at which to insert new sequence. Note: Position is 0-based. Either position, or upstream AND downstream sequence must be provided.")
-    parser.add_argument('--ref_fasta', type=str, required=True,
-                        help="Path to reference fasta file")
-    parser.add_argument('--ref_gff', type=str, required=True,
-                        help="Path to reference gff file")
-    parser.add_argument('--output_dir', type=str, default=None,
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('--chrom', type = str, required = True,
+                        help = "Chromosome name (String)")
+    parser.add_argument('--in_fasta', type=str, required=True,
+                        help="Path(s) to new sequence(s) to be inserted into reference genome in fasta format")
+    parser.add_argument('--in_gff', type=str, required=True,
+                        help="Path(s) to GFF file(s) describing new fasta sequence(s) to be inserted")
+    parser.add_argument('--upstream_fasta', type=str, default=None,
+                        help="Path(s) to Fasta file(s) with upstream sequence. Either position, or upstream AND downstream sequence must be provided.")
+    parser.add_argument('--downstream_fasta', type=str, default=None,
+                        help="Path(s) to Fasta file(s) with downstream sequence. Either position, or upstream AND downstream sequence must be provided.")
+    parser.add_argument('--position', type=str, default=None,
+                        help="Comma-separated positions at which to insert new sequences. Note: positions are 0-based, with no spaces around the commas. Either position, or upstream AND downstream sequence must be provided.")
+    parser.add_argument('--ref_fasta', type = str, required = True,
+                        help = "Path to reference fasta file")
+    parser.add_argument('--ref_gff', type = str, required = True,
+                        help = "Path to reference gff file")
+    parser.add_argument('--output_dir', type=str, default=None,
                         help="Path to output to")
-
-    in_args = parser.parse_args()
-
-    if in_args.position is None and (in_args.upstream_fasta is None or in_args.downstream_fasta is None):
-        print("** Error: You must provide either the position, or the upstream and downstream sequences.")
-        exit()
-
-    return in_args
-
-
+
+    in_args = parser.parse_args()
+    in_args.in_fasta = in_args.in_fasta.split(',')
+    in_args.in_gff = in_args.in_gff.split(',')
+    if in_args.upstream_fasta:
+        in_args.upstream_fasta = in_args.upstream_fasta.split(',')
+    if in_args.downstream_fasta:
+        in_args.downstream_fasta = in_args.downstream_fasta.split(',')
+
+    if in_args.position is None and (in_args.upstream_fasta is None or in_args.downstream_fasta is None):
+        print("** Error: You must provide either the position, or the upstream and downstream sequences.")
+        exit()
+
+    if not in_args.position and len(in_args.upstream_fasta) != len(in_args.downstream_fasta):
+        print("** Error: The number of upstream_fasta and downstream_fasta files does not match.")
+        exit()
+
+    if in_args.position is not None:
+        try:
+            in_args.position = list(map(int, in_args.position.split(',')))
+            iterations = len(in_args.position)
+        except ValueError:
+            print("** Error: Position must be a comma-separated list of integers, like 1,5,-1.")
+            exit()
+    else:
+        iterations = len(in_args.upstream_fasta)
+
+    if (len(in_args.in_fasta) != len(in_args.in_gff)) or (len(in_args.in_fasta) != iterations):
+        print("** Error: The number of in_fasta files must match the number of in_gff files and the number of insertions (positions or upstream/downstream pairs).")
+        exit()
+
+    return in_args, iterations
+
 if __name__ == "__main__":
-    main()
+    main()
+    # Temp code for step 1 testing, will be removed later
+    # args, iterations = get_input_args()
+    # print("iterations:", iterations)
+    # print("Chromosome:", args.chrom)
+    # print("Input FASTA files:", args.in_fasta)
+    # print("Input GFF files:", args.in_gff)
+    # print("Reference FASTA file:", args.ref_fasta)
+    # print("Reference GFF file:", args.ref_gff)
+    # print("Upstream FASTA files:", args.upstream_fasta)
+    # print("Downstream FASTA files:", args.downstream_fasta)
+    # print("Position:", list(map(int, args.position.split(','))))
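For orientation, get_input_args() now returns the parsed arguments together with an iteration count so the caller can apply one insertion per position (or per upstream/downstream pair). The loop below is a minimal sketch of how a caller such as main() might consume that tuple; the patch does not show main(), so the loop structure and variable names here are assumptions for illustration only.

args, iterations = get_input_args()
for i in range(iterations):
    in_fasta = args.in_fasta[i]
    in_gff = args.in_gff[i]
    position = args.position[i] if args.position else None
    upstream_fasta = args.upstream_fasta[i] if args.upstream_fasta else None
    downstream_fasta = args.downstream_fasta[i] if args.downstream_fasta else None
    # Each insertion would then be applied to the output of the previous one
    # (sequential processing), using either `position` or the
    # upstream/downstream pair to locate the insertion site.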
files:", args.in_fasta) + # print("Input GFF files:", args.in_gff) + # print("Reference FASTA file:", args.ref_fasta) + # print("Reference GFF file:", args.ref_gff) + # print("Upstream FASTA files:", args.upstream_fasta) + # print("Downstream FASTA files:", args.downstream_fasta) + # print("Position:", list(map(int, args.position.split(',')))) diff --git a/run.sh b/run.sh index cb68f2e..1b8739e 100644 --- a/run.sh +++ b/run.sh @@ -85,24 +85,25 @@ else fi # Run reform.py +# ./uploads/$timestamp/ if [ ! -z "$position" ]; then - echo /home/reform/venv/bin/python reform.py --chrom $chrom --position $position --in_fasta ./uploads/$timestamp/$in_fasta \ - --in_gff ./uploads/$timestamp/$in_gff --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path" \ + echo /home/reform/venv/bin/python reform.py --chrom $chrom --position $position --in_fasta $in_fasta \ + --in_gff $in_gff --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path" \ --output_dir "./results/$timestamp/" - /home/reform/venv/bin/python reform.py --chrom $chrom --position $position --in_fasta ./uploads/$timestamp/$in_fasta \ - --in_gff ./uploads/$timestamp/$in_gff --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path" \ + /home/reform/venv/bin/python reform.py --chrom $chrom --position $position --in_fasta $in_fasta \ + --in_gff $in_gff --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path" \ --output_dir "./results/$timestamp/" 2>&1 | tee ./results/$timestamp/$timestamp-worker-err.log else - echo /home/reform/venv/bin/python reform.py --chrom $chrom --upstream_fasta ./uploads/$timestamp/$upstream_fasta \ - --downstream_fasta ./uploads/$timestamp/$downstream_fasta --in_fasta ./uploads/$timestamp/$in_fasta \ - --in_gff ./uploads/$timestamp/$in_gff --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path" \ + echo /home/reform/venv/bin/python reform.py --chrom $chrom --upstream_fasta $upstream_fasta \ + --downstream_fasta $downstream_fasta --in_fasta $in_fasta \ + --in_gff $in_gff --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path" \ --output_dir "./results/$timestamp/" - /home/reform/venv/bin/python reform.py --chrom $chrom --upstream_fasta ./uploads/$timestamp/$upstream_fasta \ - --downstream_fasta ./uploads/$timestamp/$downstream_fasta --in_fasta ./uploads/$timestamp/$in_fasta \ - --in_gff ./uploads/$timestamp/$in_gff --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path" \ + /home/reform/venv/bin/python reform.py --chrom $chrom --upstream_fasta $upstream_fasta \ + --downstream_fasta $downstream_fasta --in_fasta $in_fasta \ + --in_gff $in_gff --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path" \ --output_dir "./results/$timestamp/" 2>&1 | tee ./results/$timestamp/$timestamp-worker-err.log fi From 38e5d73374e6fc95e1de91c1a4f24985562ba069 Mon Sep 17 00:00:00 2001 From: Yuwei Sun Date: Fri, 4 Oct 2024 22:20:47 -0400 Subject: [PATCH 2/2] Pass compressed files directly to reform --- run.sh | 61 +++++++++------------------------------------------------- 1 file changed, 9 insertions(+), 52 deletions(-) diff --git a/run.sh b/run.sh index 1b8739e..2054bcf 100644 --- a/run.sh +++ b/run.sh @@ -28,61 +28,18 @@ mkdir -p ./results/$timestamp log_file="./results/$timestamp/$timestamp-worker-out.log" exec > >(tee -a "$log_file") -# Function to determine if a path is a URL -is_url() { - if [[ $1 =~ ^https?:// ]] || [[ $1 =~ ^ftp:// ]]; then - return 0 # It's a URL - else - return 1 # It's a file path - fi -} - -# Function to process downloading and decompressing files -download_and_decompress() { - local file_url=$1 - local target_path=$2 - 
-    local file_name=$(basename "$file_url")
-
-    echo "Downloading $file_url"
-    wget --no-check-certificate -nv "$file_url" -O "$target_path/$file_name"
-    if [[ ${file_name: -3} == ".gz" ]]; then
-        echo "pigz -d $target_path/$file_name"
-        pigz -d "$target_path/$file_name"
-        file_name=${file_name::-3}
-    fi
-}
-
 # Create the upload directories
 echo "mkdir -p ./$target_dir"
 mkdir -p "./$target_dir"
 
 # Variables to hold the final paths to be used in the reform.py command
-ref_fasta_path="$ref_fasta"
-ref_gff_path="$ref_gff"
-
-# Check and process the reference fasta file
-if is_url "$ref_fasta"; then
-    download_and_decompress "$ref_fasta" "./$target_dir"
-    ref_fasta_path="./$target_dir/$(basename "$ref_fasta")"
-    if [[ ${ref_fasta_path: -3} == ".gz" ]]; then
-        ref_fasta_path="${ref_fasta_path::-3}"
-    fi
-else
-    echo "Using local file: $ref_fasta"
-    ref_fasta_path="$ref_fasta"
-fi
+echo "Downloading $ref_fasta"
+wget --no-check-certificate -nv "$ref_fasta" -O "$target_dir/$(basename "$ref_fasta")"
+ref_fasta_path="$target_dir/$(basename "$ref_fasta")"
 
-# Check and process the reference gff file
-if is_url "$ref_gff"; then
-    download_and_decompress "$ref_gff" "./$target_dir"
-    ref_gff_path="./$target_dir/$(basename "$ref_gff")"
-    if [[ ${ref_gff_path: -3} == ".gz" ]]; then
-        ref_gff_path="${ref_gff_path::-3}"
-    fi
-else
-    echo "Using local file: $ref_gff"
-    ref_gff_path="$ref_gff"
-fi
+echo "Downloading $ref_gff"
+wget --no-check-certificate -nv "$ref_gff" -O "$target_dir/$(basename "$ref_gff")"
+ref_gff_path="$target_dir/$(basename "$ref_gff")"
 
 # Run reform.py
 # ./uploads/$timestamp/
@@ -107,9 +64,9 @@ else
     --output_dir "./results/$timestamp/" 2>&1 | tee ./results/$timestamp/$timestamp-worker-err.log
 fi
 
-# remove upload folder
-echo "rm -Rf ./uploads/$timestamp"
-rm -Rf ./uploads/$timestamp
+# # remove upload folder
+# echo "rm -Rf ./uploads/$timestamp"
+# rm -Rf ./uploads/$timestamp
 
 # create downloads directory
 echo "mkdir -p ./downloads/$timestamp"
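The second patch stops decompressing the downloaded references in run.sh and, per its subject line, passes the compressed files directly to reform. The sketch below shows one way reform.py could read a reference that may or may not be gzip-compressed, consistent with the ref_gff.endswith('.gz') cleanup in the reform.py hunk above; the helper names and approach are illustrative assumptions, not code from the patch.

import gzip
import shutil

def open_maybe_gzipped(path):
    """Open a reference file for reading, transparently handling .gz input."""
    if path.endswith('.gz'):
        return gzip.open(path, 'rt')
    return open(path, 'r')

def decompress_to_temp(gz_path):
    """Decompress a *.gz file to a plain copy that can be removed after use,
    mirroring the os.remove(ref_gff_path) cleanup above."""
    plain_path = gz_path[:-3]   # strip the ".gz" suffix
    with gzip.open(gz_path, 'rb') as src, open(plain_path, 'wb') as dst:
        shutil.copyfileobj(src, dst)
    return plain_path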