Skip to content

Commit

Permalink
Update parse.py
Browse files Browse the repository at this point in the history
Added the option of passing a file-like object to the parse function instead of only a file path. This enables Streamlit to use uploaded files that are kept in memory.
  • Loading branch information
inigoalonso authored Jun 24, 2024
1 parent 359f02c commit d377c4f
Showing 1 changed file with 46 additions and 21 deletions.
67 changes: 46 additions & 21 deletions cpm/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,48 +2,73 @@
from cpm.models import DSM


def parse_csv(filepath: str, delimiter: str = 'auto', encoding: str = 'utf-8', instigator: str = 'column'):
def parse_csv(file, delimiter: str = 'auto', encoding: str = 'utf-8', instigator: str = 'column'):
"""
Parse CSV to DSM
:param filepath: Targeted CSV file
:param file: Targeted CSV file or file-like object
:param delimiter: CSV delimiter. Defaults to auto-detection.
:param encoding: text-encoding. Defaults to utf-8
:param instigator: Determines directionality of DSM. Defaults to columns instigating rows.
:return: DSM
"""

def read_file(file):
    """
    Return the complete text content of ``file``.
    :param file: Filepath (str or os.PathLike) or a readable, seekable file-like object
    :return: Entire content of the file as str
    :raises ValueError: if ``file`` is neither a path nor a file-like object
    """
    # Local import so this helper does not depend on the module's import block.
    import os

    # ``encoding`` comes from the enclosing parse_csv() call.
    if isinstance(file, (str, os.PathLike)):
        with open(file, 'r', encoding=encoding) as f:
            return f.read()

    if hasattr(file, 'read'):
        position = file.tell()
        try:
            # Always read from the start so that a partially consumed stream
            # still yields the whole file (same behaviour as get_file_lines).
            file.seek(0)
            content = file.read()
        finally:
            # Leave the caller's stream where we found it.
            file.seek(position)
        # Binary streams (e.g. io.BytesIO) return bytes; the rest of the
        # parser works on str, so decode with the requested encoding.
        if isinstance(content, bytes):
            content = content.decode(encoding)
        return content

    raise ValueError("Invalid file input. Must be a filepath or a file-like object.")

def get_file_lines(file):
    """
    Return every line of ``file`` as a list of str (line endings preserved).
    :param file: Filepath (str or os.PathLike) or a readable, seekable file-like object
    :return: List of text lines, read from the beginning of the file
    :raises ValueError: if ``file`` is neither a path nor a file-like object
    """
    # Local import so this helper does not depend on the module's import block.
    import os

    # ``encoding`` comes from the enclosing parse_csv() call.
    if isinstance(file, (str, os.PathLike)):
        with open(file, 'r', encoding=encoding) as f:
            return f.readlines()

    if hasattr(file, 'read'):
        position = file.tell()
        try:
            file.seek(0)
            raw_lines = file.readlines()
        finally:
            # Restore the caller's stream position even if reading fails.
            file.seek(position)
        # Binary streams (e.g. io.BytesIO) yield bytes lines, which cannot be
        # split with a str delimiter later on; decode them up front.
        return [
            line.decode(encoding) if isinstance(line, bytes) else line
            for line in raw_lines
        ]

    raise ValueError("Invalid file input. Must be a filepath or a file-like object.")

content = read_file(file)

if delimiter == 'auto':
with open(filepath, 'r', encoding=encoding) as file:
delimiter = detect_delimiter(file.read())
delimiter = detect_delimiter(content)

# Identify number of rows, and separate header row
num_cols = 0
column_names = []
with open(filepath, 'r') as file:
for line in file:
column_names.append(line.split(delimiter)[0])
num_cols += 1
lines = get_file_lines(file)
for line in lines:
column_names.append(line.split(delimiter)[0])
num_cols += 1

# We do not want the first column in the header
column_names.pop(0)

data = []

with open(filepath, 'r') as file:
for i, line in enumerate(file):
if i == 0:
for i, line in enumerate(lines):
if i == 0:
continue
data.append([])
for j, col in enumerate(line.split(delimiter)):
if j == 0:
continue
data.append([])
for j, col in enumerate(line.split(delimiter)):
if j == 0:
continue
if col == "":
if col == "":
data[i-1].append(None)
else:
try:
data[i-1].append(float(col))
except ValueError:
data[i-1].append(None)
else:
try:
data[i-1].append(float(col))
except ValueError:
data[i - 1].append(None)

dsm = DSM(matrix=data, columns=column_names, instigator=instigator)

Expand Down

0 comments on commit d377c4f

Please sign in to comment.