Skip to content

Commit

Permalink
Merge pull request #15 from selfdecode/feature/Axiom
Browse files Browse the repository at this point in the history
Feature/axiom_FGA
  • Loading branch information
adrianodemarino authored Apr 3, 2023
2 parents 90f4077 + ac36066 commit 8deb4ce
Showing 1 changed file with 136 additions and 0 deletions.
136 changes: 136 additions & 0 deletions src/snps/io/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,65 @@
"MT": "MT",
}

# Lookup table translating the chromosome labels used by Functional Genomic
# Analysis (Axiom) files into normalized chromosome names. Numeric codes are
# accepted both as strings and as integers: 1-22 map to themselves, 23 and 24
# map to "X", 25 maps to "Y" and 26 maps to "MT". The letter labels "X", "Y"
# and "MT" pass through unchanged.
CHROMOSOME_FGA = {
    # String-typed numeric codes.
    **{str(number): str(number) for number in range(1, 23)},
    "23": "X",
    "24": "X",
    "25": "Y",
    "26": "MT",
    # Integer-typed numeric codes.
    **{number: str(number) for number in range(1, 23)},
    23: "X",
    24: "X",
    25: "Y",
    26: "MT",
    # Already-normalized labels.
    **{label: label for label in ("X", "Y", "MT")},
}


def get_empty_snps_dataframe():
"""Get empty dataframe normalized for usage with ``snps``.
Expand Down Expand Up @@ -263,6 +322,11 @@ def read(self):
elif re.match("^#*[ \t]*rsid[, \t]*chr", first_line):
d = self.read_generic(file, compression)
print('SNPs library reader: Generic 3')

elif ("axiom", comments.lower()) and ("Functional Genomic Analysis" in first_line):
d = self.read_FGA(file, compression)
print('SNPs library reader: Functional Genomic Analysis')

elif re.match("^rs[0-9]*[, \t]{1}[1]", first_line):
d = self.read_generic(file, compression, skip=0)
print('SNPs library reader: Generic 4')
Expand Down Expand Up @@ -1309,6 +1373,7 @@ def parse(sep):
compression=compression,
)
df["chrom"] = df["chrom"].map(CHROMOSOME)
df = df[~df.chrom.isna()]
return df
try:
df = parse(",")
Expand Down Expand Up @@ -1339,6 +1404,77 @@ def parse(sep):

return self.read_helper("generic", parser)

def read_FGA(self, file, compression, skip=1):
    """Read and parse a Functional Genomic Analysis (Axiom) file.

    Notes
    -----
    The first four columns are read as 'rsid', 'chrom', 'pos' and
    'genotype', with 'rsid' used as the index. The file is first parsed as
    tab-separated text without a header row, ignoring everything after a
    '#'. If that attempt raises ``ValueError``, the file is parsed again
    with the delimiter sniffed by the python engine and the first `skip`
    rows skipped.

    Values listed in ``NA_VALUES`` are read as missing. Chromosome labels
    are translated through ``CHROMOSOME_FGA`` and rows whose label is not
    in that table are dropped, whichever parsing attempt succeeded.

    Parameters
    ----------
    file : str or binary buffer
        path to file; an ``io.BufferedIOBase`` object is rewound before
        the second parsing attempt
    compression : str or None
        forwarded to ``pandas.read_csv``
    skip : int
        number of leading rows to skip; only used by the delimiter-sniffing
        fallback

    Returns
    -------
    dict
        result of `read_helper`
    """
    columns = ["rsid", "chrom", "pos", "genotype"]

    def parser():
        try:
            df = pd.read_csv(
                file,
                sep="\t",
                comment="#",
                na_values=NA_VALUES,
                header=None,
                names=columns,
                usecols=[0, 1, 2, 3],
                index_col=0,
                dtype=NORMALIZED_DTYPES,
                compression=compression,
            )
        except ValueError:
            # The failed attempt may have consumed the buffer; start over
            # from the beginning. Retrying with the same tab separator
            # would fail in the same way, so go straight to sniffing.
            if isinstance(file, io.BufferedIOBase):
                file.seek(0)

            df = pd.read_csv(
                file,
                sep=None,
                na_values=NA_VALUES,
                skiprows=skip,
                engine="python",
                names=columns,
                usecols=[0, 1, 2, 3],
                index_col=0,
                dtype=NORMALIZED_DTYPES,
                compression=compression,
            )

        # Normalize chromosome labels and discard rows that do not map to
        # a known chromosome, identically for both parsing paths.
        df["chrom"] = df["chrom"].map(CHROMOSOME_FGA)
        df = df[~df.chrom.isna()]
        return (df,)

    return self.read_helper("generic", parser)

def read_vcf(self, file, compression, provider, rsids=()):
"""Read and parse VCF file.
Expand Down

0 comments on commit 8deb4ce

Please sign in to comment.