######## snakemake preamble start (automatically inserted, do not edit) ########
+import sys; sys.path.extend(['/Users/jbloom/miniconda3/envs/seqneut-pipeline/lib/python3.11/site-packages', '/Users/jbloom/Library/Caches/snakemake/snakemake/source-cache/runtime-cache/tmph9qjx7u5/file/Users/jbloom/Library/CloudStorage/OneDrive-FredHutchinsonCancerCenter/seqneut-pipeline/notebooks', '/Users/jbloom/Library/CloudStorage/OneDrive-FredHutchinsonCancerCenter/seqneut-pipeline/notebooks']); import pickle; snakemake = pickle.loads(b'\x80\x04\x95B:\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8c*results/barcode_counts/plate11_none-10.csv\x94\x8c-results/barcode_counts/plate11_M099d30_20.csv\x94\x8c-results/barcode_counts/plate11_M099d30_60.csv\x94\x8c.results/barcode_counts/plate11_M099d30_180.csv\x94\x8c.results/barcode_counts/plate11_M099d30_540.csv\x94\x8c/results/barcode_counts/plate11_M099d30_1620.csv\x94\x8c/results/barcode_counts/plate11_M099d30_4860.csv\x94\x8c0results/barcode_counts/plate11_M099d30_14580.csv\x94\x8c0results/barcode_counts/plate11_M099d30_43740.csv\x94\x8c1results/barcode_counts/plate11_M099d30_131220.csv\x94\x8c1results/barcode_counts/plate11_M099d30_393660.csv\x94\x8c)results/barcode_counts/plate11_none-2.csv\x94\x8c*results/barcode_counts/plate11_none-11.csv\x94\x8c,results/barcode_counts/plate11_M099d0_20.csv\x94\x8c,results/barcode_counts/plate11_M099d0_60.csv\x94\x8c-results/barcode_counts/plate11_M099d0_180.csv\x94\x8c-results/barcode_counts/plate11_M099d0_540.csv\x94\x8c.results/barcode_counts/plate11_M099d0_1620.csv\x94\x8c.results/barcode_counts/plate11_M099d0_4860.csv\x94\x8c/results/barcode_counts/plate11_M099d0_14580.csv\x94\x8c/results/barcode_counts/plate11_M099d0_43740.csv\x94\x8c0results/barcode_counts/plate11_M099d0_131220.csv\x94\x8c0results/barcode_counts/plate11_M099d0_393660.csv\x94\x8c)results/barcode_counts/plate11_none-3.csv\x94\x8c*results/barcode_counts/plate11_none-12.csv\x94\x8
c-results/barcode_counts/plate11_Y044d30_20.csv\x94\x8c-results/barcode_counts/plate11_Y044d30_60.csv\x94\x8c.results/barcode_counts/plate11_Y044d30_180.csv\x94\x8c.results/barcode_counts/plate11_Y044d30_540.csv\x94\x8c/results/barcode_counts/plate11_Y044d30_1620.csv\x94\x8c/results/barcode_counts/plate11_Y044d30_4860.csv\x94\x8c0results/barcode_counts/plate11_Y044d30_14580.csv\x94\x8c0results/barcode_counts/plate11_Y044d30_43740.csv\x94\x8c1results/barcode_counts/plate11_Y044d30_131220.csv\x94\x8c1results/barcode_counts/plate11_Y044d30_393660.csv\x94\x8c)results/barcode_counts/plate11_none-4.csv\x94\x8c)results/barcode_fates/plate11_none-10.csv\x94\x8c,results/barcode_fates/plate11_M099d30_20.csv\x94\x8c,results/barcode_fates/plate11_M099d30_60.csv\x94\x8c-results/barcode_fates/plate11_M099d30_180.csv\x94\x8c-results/barcode_fates/plate11_M099d30_540.csv\x94\x8c.results/barcode_fates/plate11_M099d30_1620.csv\x94\x8c.results/barcode_fates/plate11_M099d30_4860.csv\x94\x8c/results/barcode_fates/plate11_M099d30_14580.csv\x94\x8c/results/barcode_fates/plate11_M099d30_43740.csv\x94\x8c0results/barcode_fates/plate11_M099d30_131220.csv\x94\x8c0results/barcode_fates/plate11_M099d30_393660.csv\x94\x8c(results/barcode_fates/plate11_none-2.csv\x94\x8c)results/barcode_fates/plate11_none-11.csv\x94\x8c+results/barcode_fates/plate11_M099d0_20.csv\x94\x8c+results/barcode_fates/plate11_M099d0_60.csv\x94\x8c,results/barcode_fates/plate11_M099d0_180.csv\x94\x8c,results/barcode_fates/plate11_M099d0_540.csv\x94\x8c-results/barcode_fates/plate11_M099d0_1620.csv\x94\x8c-results/barcode_fates/plate11_M099d0_4860.csv\x94\x8c.results/barcode_fates/plate11_M099d0_14580.csv\x94\x8c.results/barcode_fates/plate11_M099d0_43740.csv\x94\x8c/results/barcode_fates/plate11_M099d0_131220.csv\x94\x8c/results/barcode_fates/plate11_M099d0_393660.csv\x94\x8c(results/barcode_fates/plate11_none-3.csv\x94\x8c)results/barcode_fates/plate11_none-12.csv\x94\x8c,results/barcode_fates/plate11_Y044d30_20.csv\x94\x
8c,results/barcode_fates/plate11_Y044d30_60.csv\x94\x8c-results/barcode_fates/plate11_Y044d30_180.csv\x94\x8c-results/barcode_fates/plate11_Y044d30_540.csv\x94\x8c.results/barcode_fates/plate11_Y044d30_1620.csv\x94\x8c.results/barcode_fates/plate11_Y044d30_4860.csv\x94\x8c/results/barcode_fates/plate11_Y044d30_14580.csv\x94\x8c/results/barcode_fates/plate11_Y044d30_43740.csv\x94\x8c0results/barcode_fates/plate11_Y044d30_131220.csv\x94\x8c0results/barcode_fates/plate11_Y044d30_393660.csv\x94\x8c(results/barcode_fates/plate11_none-4.csv\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94e}\x94(\x8c\x06_names\x94}\x94(\x8c\ncount_csvs\x94K\x00K$\x86\x94\x8c\tfate_csvs\x94K$KH\x86\x94\x8c\x11viral_library_csv\x94KHN\x86\x94\x8c\x15neut_standard_set_csv\x94KIN\x86\x94u\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94eha\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(hg)}\x94\x8c\x05_name\x94hasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bhWh\x06\x8c\tNamedlist\x94\x93\x94)\x81\x94(h\nh\x0bh\x0ch\rh\x0eh\x0fh\x10h\x11h\x12h\x13h\x14h\x15h\x16h\x17h\x18h\x19h\x1ah\x1bh\x1ch\x1dh\x1eh\x1fh 
h!h"h#h$h%h&h\'h(h)h*h+h,h-e}\x94(hU}\x94h_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bubhYhr)\x81\x94(h.h/h0h1h2h3h4h5h6h7h8h9h:h;h<h=h>h?h@hAhBhChDhEhFhGhHhIhJhKhLhMhNhOhPhQe}\x94(hU}\x94h_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bubh[hRh]hSub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8c5results/plates/plate11/process_counts_qc_failures.txt\x94\x8c+results/plates/plate11/frac_infectivity.csv\x94e}\x94(hU}\x94(\x8c\x0bqc_failures\x94K\x00N\x86\x94\x8c\x14frac_infectivity_csv\x94K\x01N\x86\x94uh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bh\x93h\x8fh\x95h\x90ub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(\x8c\x12pandas.core.series\x94\x8c\x06Series\x94\x93\x94)\x81\x94}\x94(\x8c\x04_mgr\x94\x8c\x1epandas.core.internals.managers\x94\x8c\x12SingleBlockManager\x94\x93\x94)\x81\x94(]\x94\x8c\x18pandas.core.indexes.base\x94\x8c\n_new_Index\x94\x93\x94\x8c\x19pandas.core.indexes.range\x94\x8c\nRangeIndex\x94\x93\x94}\x94(\x8c\x04name\x94N\x8c\x05start\x94K\x00\x8c\x04stop\x94K$\x8c\x04step\x94K\x01u\x86\x94R\x94a]\x94\x8c\x15numpy.core.multiarray\x94\x8c\x0c_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\x07ndarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K$\x85\x94h\xc0\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b\x89]\x94(\x8c\x0fplate11_none-10\x94\x8c\x12plate11_M099d30_20\x94\x8c\x12plate11_M099d30_60\x94\x8c\x13plate11_M099d30_180\x94\x8c\x13plate11_M099d30_540\x94\x8c\x14plate11_M099d30_1620\x94\x8c\x14plate11_M099d30_4860\x94\x8c\x15plate11_M099d30_14580\x94\x8c\x15plate11_M099d30_43740\x94\x8c\x16plate11_M099d30_131220\x94\x8c\x16plate11_M099d30_393660\x94\x8c\x0eplate11_none-2\x94\x8c\x0fplate11_none-11\x94\x8c\x11plate11_M099d0_20\x94\x8c\x11plate11_M099d0_60\x94\x8c\x12plate11_M099d0_180\x94\x8c\
x12plate11_M099d0_540\x94\x8c\x13plate11_M099d0_1620\x94\x8c\x13plate11_M099d0_4860\x94\x8c\x14plate11_M099d0_14580\x94\x8c\x14plate11_M099d0_43740\x94\x8c\x15plate11_M099d0_131220\x94\x8c\x15plate11_M099d0_393660\x94\x8c\x0eplate11_none-3\x94\x8c\x0fplate11_none-12\x94\x8c\x12plate11_Y044d30_20\x94\x8c\x12plate11_Y044d30_60\x94\x8c\x13plate11_Y044d30_180\x94\x8c\x13plate11_Y044d30_540\x94\x8c\x14plate11_Y044d30_1620\x94\x8c\x14plate11_Y044d30_4860\x94\x8c\x15plate11_Y044d30_14580\x94\x8c\x15plate11_Y044d30_43740\x94\x8c\x16plate11_Y044d30_131220\x94\x8c\x16plate11_Y044d30_393660\x94\x8c\x0eplate11_none-4\x94et\x94ba]\x94h\xb1h\xb4}\x94(h\xb6Nh\xb7K\x00h\xb8K$h\xb9K\x01u\x86\x94R\x94a}\x94\x8c\x060.14.1\x94}\x94(\x8c\x04axes\x94h\xae\x8c\x06blocks\x94]\x94}\x94(\x8c\x06values\x94h\xc6\x8c\x08mgr_locs\x94\x8c\x08builtins\x94\x8c\x05slice\x94\x93\x94K\x00K$K\x01\x87\x94R\x94uaust\x94b\x8c\x04_typ\x94\x8c\x06series\x94\x8c\t_metadata\x94]\x94hka\x8c\x05attrs\x94}\x94\x8c\x06_flags\x94}\x94\x8c\x17allows_duplicate_labels\x94\x88shk\x8c\x06sample\x94ub}\x94(\x8c\x04date\x94\x8c\n2023-09-26\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate11_samples.csv\x94\x8c\x1cprocess_counts_qc_thresholds\x94}\x94(\x8c\x12avg_barcode_counts\x94K\xfa\x8c\x16min_neut_standard_frac\x94G?tz\xe1G\xae\x14{\x8c\x1fmax_neut_standard_frac_no_serum\x94G?\xb9\x99\x99\x99\x99\x99\x9a\x8c\x18barcode_frac_consistency\x94K\x04\x8c\x16min_viral_barcode_frac\x94G?@bM\xd2\xf1\xa9\xfc\x8c\x1emin_neut_standard_barcode_frac\x94G?tz\xe1G\xae\x14{\x8c\x17min_neut_standard_count\x94M^\x01\x8c 
min_no_serum_viral_barcode_count\x94K\x05\x8c!min_dilutions_per_serum_replicate\x94K\x04\x8c\x14max_frac_infectivity\x94K\x08u\x8c\x10barcodes_to_drop\x94]\x94\x8c\x10GGTCCATCTCAGATCG\x94a\x8c\rwells_to_drop\x94]\x94(\x8c\x02D6\x94\x8c\x03C12\x94e\x8c\x07samples\x94\x8c\x11pandas.core.frame\x94\x8c\tDataFrame\x94\x93\x94)\x81\x94}\x94(h\xa9h\xaa\x8c\x0cBlockManager\x94\x93\x94(\x8c\x16pandas._libs.internals\x94\x8c\x0f_unpickle_block\x94\x93\x94h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K\x03K$\x86\x94h\xcc\x89]\x94(\x8c\x02B1\x94\x8c\x02B2\x94\x8c\x02B3\x94\x8c\x02B4\x94\x8c\x02B5\x94\x8c\x02B6\x94\x8c\x02B7\x94\x8c\x02B8\x94\x8c\x02B9\x94\x8c\x03B10\x94\x8c\x03B11\x94\x8c\x03B12\x94\x8c\x02C1\x94\x8c\x02C2\x94\x8c\x02C3\x94\x8c\x02C4\x94\x8c\x02C5\x94\x8c\x02C6\x94\x8c\x02C7\x94\x8c\x02C8\x94\x8c\x02C9\x94\x8c\x03C10\x94\x8c\x03C11\x94\x8c\x03C12\x94\x8c\x02D1\x94\x8c\x02D2\x94\x8c\x02D3\x94\x8c\x02D4\x94\x8c\x02D5\x94\x8c\x02D6\x94\x8c\x02D7\x94\x8c\x02D8\x94\x8c\x02D9\x94\x8c\x03D10\x94\x8c\x03D11\x94\x8c\x03D12\x94\x8c\x04none\x94\x8c\x07M099d30\x94jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jb\x01\x00\x00jb\x01\x00\x00\x8c\x06M099d0\x94jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jb\x01\x00\x00jb\x01\x00\x00\x8c\x07Y044d30\x94je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00jb\x01\x00\x00\x8c*fastqs/Plate11_Noserum2_S2_R1_001.fastq.gz\x94\x8c)fastqs/M099_d30_conc1_S10_R1_001.fastq.gz\x94\x8c)fastqs/M099_d30_conc2_S18_R1_001.fastq.gz\x94\x8c)fastqs/M099_d30_conc3_S26_R1_001.fastq.gz\x94\x8c)fastqs/M099_d30_conc4_S34_R1_001.fastq.gz\x94\x8c)fastqs/M099_d30_conc5_S42_R1_001.fastq.gz\x94\x8c)fastqs/M099_d30_conc6_S50_R1_001.fastq.gz\x94\x8c)fastqs/M099_d30_conc7_S58_R1_001.fastq.gz\x94\x8c)fastqs/M099_d30_conc8_S66_R1_001.fa
stq.gz\x94\x8c)fastqs/M099_d30_conc9_S74_R1_001.fastq.gz\x94\x8c*fastqs/M099_d30_conc10_S82_R1_001.fastq.gz\x94\x8c,fastqs/Plate11_Noserum10_S90_R1_001.fastq.gz\x94\x8c*fastqs/Plate11_Noserum3_S3_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc1_S11_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc2_S19_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc3_S27_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc4_S35_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc5_S43_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc6_S51_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc7_S59_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc8_S67_R1_001.fastq.gz\x94\x8c(fastqs/M099_d0_conc9_S75_R1_001.fastq.gz\x94\x8c)fastqs/M099_d0_conc10_S83_R1_001.fastq.gz\x94\x8c,fastqs/Plate11_Noserum11_S91_R1_001.fastq.gz\x94\x8c*fastqs/Plate11_Noserum4_S4_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc1_S12_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc2_S20_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc3_S28_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc4_S36_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc5_S44_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc6_S52_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc7_S60_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc8_S68_R1_001.fastq.gz\x94\x8c)fastqs/Y044_d30_conc9_S76_R1_001.fastq.gz\x94\x8c*fastqs/Y044_d30_conc10_S84_R1_001.fastq.gz\x94\x8c,fastqs/Plate11_Noserum12_S92_R1_001.fastq.gz\x94et\x94bh\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K\x03\x85\x94h\xc9\x8c\x02i8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01<\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x18\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x94t\x94bK\x02\x87\x94R\x94j8\x01\x00\x00\x8c\x1apandas.core.arrays.integer\x94\x8c\x0cIntegerArray\x94\x93\x94)\x81\x94}\x94(\x8c\x05_data\x94h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K$\x85\x94j\x91\x01\x00\x00\x89B 
\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\xb4\x00\x00\x00\x00\x00\x00\x00\x1c\x02\x00\x00\x00\x00\x00\x00T\x06\x00\x00\x00\x00\x00\x00\xfc\x12\x00\x00\x00\x00\x00\x00\xf48\x00\x00\x00\x00\x00\x00\xdc\xaa\x00\x00\x00\x00\x00\x00\x94\x00\x02\x00\x00\x00\x00\x00\xbc\x01\x06\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\xb4\x00\x00\x00\x00\x00\x00\x00\x1c\x02\x00\x00\x00\x00\x00\x00T\x06\x00\x00\x00\x00\x00\x00\xfc\x12\x00\x00\x00\x00\x00\x00\xf48\x00\x00\x00\x00\x00\x00\xdc\xaa\x00\x00\x00\x00\x00\x00\x94\x00\x02\x00\x00\x00\x00\x00\xbc\x01\x06\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00\x00\xb4\x00\x00\x00\x00\x00\x00\x00\x1c\x02\x00\x00\x00\x00\x00\x00T\x06\x00\x00\x00\x00\x00\x00\xfc\x12\x00\x00\x00\x00\x00\x00\xf48\x00\x00\x00\x00\x00\x00\xdc\xaa\x00\x00\x00\x00\x00\x00\x94\x00\x02\x00\x00\x00\x00\x00\xbc\x01\x06\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x94t\x94b\x8c\x05_mask\x94h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K$\x85\x94h\xc9\x8c\x02b1\x94\x89\x88\x87\x94R\x94(K\x03h\xcdNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C$\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x94t\x94b\x8c\x06_cache\x94}\x94\x8c\x05dtype\x94j\x98\x01\x00\x00\x8c\nInt64Dtype\x94\x93\x94)\x81\x94}\x94j\xaf\x01\x00\x00}\x94(\x8c\x0bnumpy_dtype\x94j\x91\x01\x00\x00\x8c\x04kind\x94\x8c\x01i\x94usbsubj\x04\x01\x00\x00K\x02K\x03K\x01\x87\x94R\x94K\x02\x87\x94R\x94j8\x01\x00\x00j\x9a\x01\x00\x00)\x81\x94}\x94(j\x9d\x01\x00\x00h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K$\x85\x94j\x91\x01\x00\x00\x89B 
\x01\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x94t\x94bj\xa4\x01\x00\x00h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K$\x85\x94j\xab\x01\x00\x00\x89C$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x94t\x94bj\xaf\x01\x00\x00}\x94j\xb1\x01\x00\x00j\xb4\x01\x00\x00subj\x04\x01\x00\x00K\x03K\x04K\x01\x87\x94R\x94K\x02\x87\x94R\x94j8\x01\x00\x00h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K\x01K$\x86\x94h\xcc\x89]\x94(\x8c\x07none-10\x94jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00jc\x01\x00\x00\x8c\x06none-2\x94\x8c\x07none-11\x94jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00jd\x01\x00\x00\x8c\x06none-3\x94\x8c\x07no
ne-12\x94je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00je\x01\x00\x00\x8c\x06none-4\x94et\x94bj\x04\x01\x00\x00K\x05K\x06K\x01\x87\x94R\x94K\x02\x87\x94R\x94j8\x01\x00\x00h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K\x01K$\x86\x94h\xcc\x89]\x94(j\xd6\x01\x00\x00\x8c\nM099d30_20\x94\x8c\nM099d30_60\x94\x8c\x0bM099d30_180\x94\x8c\x0bM099d30_540\x94\x8c\x0cM099d30_1620\x94\x8c\x0cM099d30_4860\x94\x8c\rM099d30_14580\x94\x8c\rM099d30_43740\x94\x8c\x0eM099d30_131220\x94\x8c\x0eM099d30_393660\x94j\xd7\x01\x00\x00j\xd8\x01\x00\x00\x8c\tM099d0_20\x94\x8c\tM099d0_60\x94\x8c\nM099d0_180\x94\x8c\nM099d0_540\x94\x8c\x0bM099d0_1620\x94\x8c\x0bM099d0_4860\x94\x8c\x0cM099d0_14580\x94\x8c\x0cM099d0_43740\x94\x8c\rM099d0_131220\x94\x8c\rM099d0_393660\x94j\xd9\x01\x00\x00j\xda\x01\x00\x00\x8c\nY044d30_20\x94\x8c\nY044d30_60\x94\x8c\x0bY044d30_180\x94\x8c\x0bY044d30_540\x94\x8c\x0cY044d30_1620\x94\x8c\x0cY044d30_4860\x94\x8c\rY044d30_14580\x94\x8c\rY044d30_43740\x94\x8c\x0eY044d30_131220\x94\x8c\x0eY044d30_393660\x94j\xdb\x01\x00\x00et\x94bj\x04\x01\x00\x00K\x06K\x07K\x01\x87\x94R\x94K\x02\x87\x94R\x94j8\x01\x00\x00h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K\x01K$\x86\x94h\xcc\x89]\x94(h\xd0h\xd1h\xd2h\xd3h\xd4h\xd5h\xd6h\xd7h\xd8h\xd9h\xdah\xdbh\xdch\xddh\xdeh\xdfh\xe0h\xe1h\xe2h\xe3h\xe4h\xe5h\xe6h\xe7h\xe8h\xe9h\xeah\xebh\xech\xedh\xeeh\xefh\xf0h\xf1h\xf2h\xf3et\x94bj\x04\x01\x00\x00K\x07K\x08K\x01\x87\x94R\x94K\x02\x87\x94R\x94j8\x01\x00\x00h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K\x01K$\x86\x94h\xcc\x89]\x94(\x8c\x07plate11\x94j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\
x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00et\x94bj\x04\x01\x00\x00K\x08K\tK\x01\x87\x94R\x94K\x02\x87\x94R\x94j8\x01\x00\x00h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K\x01K$\x86\x94h\xcc\x89]\x94(\x8c\nplate11-10\x94j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00\x8c\tplate11-2\x94\x8c\nplate11-11\x94j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00\x8c\tplate11-3\x94\x8c\nplate11-12\x94j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00j\x18\x02\x00\x00\x8c\tplate11-4\x94et\x94bj\x04\x01\x00\x00K\tK\nK\x01\x87\x94R\x94K\x02\x87\x94R\x94t\x94]\x94(h\xb1h\xaf\x8c\x05Index\x94\x93\x94}\x94(\x8c\x04data\x94h\xbfh\xc2K\x00\x85\x94h\xc4\x87\x94R\x94(K\x01K\n\x85\x94h\xcc\x89]\x94(\x8c\x04well\x94\x8c\x05serum\x94\x8c\x0fdilution_factor\x94\x8c\treplicate\x94\x8c\x05fastq\x94\x8c\x0fserum_replicate\x94\x8c\x0esample_noplate\x94j\x11\x01\x00\x00\x8c\x05plate\x94\x8c\x0fplate_replicate\x94et\x94bh\xb6Nu\x86\x94R\x94h\xbbe\x86\x94R\x94j\x08\x01\x00\x00\x8c\tdataframe\x94j\n\x01\x00\x00]\x94j\x0c\x01\x00\x00}\x94j\x0e\x01\x00\x00}\x94j\x10\x01\x00\x00\x88subue}\x94(hU}\x94(j.\x01\x00\x00K\x00N\x86\x94\x8c\x0cplate_params\x94K\x01N\x86\x94uh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bj.\x01\x00\x00h\xa7jN\x02\x00\x00j\x12\x01\x00\x00ub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94\x8c\x07plate11\x94a}\x94(hU}\x94\x8c\x05plate\x94K\x00N\x86\x94sh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhas
Nt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bj@\x02\x00\x00j]\x02\x00\x00ub\x8c\x07threads\x94K\x01\x8c\tresources\x94h\x06\x8c\tResources\x94\x93\x94)\x81\x94(K\x01K\x01\x8c0/var/folders/jj/t2kjg1p146x_z68z7j80z8yw0000gq/T\x94e}\x94(hU}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bjs\x02\x00\x00K\x01ju\x02\x00\x00K\x01jw\x02\x00\x00jp\x02\x00\x00ub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c3results/plates/plate11/process_counts_plate11.ipynb\x94a}\x94(hU}\x94\x8c\x08notebook\x94K\x00N\x86\x94sh_]\x94(hahbehahehg\x85\x94R\x94(hg)}\x94hkhasNt\x94bhbhehg\x85\x94R\x94(hg)}\x94hkhbsNt\x94bj\x89\x02\x00\x00j\x86\x02\x00\x00ub\x8c\x06config\x94}\x94(\x8c\x10seqneut-pipeline\x94\x8c\x03../\x94\x8c\x04docs\x94\x8c\x07../docs\x94\x8c\x0bdescription\x94X\xba\x01\x00\x00# Test example for [seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nThis is a small toy-example created by subsetting a real experiment dataset.\n\nSee [https://github.com/jbloomlab/seqneut-pipeline](https://github.com/jbloomlab/seqneut-pipeline)\nfor the computer code and underlying numerical data.\n\nSee [here](https://github.com/jbloomlab/seqneut-pipeline/graphs/contributors) for a\nlist of all contributors to the pipeline.\n\x94\x8c\x0fviral_libraries\x94}\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94s\x8c\x17viral_strain_plot_order\x94\x8c 
data/viral_strain_plot_order.csv\x94\x8c\x12neut_standard_sets\x94}\x94\x8c\x08loes2023\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94s\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\x08upstream\x94\x8c\x1fCTCCCTACAATGTCGGATTTGTATTTAATAG\x94\x8c\ndownstream\x94\x8c\x00\x94\x8c\x04minq\x94K\x14\x8c\x11upstream_mismatch\x94K\x04\x8c\x0ebc_orientation\x94\x8c\x02R2\x94u\x8c$default_process_counts_qc_thresholds\x94}\x94(j\x1d\x01\x00\x00K\xfaj\x1e\x01\x00\x00G?tz\xe1G\xae\x14{j\x1f\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9aj \x01\x00\x00K\x04j!\x01\x00\x00G?@bM\xd2\xf1\xa9\xfcj"\x01\x00\x00G?tz\xe1G\xae\x14{j#\x01\x00\x00M^\x01j$\x01\x00\x00K\x05j%\x01\x00\x00K\x04j&\x01\x00\x00K\x08u\x8c\x17default_curvefit_params\x94}\x94(\x8c\x18frac_infectivity_ceiling\x94K\x01\x8c\x06fixtop\x94\x89\x8c\tfixbottom\x94K\x00u\x8c\x06plates\x94}\x94(\x8c\x06plate2\x94}\x94(\x8c\x04date\x94\x8c\x08datetime\x94\x8c\x04date\x94\x93\x94C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate2_samples.csv\x94\x8c\x1cprocess_counts_qc_thresholds\x94}\x94(j\x1d\x01\x00\x00K\xfaj\x1e\x01\x00\x00G?tz\xe1G\xae\x14{j\x1f\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9aj 
\x01\x00\x00K\x04j!\x01\x00\x00G?@bM\xd2\xf1\xa9\xfcj"\x01\x00\x00G?tz\xe1G\xae\x14{j#\x01\x00\x00M^\x01j$\x01\x00\x00K\x05j%\x01\x00\x00K\x04j&\x01\x00\x00K\x08u\x8c\x10barcodes_to_drop\x94]\x94\x8c\x10GGTCCATCTCAGATCG\x94a\x8c\rwells_to_drop\x94]\x94(\x8c\x02C1\x94\x8c\x03D12\x94e\x8c\x0fcurvefit_params\x94}\x94(j\xb4\x02\x00\x00K\x01j\xb5\x02\x00\x00\x89j\xb6\x02\x00\x00K\x00uuj\x18\x02\x00\x00}\x94(j\x13\x01\x00\x00j\xbe\x02\x00\x00C\x04\x07\xe7\t\x1a\x94\x85\x94R\x94j\x15\x01\x00\x00j\x16\x01\x00\x00j\x17\x01\x00\x00j\x18\x01\x00\x00j\x19\x01\x00\x00j\x1a\x01\x00\x00j\x1b\x01\x00\x00}\x94(j\x1d\x01\x00\x00K\xfaj\x1e\x01\x00\x00G?tz\xe1G\xae\x14{j\x1f\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9aj \x01\x00\x00K\x04j!\x01\x00\x00G?@bM\xd2\xf1\xa9\xfcj"\x01\x00\x00G?tz\xe1G\xae\x14{j#\x01\x00\x00M^\x01j$\x01\x00\x00K\x05j%\x01\x00\x00K\x04j&\x01\x00\x00K\x08uj\'\x01\x00\x00]\x94j)\x01\x00\x00aj*\x01\x00\x00]\x94(j,\x01\x00\x00j-\x01\x00\x00e\x8c\x0fcurvefit_params\x94}\x94(j\xb4\x02\x00\x00K\x01j\xb5\x02\x00\x00\x89j\xb6\x02\x00\x00K\x00uuu\x8c\x1aserum_titers_qc_thresholds\x94}\x94(\x8c\x0emin_replicates\x94K\x02\x8c\x1bmax_fold_change_from_median\x94K\x03u\x8c\x1aserum_titers_qc_exclusions\x94}\x94(\x8c\x06M099d0\x94}\x94(\x8c\x16A/Bangladesh/8002/2021\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x12A/Brisbane/02/2018\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x13A/Norway/25089/2022\x94}\x94\x8c\x12replicates_to_drop\x94]\x94\x8c\x18plate11-CGGATAAAAATGATAT\x94as\x8c\x14A/Wisconsin/588/2019\x94}\x94\x8c\x12replicates_to_drop\x94]\x94\x8c\x18plate11-AGTCCTATCCTCAAAT\x94as\x8c\x19A/SouthAfrica/R16462/2021\x94}\x94\x8c\x12replicates_to_drop\x94]\x94\x8c\x17plate2-CTAGCAGATTGTATAA\x94asu\x8c\x07M099d30\x94}\x94(\x8c\x13A/Chester/5355/2022\x94}\x94\x8c\x12replicates_to_drop\x94]\x94\x8c\x18plate11-CCTCAAAATAACAAGC\x94as\x8c\x12A/Michigan/45/2015\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x12A/Paris/31196/2021\x94}\x94\x8c\tignore_qc\x94\x88su\x8c\x07Y044d30\x94}\x94(\x8c\x16A/Bangladesh/803
6/2021\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x12A/Brisbane/48/2022\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x1aA/India-PUN-NIV328484/2021\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x0eA/Perth/1/2022\x94}\x94\x8c\tignore_qc\x94\x88s\x8c\x14A/Washington/23/2020\x94}\x94\x8c\tignore_qc\x94\x88suuu\x8c\x04rule\x94\x8c\x0eprocess_counts\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8ca/Users/jbloom/Library/CloudStorage/OneDrive-FredHutchinsonCancerCenter/seqneut-pipeline/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; os.chdir(r'/Users/jbloom/Library/CloudStorage/OneDrive-FredHutchinsonCancerCenter/seqneut-pipeline/test_example');
+######## snakemake preamble end #########
+
import altair as alt
+
+import pandas as pd
+
+_ = alt.data_transformers.disable_max_rows()
+
Get the variables passed by snakemake:
# --- inputs, outputs, params, and wildcards supplied by snakemake ---
count_csvs = snakemake.input.count_csvs
fate_csvs = snakemake.input.fate_csvs
viral_library_csv = snakemake.input.viral_library_csv
neut_standard_set_csv = snakemake.input.neut_standard_set_csv

frac_infectivity_csv = snakemake.output.frac_infectivity_csv
qc_failures_file = snakemake.output.qc_failures

samples = snakemake.params.samples
plate_params = snakemake.params.plate_params
plate = snakemake.wildcards.plate

# sample metadata table for this plate
samples_df = plate_params["samples"]

# sample names, count CSVs, fate CSVs, and metadata rows must all line up
assert len(samples) == len(count_csvs) == len(fate_csvs) == len(samples_df)

print(f"Processing {plate=}")

# QC failures for this plate are collected in this set
qc_failures = set()

# look up and show the QC thresholds for this plate
qc_thresholds = plate_params["process_counts_qc_thresholds"]
display(pd.Series(qc_thresholds))

# remove the wells listed in `wells_to_drop` from the sample metadata
wells_to_drop = plate_params["wells_to_drop"]
if wells_to_drop:
    # every well requested for dropping must exist in the metadata
    unknown_wells = set(wells_to_drop).difference(samples_df["well"])
    if unknown_wells:
        raise ValueError(f"{wells_to_drop=} not all in `samples_df`")
    in_dropped_well = samples_df["well"].isin(wells_to_drop)
    print("Dropping the following wells (samples):")
    display(samples_df[in_dropped_well])
    samples_df = samples_df[~in_dropped_well]
+
Processing plate='plate11' ++
avg_barcode_counts 250.0000 +min_neut_standard_frac 0.0050 +max_neut_standard_frac_no_serum 0.1000 +barcode_frac_consistency 4.0000 +min_viral_barcode_frac 0.0005 +min_neut_standard_barcode_frac 0.0050 +min_neut_standard_count 350.0000 +min_no_serum_viral_barcode_count 5.0000 +min_dilutions_per_serum_replicate 4.0000 +max_frac_infectivity 8.0000 +dtype: float64+
Dropping the following wells (samples): ++
+ | well | +serum | +dilution_factor | +replicate | +fastq | +serum_replicate | +sample_noplate | +sample | +plate | +plate_replicate | +
---|---|---|---|---|---|---|---|---|---|---|
23 | +C12 | +none | +<NA> | +3 | +fastqs/Plate11_Noserum11_S91_R1_001.fastq.gz | +none-3 | +none-3 | +plate11_none-3 | +plate11 | +plate11-3 | +
29 | +D6 | +Y044d30 | +1620 | +2 | +fastqs/Y044_d30_conc5_S44_R1_001.fastq.gz | +Y044d30 | +Y044d30_1620 | +plate11_Y044d30_1620 | +plate11 | +plate11 | +
Statistics on barcode-parsing for each sample
Make interactive chart of the "fates" of the sequencing reads parsed for each sample on the plate.
If most sequencing reads are not "valid barcodes", this could potentially indicate some problem in the sequencing or barcode set you are parsing.
Potential fates are:
- valid barcode: barcode that matches a known virus or neutralization standard; we hope most reads are this.
- invalid barcode: a barcode with proper flanking sequences, but does not match a known virus or neutralization standard. If you have a lot of reads of this type, it is probably a good idea to look at the invalid barcode CSVs (in the `./results/barcode_invalid/` subdirectory created by the pipeline) to see what these invalid barcodes are.
- unparseable barcode: could not parse a barcode from this read as there was not a sequence of the correct length with the appropriate flanking sequence.
- low quality barcode: low-quality or `N` nucleotides in barcode; this could indicate a problem with sequencing.
- failed chastity filter: reads that failed the Illumina chastity filter, if these are reported in the FASTQ (they may not be).
Also, if the number of reads per sample is very uneven, that could indicate that you did not do a good job of balancing the different samples in the Illumina sequencing.
# Read the per-sample read-fate tables and tag each with its sample name.
fate_frames = [
    pd.read_csv(fate_csv).assign(sample=sample)
    for fate_csv, sample in zip(fate_csvs, samples)
]

# Attach sample metadata; the merge uses pandas' default inner join, so only
# samples present in `samples_df` are retained.
fates = pd.concat(fate_frames).merge(samples_df, validate="many_to_one", on="sample")

# Total reads per fate across all samples; only keep fates with at least one count.
fates = fates.assign(fate_counts=fates.groupby("fate")["count"].transform("sum"))
fates = fates[fates["fate_counts"] > 0][
    ["fate", "count", "well", "serum", "sample_noplate", "dilution_factor"]
]

# each retained row should be unique
assert not fates.duplicated().any()

# Dropdown to restrict the chart to one serum (`None` option is labeled "all").
serum_names = sorted(fates["serum"].unique().tolist())
serum_selection = alt.selection_point(
    fields=["serum"],
    bind=alt.binding_select(
        options=[None, *serum_names],
        labels=["all", *map(str, serum_names)],
        name="serum",
    ),
)

# order samples on the y-axis by serum and then dilution factor
fates_sample_order = fates.sort_values(["serum", "dilution_factor"])[
    "sample_noplate"
].tolist()
fate_order = sorted(fates["fate"].unique(), reverse=True)

# Stacked horizontal bars: one bar per sample, colored by read fate.
fates_chart = (
    alt.Chart(fates)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .mark_bar(height={"band": 0.85})
    .encode(
        x=alt.X("count", scale=alt.Scale(nice=False, padding=3)),
        y=alt.Y("sample_noplate", title=None, sort=fates_sample_order),
        color=alt.Color("fate", sort=fate_order),
        order=alt.Order("fate", sort="descending"),
        tooltip=list(fates.columns),
    )
    .properties(
        height=alt.Step(10),
        width=200,
        title=f"Barcode parsing for {plate}",
    )
    .configure_axis(grid=False)
)

fates_chart
+
Counts per barcode
First get the counts per barcode and classification information on these barcodes:
# Read the per-sample barcode count tables and tag each with its sample name.
count_frames = [
    pd.read_csv(count_csv).assign(sample=sample)
    for count_csv, sample in zip(count_csvs, samples)
]

# Attach sample metadata and drop the metadata columns not needed downstream.
counts = (
    pd.concat(count_frames)
    .merge(samples_df, validate="many_to_one", on="sample")
    .drop(columns=["replicate", "plate", "fastq"])
)

# Classify each barcode as viral (with its strain) or neutralization standard
# (no strain, so `strain` is set to missing).
viral_barcodes = pd.read_csv(viral_library_csv)[["barcode", "strain"]].assign(
    neut_standard=False,
)
neut_standard_barcodes = pd.read_csv(neut_standard_set_csv)[["barcode"]].assign(
    neut_standard=True,
    strain=pd.NA,
)
barcode_class = pd.concat(
    [viral_barcodes, neut_standard_barcodes],
    ignore_index=True,
)

# The counted barcodes must be exactly the classified barcodes; then add the
# classification columns to the counts.
assert set(counts["barcode"]) == set(barcode_class["barcode"])
counts = counts.merge(barcode_class, on="barcode", validate="many_to_one")
+
Drop any barcodes that are specified to drop:
barcodes_to_drop = plate_params["barcodes_to_drop"]

if not len(barcodes_to_drop):
    print("No barcodes specified to drop.")
else:
    print(
        "The following barcodes are specified to drop:\n\t"
        + "\n\t".join(barcodes_to_drop)
    )
    # a barcode that is absent from the counts cannot be dropped: stop here
    invalid_barcodes = set(barcodes_to_drop).difference(counts["barcode"])
    if invalid_barcodes:
        raise ValueError(f"Barcodes to drop do not exist: {invalid_barcodes}")
    # keep only the rows whose barcode is not in the drop list
    counts = counts[~counts["barcode"].isin(barcodes_to_drop)]
+
The following barcodes are specified to drop: + GGTCCATCTCAGATCG ++
Plot average counts per barcode, and make sure that these pass the QC threshold. +If a sample has inadequate barcode counts, it may not have good enough statistics for accurate analysis:
# mean count per barcode in each sample, compared with the QC threshold
min_avg_barcode_count = qc_thresholds["avg_barcode_counts"]
avg_counts_by_sample = counts.groupby(
    ["well", "serum", "dilution_factor", "sample_noplate"],
    dropna=False,
    as_index=False,
).aggregate(avg_count=pd.NamedAgg("count", "mean"))
avg_barcode_counts = avg_counts_by_sample.assign(
    passes_qc=avg_counts_by_sample["avg_count"] >= min_avg_barcode_count
)

# samples are listed by serum and then by dilution factor on the y-axis
avg_counts_sample_order = list(
    avg_barcode_counts.sort_values(["serum", "dilution_factor"])["sample_noplate"]
)
# float columns get a compact number format in the tooltip
avg_counts_tooltips = [
    alt.Tooltip(column, format=".3g") if column_dtype == float else column
    for column, column_dtype in avg_barcode_counts.dtypes.items()
]

avg_barcode_counts_chart = (
    alt.Chart(avg_barcode_counts)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X(
            "avg_count",
            title="average counts per barcode",
            scale=alt.Scale(nice=False, padding=3),
        ),
        alt.Y("sample_noplate", title=None, sort=avg_counts_sample_order),
        alt.Color(
            "passes_qc",
            title=f"passes QC threshold {qc_thresholds['avg_barcode_counts']}",
            scale=alt.Scale(domain=[True, False]),
        ),
        tooltip=avg_counts_tooltips,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Average barcode counts for {plate}",
    )
    .configure_axis(grid=False)
)

display(avg_barcode_counts_chart)

# record a QC failure and list the offending samples if any are below threshold
if avg_barcode_counts["passes_qc"].all():
    print(f"\nAll samples passed {qc_thresholds['avg_barcode_counts']=}")
else:
    qc_failures.add("avg_barcode_counts")
    print(f"\nThe following samples failed {qc_thresholds['avg_barcode_counts']=}")
    display(avg_barcode_counts.query("not passes_qc").reset_index(drop=True))
+
+All samples passed qc_thresholds['avg_barcode_counts']=250 ++
Fraction of counts from neutralization standard¶
Determine the fraction of counts from the neutralization standard in each sample, and make sure this fraction passes the QC threshold.
min_neut_standard_frac = qc_thresholds["min_neut_standard_frac"]
max_neut_standard_frac_no_serum = qc_thresholds["max_neut_standard_frac_no_serum"]

# total counts and neutralization-standard counts for every sample
neut_standard_totals = (
    counts.assign(
        # multiplying by the 0/1 flag keeps only the neut-standard counts
        neut_standard_count=lambda x: x["count"] * x["neut_standard"].astype(int)
    )
    .groupby(
        ["well", "serum", "dilution_factor", "sample_noplate"],
        dropna=False,
        as_index=False,
    )
    .aggregate(
        total_count=pd.NamedAgg("count", "sum"),
        neut_standard_count=pd.NamedAgg("neut_standard_count", "sum"),
    )
)

# every sample needs at least the minimum fraction; samples whose serum is
# "none" must additionally not exceed the no-serum maximum
neut_standard_fracs = neut_standard_totals.assign(
    neut_standard_frac=lambda x: x["neut_standard_count"] / x["total_count"],
    passes_qc=lambda x: (
        (x["neut_standard_frac"] >= min_neut_standard_frac)
        & (
            (x["serum"] != "none")
            | (x["neut_standard_frac"] <= max_neut_standard_frac_no_serum)
        )
    ),
)

neut_standard_qc_desc = (
    f"neut standard frac >= {qc_thresholds['min_neut_standard_frac']}, "
    f"<= {qc_thresholds['max_neut_standard_frac_no_serum']} for no-serum samples"
)

# samples are listed by serum and then by dilution factor on the y-axis
neut_fracs_sample_order = list(
    neut_standard_fracs.sort_values(["serum", "dilution_factor"])["sample_noplate"]
)
# float columns get a compact number format in the tooltip
neut_fracs_tooltips = [
    alt.Tooltip(column, format=".3g") if column_dtype == float else column
    for column, column_dtype in neut_standard_fracs.dtypes.items()
]

neut_standard_fracs_chart = (
    alt.Chart(neut_standard_fracs)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X(
            "neut_standard_frac",
            title="fraction of counts from neutralization standard",
            scale=alt.Scale(nice=False, padding=3),
        ),
        alt.Y("sample_noplate", title=None, sort=neut_fracs_sample_order),
        alt.Color(
            "passes_qc",
            title=neut_standard_qc_desc,
            scale=alt.Scale(domain=[True, False]),
        ),
        tooltip=neut_fracs_tooltips,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Neutralization-standard fractions for {plate}",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(neut_standard_fracs_chart)

# record a QC failure and list the offending samples if any fail either bound
if neut_standard_fracs["passes_qc"].all():
    print(f"\nAll samples passed {neut_standard_qc_desc}")
else:
    qc_failures.add("min_neut_standard_frac or max_neut_standard_frac_no_serum")
    print(f"\nThe following samples failed {neut_standard_qc_desc}")
    display(neut_standard_fracs.query("not passes_qc").reset_index(drop=True))
+
+All samples passed neut standard frac >= 0.005, <= 0.1 for no-serum samples ++
Consistency and minimum fractions for barcodes¶
We examine the fraction of counts attributable to each barcode. We do this by splitting the data two ways:
+-
+
Looking at all viral (but not neut-standard) barcodes only for the no-serum samples.
+
+Looking at just the neut-standard barcodes for all samples.
+
+
The reason is that if the experiment is set up perfectly, these fractions should be the same across all samples for each barcode. (We do not expect viral barcodes to have consistent fractions across samples with serum, as they will be neutralized differently depending on strain.)
+We plot these fractions in interactive plots (you can mouseover points and zoom) so you can identify barcodes that fail the expected consistency QC thresholds.
+We also make sure the barcodes meet specified QC minimum thresholds for all samples, and flag any that do not.
# hovering over a point highlights that barcode
barcode_selection = alt.selection_point(fields=["barcode"], on="mouseover", empty=False)

# a barcode fraction may differ from its median by at most this fold change
max_fold_change = qc_thresholds["barcode_frac_consistency"]

# samples are listed by serum and then by dilution factor on the y-axis
evenness_sample_order = list(
    neut_standard_fracs.sort_values(["serum", "dilution_factor"])["sample_noplate"]
)

# look at all samples for neut standard barcodes, or no-serum samples for all barcodes
for is_neut_standard, df in counts.groupby("neut_standard"):
    # settings that differ between the two passes
    if is_neut_standard:
        x_title = "barcode's fraction of neut standard counts"
        chart_title = f"{plate} all samples, neut-standard barcodes"
        extra_dropped_column = "strain"
    else:
        x_title = "barcode's fraction of non-neut standard counts"
        chart_title = f"{plate} no-serum samples, all barcodes"
        extra_dropped_column = "dilution_factor"
        # the non-neut-standard barcodes are only examined in no-serum samples
        df = df.query("serum == 'none'")

    # each barcode's fraction of its sample's counts, and how far that fraction
    # is from the barcode's median fraction over the samples kept above
    df = df.assign(
        sample_counts=lambda x: x.groupby("sample")["count"].transform("sum"),
        count_frac=lambda x: x["count"] / x["sample_counts"],
        median_count_frac=lambda x: x.groupby("barcode")["count_frac"].transform(
            "median"
        ),
        fold_change_from_median=lambda x: x["count_frac"] / x["median_count_frac"],
    ).drop(
        columns=[
            "sample",
            "serum_replicate",
            "sample_counts",
            "neut_standard",
            extra_dropped_column,
        ],
    )

    # float columns get a compact number format in the tooltip
    evenness_tooltips = [
        alt.Tooltip(column, format=".3g") if column_dtype == float else column
        for column, column_dtype in df.dtypes.items()
    ]

    # make chart
    evenness_chart = (
        alt.Chart(df)
        .add_params(barcode_selection)
        .encode(
            alt.X(
                "count_frac",
                title=x_title,
                scale=alt.Scale(nice=False, padding=5),
            ),
            alt.Y("sample_noplate", title=None, sort=evenness_sample_order),
            alt.Fill("barcode", legend=None),
            strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
            size=alt.condition(barcode_selection, alt.value(60), alt.value(35)),
            tooltip=evenness_tooltips,
        )
        .mark_circle(fillOpacity=0.6, stroke="black", strokeOpacity=1)
        .properties(
            height=alt.Step(10),
            width=300,
            title=chart_title,
        )
        .configure_axis(grid=False)
        .configure_legend(titleLimit=1000)
        .interactive()
    )

    if is_neut_standard:
        # only this chart spans several sera, so only it gets the serum filter
        evenness_chart = evenness_chart.add_params(serum_selection).transform_filter(
            serum_selection
        )
        print(f"\n\n{'=' * 89}\nAnalyzing neut-standard barcodes from all samples\n")
    else:
        print(f"\n\n{'=' * 89}\nAnalyzing all barcodes from no-serum samples\n")

    display(evenness_chart)

    # make sure barcode fractions are reasonably consistent when they should be
    fold_change = df["fold_change_from_median"]
    excess_fold_change = df[
        (fold_change <= 1 / max_fold_change) | (fold_change >= max_fold_change)
    ]
    if excess_fold_change.empty:
        print(f"\nPassed {qc_thresholds['barcode_frac_consistency']=}")
    else:
        print(
            f"\nFollowing barcodes failed {qc_thresholds['barcode_frac_consistency']=}"
        )
        display(excess_fold_change)
        qc_failures.add("barcode_frac_consistency")

    # make sure barcodes have sufficient fraction
    if is_neut_standard:
        insufficient_neut_standard_barcode_frac = df[
            df["count_frac"] < qc_thresholds["min_neut_standard_barcode_frac"]
        ]
        if insufficient_neut_standard_barcode_frac.empty:
            print(f"\nPassed {qc_thresholds['min_neut_standard_barcode_frac']=}")
        else:
            print(
                "\nFollowing barcodes fail "
                + f"{qc_thresholds['min_neut_standard_barcode_frac']=}"
            )
            display(insufficient_neut_standard_barcode_frac)
            qc_failures.add("min_neut_standard_barcode_frac")
    else:
        insufficient_viral_barcode_frac = df[
            df["count_frac"] < qc_thresholds["min_viral_barcode_frac"]
        ]
        if insufficient_viral_barcode_frac.empty:
            print(f"\nPassed {qc_thresholds['min_viral_barcode_frac']=}")
        else:
            print(
                f"\nFollowing barcodes fail {qc_thresholds['min_viral_barcode_frac']=}"
            )
            display(insufficient_viral_barcode_frac)
            qc_failures.add("min_viral_barcode_frac")
+
+ +========================================================================================= +Analyzing all barcodes from no-serum samples + ++
+Passed qc_thresholds['barcode_frac_consistency']=4 + +Passed qc_thresholds['min_viral_barcode_frac']=0.0005 + + +========================================================================================= +Analyzing neut-standard barcodes from all samples + ++
+Passed qc_thresholds['barcode_frac_consistency']=4 + +Passed qc_thresholds['min_neut_standard_barcode_frac']=0.005 ++
Compute fraction infectivity¶
The fraction infectivity for viral barcode $v_b$ in sample $s$ is computed as:
$$
F_{v_b,s} = \frac{c_{v_b,s} / \left(\sum_{n_b} c_{n_b,s}\right)}{{\rm median}_{s_0}\left[ c_{v_b,s_0} / \left(\sum_{n_b} c_{n_b,s_0}\right)\right]}
$$
where
+-
+
- $c_{v_b,s}$ is the counts of viral barcode $v_b$ in sample $s$. +
- $\sum_{n_b} c_{n_b,s}$ is the sum of the counts for all neutralization standard barcodes $n_b$ for sample $s$. +
- $c_{v_b,s_0}$ is the counts of viral barcode $v_b$ in no-serum sample $s_0$. +
- $\sum_{n_b} c_{n_b,s_0}$ is the sum of the counts for all neutralization standard barcodes $n_b$ for no-serum sample $s_0$. +
- ${\rm median}_{s_0}\left[ c_{v_b,s_0} / \left(\sum_{n_b} c_{n_b,s_0}\right)\right]$ is the median taken across all no-serum samples of the counts of viral barcode $v_b$ versus the total counts for all neutralization standard barcodes. +
First, compute the total neutralization-standard counts for each sample. +Plot these, and make sure they meet the QC threshold.
# summed neutralization-standard counts per sample, compared with the QC minimum
min_neut_standard_count = qc_thresholds["min_neut_standard_count"]
neut_standard_totals_by_sample = (
    counts.query("neut_standard")
    .groupby(
        ["well", "serum", "sample_noplate", "dilution_factor"],
        dropna=False,
        as_index=False,
    )
    .aggregate(neut_standard_count=pd.NamedAgg("count", "sum"))
)
neut_standard_counts = neut_standard_totals_by_sample.assign(
    passes_qc=(
        neut_standard_totals_by_sample["neut_standard_count"]
        >= min_neut_standard_count
    ),
)

# samples are listed by serum and then by dilution factor on the y-axis
neut_counts_sample_order = list(
    neut_standard_counts.sort_values(["serum", "dilution_factor"])["sample_noplate"]
)
# float columns get a compact number format in the tooltip
neut_counts_tooltips = [
    alt.Tooltip(column, format=".3g") if column_dtype == float else column
    for column, column_dtype in neut_standard_counts.dtypes.items()
]

neut_standard_counts_chart = (
    alt.Chart(neut_standard_counts)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X(
            "neut_standard_count",
            title="counts from neutralization standard",
            scale=alt.Scale(nice=False, padding=3),
        ),
        alt.Y("sample_noplate", title=None, sort=neut_counts_sample_order),
        alt.Color(
            "passes_qc",
            title=f"at least {qc_thresholds['min_neut_standard_count']} counts",
            scale=alt.Scale(domain=[True, False]),
        ),
        tooltip=neut_counts_tooltips,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Neutralization-standard counts for {plate}",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(neut_standard_counts_chart)

# record a QC failure and list the offending samples if any are below the minimum
if not neut_standard_counts["passes_qc"].all():
    print(f"\nSamples failing {qc_thresholds['min_neut_standard_count']=}")
    display(neut_standard_counts.query("not passes_qc"))
    qc_failures.add("min_neut_standard_count")
else:
    print(f"\nAll samples pass {qc_thresholds['min_neut_standard_count']=}")
+
+All samples pass qc_thresholds['min_neut_standard_count']=350 ++
Compute and plot the no-serum sample viral barcode counts and check if they pass the QC filters.
# counts of the non-neut-standard barcodes in the no-serum samples, next to
# each sample's total neut-standard count
min_no_serum_count = qc_thresholds["min_no_serum_viral_barcode_count"]
no_serum_columns = [
    "barcode",
    "strain",
    "well",
    "sample_noplate",
    "count",
    "neut_standard_count",
]
no_serum_counts = (
    counts.query("(serum == 'none') and (not neut_standard)")
    .merge(neut_standard_counts, validate="many_to_one")[no_serum_columns]
    .assign(passes_qc=lambda x: x["count"] >= min_no_serum_count)
)

# make chart
no_serum_counts_chart = (
    alt.Chart(no_serum_counts)
    .add_params(barcode_selection)
    .encode(
        alt.X(
            "count",
            title="viral barcode count",
            scale=alt.Scale(nice=False, padding=5),
        ),
        alt.Y("sample_noplate", title=None),
        alt.Fill("barcode", legend=None),
        strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
        size=alt.condition(barcode_selection, alt.value(60), alt.value(35)),
        tooltip=list(no_serum_counts.columns),
    )
    .mark_circle(fillOpacity=0.6, stroke="black", strokeOpacity=1)
    .properties(
        height=alt.Step(10),
        width=300,
        title=f"{plate} viral barcode counts in no-serum samples",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
    .interactive()
)

display(no_serum_counts_chart)

# QC check: every barcode in every no-serum sample needs the minimum count
if not no_serum_counts["passes_qc"].all():
    print(f"\nSamples failing {qc_thresholds['min_no_serum_viral_barcode_count']=}")
    display(no_serum_counts.query("not passes_qc"))
    qc_failures.add("min_no_serum_viral_barcode_count")
else:
    print(f"\nAll samples pass {qc_thresholds['min_no_serum_viral_barcode_count']=}")
+
+All samples pass qc_thresholds['min_no_serum_viral_barcode_count']=5 ++
Compute and plot the median ratio of viral barcode count to neut standard counts across no-serum samples:
# for each barcode, the median over no-serum samples of the barcode's count
# divided by that sample's total neut-standard count
no_serum_ratios = no_serum_counts.assign(
    ratio=lambda x: x["count"] / x["neut_standard_count"]
)
median_no_serum_ratio = no_serum_ratios.groupby(
    ["barcode", "strain"], as_index=False
).aggregate(median_no_serum_ratio=pd.NamedAgg("ratio", "median"))

# hovering over a bar highlights every barcode of the same strain
strain_selection = alt.selection_point(fields=["strain"], on="mouseover", empty=False)

# float columns get a compact number format in the tooltip
median_ratio_tooltips = [
    alt.Tooltip(column, format=".3g") if column_dtype == float else column
    for column, column_dtype in median_no_serum_ratio.dtypes.items()
]

median_no_serum_ratio_chart = (
    alt.Chart(median_no_serum_ratio)
    .add_params(strain_selection)
    .encode(
        alt.X(
            "median_no_serum_ratio",
            title="median ratio of counts",
            scale=alt.Scale(nice=False, padding=5),
        ),
        alt.Y(
            "barcode",
            sort=alt.SortField("median_no_serum_ratio", order="descending"),
            axis=alt.Axis(labelFontSize=5),
        ),
        color=alt.condition(strain_selection, alt.value("orange"), alt.value("gray")),
        tooltip=median_ratio_tooltips,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(5),
        width=250,
        title=f"{plate} no-serum median ratio viral barcode to neut-standard barcode",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(median_no_serum_ratio_chart)
+
Compute the actual fraction infectivities, QC check if any are null (from zero counts), and also plot and check if any exceed the `max_frac_infectivity` threshold:
frac_infectivity_cap = qc_thresholds["max_frac_infectivity"]
frac_infectivity_columns = [
    "barcode",
    "strain",
    "serum",
    "serum_replicate",
    "plate_replicate",
    "dilution_factor",
    "frac_infectivity",
    "sample_noplate",
    "well",
    "passes_qc",
]

# fraction infectivity of each non-neut-standard barcode in each serum sample:
# its count relative to the sample's neut-standard count, divided by the
# barcode's median no-serum ratio
frac_infectivity = (
    counts.query("(not neut_standard) and (serum != 'none')")
    .merge(median_no_serum_ratio, validate="many_to_one")
    .merge(
        neut_standard_counts.drop(columns="passes_qc"),
        validate="many_to_one",
    )
    .assign(
        frac_infectivity=lambda x: (
            (x["count"] / x["neut_standard_count"]) / x["median_no_serum_ratio"]
        ),
        passes_qc=lambda x: x["frac_infectivity"] <= frac_infectivity_cap,
    )[frac_infectivity_columns]
)

# one row per barcode / serum / plate replicate / dilution, all with a dilution
assert len(frac_infectivity) == len(
    frac_infectivity.groupby(["barcode", "serum", "plate_replicate", "dilution_factor"])
)
assert frac_infectivity["dilution_factor"].notnull().all()

# samples are listed by serum and then by dilution factor on the y-axis
frac_infectivity_sample_order = list(
    neut_standard_counts.sort_values(["serum", "dilution_factor"])["sample_noplate"]
)
# float columns get a compact number format in the tooltip
frac_infectivity_tooltips = [
    alt.Tooltip(column, format=".3g") if column_dtype == float else column
    for column, column_dtype in frac_infectivity.dtypes.items()
]

frac_infectivity_chart = (
    alt.Chart(frac_infectivity)
    .add_params(serum_selection, barcode_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X(
            "frac_infectivity",
            title="fraction infectivity",
            scale=alt.Scale(nice=False, padding=3),
        ),
        alt.Y("sample_noplate", title=None, sort=frac_infectivity_sample_order),
        strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
        size=alt.condition(barcode_selection, alt.value(60), alt.value(35)),
        color=alt.Color(
            "passes_qc",
            title=f"frac_infectivity <= {qc_thresholds['max_frac_infectivity']}",
            scale=alt.Scale(domain=[True, False]),
        ),
        tooltip=frac_infectivity_tooltips,
    )
    .mark_circle(stroke="black", strokeOpacity=1)
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Fraction infectivities for {plate}",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(frac_infectivity_chart)

# QC: no fraction infectivity may exceed the maximum
if frac_infectivity["passes_qc"].all():
    print(f"\nAll barcode-samples pass {qc_thresholds['max_frac_infectivity']=}")
else:
    print(f"\nSome barcode-samples fail {qc_thresholds['max_frac_infectivity']=}")
    display(frac_infectivity.query("not passes_qc"))
    qc_failures.add("max_frac_infectivity")

# QC: no fraction infectivity may be null
null_frac_infectivity = frac_infectivity["frac_infectivity"].isnull()
if null_frac_infectivity.any():
    print("\nSome barcodes have undefined fraction infectivity due to zero counts:")
    display(frac_infectivity[null_frac_infectivity])
    qc_failures.add("null_frac_infectivity")
else:
    print("\nNo undefined fraction infectivities")

frac_infectivity
+
+All barcode-samples pass qc_thresholds['max_frac_infectivity']=8 + +No undefined fraction infectivities ++
+ | barcode | +strain | +serum | +serum_replicate | +plate_replicate | +dilution_factor | +frac_infectivity | +sample_noplate | +well | +passes_qc | +
---|---|---|---|---|---|---|---|---|---|---|
0 | +CGTTTAAACAATGAAG | +A/India-Pune-Nivcov2221170/2022 | +M099d30 | +M099d30 | +plate11 | +20 | +0.000000 | +M099d30_20 | +B2 | +True | +
1 | +AAATAAGTACGCAAAT | +A/Niger/10217/2021 | +M099d30 | +M099d30 | +plate11 | +20 | +0.000163 | +M099d30_20 | +B2 | +True | +
2 | +TGAGGATAATCACAAG | +A/Michigan/45/2015 | +M099d30 | +M099d30 | +plate11 | +20 | +0.000086 | +M099d30_20 | +B2 | +True | +
3 | +CATGTGAATTCGCCCA | +A/California/07/2009 | +M099d30 | +M099d30 | +plate11 | +20 | +0.000430 | +M099d30_20 | +B2 | +True | +
4 | +GATCCGTACTTTGATT | +A/Belgium/H0038/2022 | +M099d30 | +M099d30 | +plate11 | +20 | +0.000143 | +M099d30_20 | +B2 | +True | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
3185 | +TGGAAAAGATGTAATA | +A/Norway/25089/2022 | +Y044d30 | +Y044d30 | +plate11 | +393660 | +1.985028 | +Y044d30_393660 | +D11 | +True | +
3186 | +AGTCCTATCCTCAAAT | +A/Wisconsin/588/2019 | +Y044d30 | +Y044d30 | +plate11 | +393660 | +0.944186 | +Y044d30_393660 | +D11 | +True | +
3187 | +GCAATCCCGCAATTTG | +A/Ghana/2080/2020 | +Y044d30 | +Y044d30 | +plate11 | +393660 | +0.885451 | +Y044d30_393660 | +D11 | +True | +
3188 | +ACGGAATCCCCTGAGA | +A/Washington/23/2020 | +Y044d30 | +Y044d30 | +plate11 | +393660 | +0.737193 | +Y044d30_393660 | +D11 | +True | +
3189 | +CAGTTCTGCGACCAGC | +A/Bangladesh/8036/2021 | +Y044d30 | +Y044d30 | +plate11 | +393660 | +0.870721 | +Y044d30_393660 | +D11 | +True | +
3190 rows × 10 columns
+Write fraction infectivities to file:
print(f"\nWriting fraction infectivities to {frac_infectivity_csv}")

# only these columns are written, with rows in a fixed sort order
frac_infectivity_csv_columns = [
    "barcode",
    "strain",
    "serum",
    "plate_replicate",
    "dilution_factor",
    "frac_infectivity",
]
frac_infectivity_to_write = frac_infectivity[frac_infectivity_csv_columns].sort_values(
    ["serum", "plate_replicate", "dilution_factor", "barcode"]
)
frac_infectivity_to_write.to_csv(
    frac_infectivity_csv, index=False, float_format="%.5g"
)
+
+Writing fraction infectivities to results/plates/plate11/frac_infectivity.csv ++
Make sure we have enough dilutions with non-null fraction infectivities for each serum-replicate:
# Count, for every serum replicate, the distinct dilutions that have a non-null
# fraction infectivity. Null rows are masked rather than filtered out, so a
# serum replicate with no usable dilution still appears (with a count of zero)
# instead of disappearing from the table and silently skipping the QC check.
n_dilutions = (
    frac_infectivity.assign(
        valid_dilution=lambda x: x["dilution_factor"].where(
            x["frac_infectivity"].notnull()
        )
    )
    .groupby("serum_replicate")
    # "nunique" ignores the masked (NaN) dilutions
    .aggregate(n_dilutions=pd.NamedAgg("valid_dilution", "nunique"))
    .assign(
        # the threshold key is named as a minimum, so a serum replicate with
        # exactly that many dilutions passes; only fewer dilutions fail
        fails_qc=lambda x: (
            x["n_dilutions"] < qc_thresholds["min_dilutions_per_serum_replicate"]
        ),
    )
)

# record a QC failure and list the serum replicates with too few dilutions
if n_dilutions["fails_qc"].any():
    print(f"Failing {qc_thresholds['min_dilutions_per_serum_replicate']=}:")
    display(n_dilutions.query("fails_qc"))
    qc_failures.add("min_dilutions_per_serum_replicate")
else:
    print(f"Passed {qc_thresholds['min_dilutions_per_serum_replicate']=}:")
+
Passed qc_thresholds['min_dilutions_per_serum_replicate']=4: ++
Summarize all QC failures and write to file:
# Build the report text under its own name. Rebinding `qc_failures` itself to
# the joined string would make this cell unsafe to re-run: the second run would
# sort and join the individual characters of that string.
qc_failures_text = "\n".join(sorted(qc_failures))

if qc_failures_text:
    print(f"Encountered the following QC failures:\n{qc_failures_text}")
else:
    print("No QC failures")

# the file is written even when there are no failures (it is then empty)
print(f"\nLogging QC failures to {qc_failures_file}")
with open(qc_failures_file, "w") as f:
    f.write(qc_failures_text)
+
No QC failures + +Logging QC failures to results/plates/plate11/process_counts_qc_failures.txt ++
+