diff --git a/finetune.py b/finetune.py index d986903..079de6d 100644 --- a/finetune.py +++ b/finetune.py @@ -17,17 +17,17 @@ from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader -from lib.data import FinetuneDataset -from lib.tokenizer import ConceptTokenizer -from lib.utils import ( +from odyssey.data.dataset import FinetuneDataset +from odyssey.data.tokenizer import ConceptTokenizer +from odyssey.models.cehr_big_bird.model import BigBirdFinetune, BigBirdPretrain +from odyssey.models.cehr_bert.model import BertFinetune, BertPretrain +from odyssey.models.utils import ( get_latest_checkpoint, get_run_id, load_config, load_finetune_data, seed_everything, ) -from models.big_bird_cehr.model import BigBirdFinetune, BigBirdPretrain -from models.cehr_bert.model import BertFinetune, BertPretrain def main( diff --git a/models/__init__.py b/models/__init__.py deleted file mode 100644 index 923ee6a..0000000 --- a/models/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -models.py. - -Includes different model implementation such as XGBoost, Bi-LSTM, -CEHR-BERT, and BigBird on the MIMIC-IV FHIR dataset. -""" diff --git a/odyssey/__init__.py b/odyssey/__init__.py new file mode 100644 index 0000000..c94d3eb --- /dev/null +++ b/odyssey/__init__.py @@ -0,0 +1 @@ +"""Odyssey package.""" diff --git a/odyssey/data/__init__.py b/odyssey/data/__init__.py new file mode 100644 index 0000000..6ca91a7 --- /dev/null +++ b/odyssey/data/__init__.py @@ -0,0 +1 @@ +"""Data sub-package.""" diff --git a/data/bigbird_data/DataChecker.ipynb b/odyssey/data/bigbird_data/DataChecker.ipynb similarity index 100% rename from data/bigbird_data/DataChecker.ipynb rename to odyssey/data/bigbird_data/DataChecker.ipynb diff --git a/lib/data.py b/odyssey/data/dataset.py similarity index 99% rename from lib/data.py rename to odyssey/data/dataset.py index 8a6dbed..d46a90a 100644 --- a/lib/data.py +++ b/odyssey/data/dataset.py @@ -6,7 +6,7 @@ import torch from torch.utils.data import Dataset -from lib.tokenizer import ConceptTokenizer +from odyssey.data.tokenizer import ConceptTokenizer class PretrainDataset(Dataset): diff --git a/odyssey/data/mimiciv/__init__.py b/odyssey/data/mimiciv/__init__.py new file mode 100644 index 0000000..a8885a4 --- /dev/null +++ b/odyssey/data/mimiciv/__init__.py @@ -0,0 +1 @@ +"""MIMICIV specific data processing module.""" diff --git a/data/collect.py b/odyssey/data/mimiciv/collect.py similarity index 89% rename from data/collect.py rename to odyssey/data/mimiciv/collect.py index 61b2f15..ce5d6a2 100644 --- a/data/collect.py +++ b/odyssey/data/mimiciv/collect.py @@ -1,6 +1,7 @@ """Collect data from the FHIR database and save to csv files.""" import json +import logging import os from ast import literal_eval from typing import Any, Dict, List, Optional @@ -17,6 +18,13 @@ from sqlalchemy import MetaData, Table, create_engine, select from tqdm import tqdm +from odyssey.utils.log import setup_logging + + +# Logging. 
+LOGGER = logging.getLogger(__name__) +setup_logging(print_level="INFO", logger=LOGGER) + PATIENT = "patient" ENCOUNTER = "encounter" @@ -212,7 +220,8 @@ def get_patient_data(self) -> None: ) buffer = [] results = self.execute_query(DATA_COLLECTION_CONFIG[PATIENT]["table_name"]) - for p in tqdm(results, desc="Processing patients", unit=PATIENT): + LOGGER.info("Fetching patient data ...") + for p in tqdm(results, desc="Processing patients", unit="patients"): patient = Patient(p) patient_data = { "patient_id": patient.id, @@ -239,7 +248,7 @@ def get_patient_data(self) -> None: def get_encounter_data(self) -> None: """Get encounter data from the database and save to a csv file.""" try: - patients = pd.read_csv(self.csv_dir + "/patients.csv") + patients = pd.read_csv(os.path.join(self.csv_dir, "patients.csv")) except FileNotFoundError: print("Patients file not found. Please run get_patient_data() first.") return @@ -249,10 +258,11 @@ def get_encounter_data(self) -> None: ) buffer = [] outpatient_ids = [] + LOGGER.info("Fetching encounter data ...") for _, patient_id in tqdm( patients["patient_id"].items(), desc="Processing patients", - unit="patient", + unit="patients", ): results = self.execute_query( DATA_COLLECTION_CONFIG[ENCOUNTER]["table_name"], @@ -295,12 +305,12 @@ def get_encounter_data(self) -> None: flush=True, ) patients = patients[~patients["patient_id"].isin(outpatient_ids)] - patients.to_csv(self.csv_dir + "/inpatient.csv", index=False) + patients.to_csv(os.path.join(self.csv_dir, "inpatient.csv"), index=False) def get_procedure_data(self) -> None: """Get procedure data from the database and save to a csv file.""" try: - patients = pd.read_csv(self.csv_dir + "/inpatient.csv") + patients = pd.read_csv(os.path.join(self.csv_dir, "inpatient.csv")) except FileNotFoundError: print( "Encounters (inpatient) file not found. Please run get_encounter_data() first.", @@ -312,10 +322,11 @@ def get_procedure_data(self) -> None: ) procedure_vocab = set() buffer = [] + LOGGER.info("Fetching procedure data ...") for _, patient_id in tqdm( patients["patient_id"].items(), desc="Processing patients", - unit="patient", + unit="patients", ): results = self.execute_query("procedure", patient_id) proc_codes = [] @@ -358,13 +369,13 @@ def get_procedure_data(self) -> None: save_path, flush=True, ) - with open(self.vocab_dir + "/procedure_vocab.json", "w") as f: + with open(os.path.join(self.vocab_dir, "procedure_vocab.json"), "w") as f: json.dump(list(procedure_vocab), f) def get_medication_data(self) -> None: """Get medication data from the database and save to a csv file.""" try: - patients = pd.read_csv(self.csv_dir + "/inpatient.csv") + patients = pd.read_csv(os.path.join(self.csv_dir, "inpatient.csv")) except FileNotFoundError: print("Patients file not found. 
Please run get_encounter_data() first.") return @@ -377,11 +388,12 @@ def get_medication_data(self) -> None: save_path = os.path.join(self.csv_dir, "med_requests.csv") med_vocab = set() buffer = [] + LOGGER.info("Fetching medication data ...") with self.engine.connect() as connection: for _, patient_id in tqdm( patients["patient_id"].items(), desc="Processing patients", - unit="patient", + unit="patients", ): results = self.execute_query( DATA_COLLECTION_CONFIG[MEDICATION]["table_name"], @@ -437,13 +449,13 @@ def get_medication_data(self) -> None: save_path, flush=True, ) - with open(self.vocab_dir + "/med_vocab.json", "w") as f: + with open(os.path.join(self.vocab_dir, "med_vocab.json"), "w") as f: json.dump(list(med_vocab), f) def get_lab_data(self) -> None: """Get lab data from the database and save to a csv file.""" try: - patients = pd.read_csv(self.csv_dir + "/inpatient.csv") + patients = pd.read_csv(os.path.join(self.csv_dir, "inpatient.csv")) except FileNotFoundError: print("Patients file not found. Please run get_encounter_data() first.") return @@ -451,10 +463,11 @@ def get_lab_data(self) -> None: lab_vocab = set() all_units = {} buffer = [] + LOGGER.info("Fetching lab data ...") for _, patient_id in tqdm( patients["patient_id"].items(), desc="Processing patients", - unit="patient", + unit="patients", ): results = self.execute_query( DATA_COLLECTION_CONFIG[LAB]["table_name"], @@ -506,10 +519,10 @@ def get_lab_data(self) -> None: save_path, flush=True, ) - with open(self.vocab_dir + "/lab_vocab.json", "w") as f: + with open(os.path.join(self.vocab_dir, "lab_vocab.json"), "w") as f: json.dump(list(lab_vocab), f) all_units = {k: list(v) for k, v in all_units.items()} - with open(self.vocab_dir + "/lab_units.json", "w") as f: + with open(os.path.join(self.vocab_dir, "lab_units.json"), "w") as f: json.dump(all_units, f) def filter_lab_data( @@ -517,20 +530,21 @@ def filter_lab_data( ) -> None: """Filter out lab codes that have more than one units.""" try: - labs = pd.read_csv(self.csv_dir + "/labs.csv") - with open(self.vocab_dir + "/lab_vocab.json", "r") as f: + labs = pd.read_csv(os.path.join(self.csv_dir, "labs.csv")) + with open(os.path.join(self.vocab_dir, "lab_vocab.json"), "r") as f: lab_vocab = json.load(f) - with open(self.vocab_dir + "/lab_units.json", "r") as f: + with open(os.path.join(self.vocab_dir, "lab_units.json"), "r") as f: lab_units = json.load(f) except FileNotFoundError: print("Labs file not found. Please run get_lab_data() first.") return + LOGGER.info("Filtering lab data ...") for code, units in lab_units.items(): if len(units) > 1: lab_vocab.remove(code) labs = labs.apply(lambda x: filter_lab_codes(x, lab_vocab), axis=1) - labs.to_csv(self.csv_dir + "/filtered_labs.csv", index=False) - with open(self.vocab_dir + "/lab_vocab.json", "w") as f: + labs.to_csv(os.path.join(self.csv_dir, "filtered_labs.csv"), index=False) + with open(os.path.join(self.vocab_dir, "lab_vocab.json"), "w") as f: json.dump(list(lab_vocab), f) def process_lab_values(self, num_bins: int = 5) -> None: @@ -540,10 +554,11 @@ def process_lab_values(self, num_bins: int = 5) -> None: ---------- num_bins : int, optional number of bins, by default 5 + """ try: - labs = pd.read_csv(self.csv_dir + "/filtered_labs.csv") - with open(self.vocab_dir + "/lab_vocab.json", "r") as f: + labs = pd.read_csv(os.path.join(self.csv_dir, "filtered_labs.csv")) + with open(os.path.join(self.vocab_dir, "lab_vocab.json"), "r") as f: lab_vocab = json.load(f) except FileNotFoundError: print("Labs file not found. 
Please run get_lab_data() first.") @@ -565,6 +580,7 @@ def assign_to_quantile_bins(row: pd.Series) -> pd.Series: row["binned_values"] = binned_values return row +        LOGGER.info("Processing lab values ...") labs = labs.apply(apply_eval, axis=1) quantile_bins = {} for code in lab_vocab: @@ -582,19 +598,19 @@ def assign_to_quantile_bins(row: pd.Series) -> pd.Series: ).categories labs = labs.apply(assign_to_quantile_bins, axis=1) -        labs.to_csv(self.csv_dir + "/processed_labs.csv", index=False) +        labs.to_csv(os.path.join(self.csv_dir, "processed_labs.csv"), index=False) lab_vocab_binned = [] lab_vocab_binned.extend( [f"{code}_{i}" for code in lab_vocab for i in range(num_bins)], ) -        with open(self.vocab_dir + "/lab_vocab.json", "w") as f: +        with open(os.path.join(self.vocab_dir, "lab_vocab.json"), "w") as f: json.dump(lab_vocab_binned, f) def get_condition_data(self) -> None: """Get condition data from the database and save to a csv file.""" try: -            patients = pd.read_csv(self.csv_dir + "/inpatient.csv") +            patients = pd.read_csv(os.path.join(self.csv_dir, "inpatient.csv")) except FileNotFoundError: print("Patients file not found. Please run get_encounter_data() first.") return @@ -603,10 +619,11 @@ def get_condition_data(self) -> None: condition_counts = {} condition_systems = {} buffer = [] +        LOGGER.info("Fetching condition data ...") for _, patient_id in tqdm( patients["patient_id"].items(), desc="Processing patients", -            unit="patient", +            unit="patients", ): patient_conditions_counted = set() results = self.execute_query( @@ -650,7 +667,7 @@ def get_condition_data(self) -> None: save_path, flush=True, ) -        with open(self.vocab_dir + "/condition_vocab.json", "w") as f: +        with open(os.path.join(self.vocab_dir, "condition_vocab.json"), "w") as f: json.dump(list(condition_vocab), f) sorted_conditions = sorted( condition_counts.items(), key=lambda x: x[1], reverse=True, ) sorted_dict = dict(sorted_conditions) -        with open(self.vocab_dir + "/condition_counts.json", "w") as f: +        with open(os.path.join(self.vocab_dir, "condition_counts.json"), "w") as f: json.dump(sorted_dict, f) - -        with open(self.vocab_dir + "/condition_systems.json", "w") as f: +        with open(os.path.join(self.vocab_dir, "condition_systems.json"), "w") as f: json.dump(condition_systems, f) def group_conditions(self) -> None: """Group conditions into categories.""" -        with open(self.vocab_dir + "/condition_counts.json", "r") as file: +        with open(os.path.join(self.vocab_dir, "condition_counts.json"), "r") as file: data = json.load(file) -        with open(self.vocab_dir + "/condition_systems.json", "r") as file: +        with open(os.path.join(self.vocab_dir, "condition_systems.json"), "r") as file: systems = json.load(file) +        LOGGER.info("Grouping conditions ...") grouped_data = {} for code, info in data.items(): prefix = code[:3] @@ -686,7 +703,10 @@ reverse=True, ), ) -        with open(self.vocab_dir + "condition_categories.json", "w") as file: +        with open( +            os.path.join(self.vocab_dir, "condition_categories.json"), +            "w", +        ) as file: json.dump(sorted_grouped_data, file, indent=4) diff --git a/data/sequence.py b/odyssey/data/sequence.py similarity index 83% rename from data/sequence.py rename to odyssey/data/sequence.py index 32cb814..e11aa8d 100644 --- a/data/sequence.py +++ b/odyssey/data/sequence.py @@ -1,14 +1,22 @@ """Create patient sequences from the events dataframes.""" -import ast import json +import logging import os +import time +from ast import literal_eval from
datetime import datetime from typing import Any, Dict, List, Optional, Union import numpy as np import pandas as pd from dateutil import parser +from odyssey.utils.log import setup_logging + + +# Logging. +LOGGER = logging.getLogger(__name__) +setup_logging(print_level="INFO", logger=LOGGER) class SequenceGenerator: @@ -160,13 +168,13 @@ def _get_valid_encounters( ------- pd.DataFrame """ - proc_encounters = set(eval(procedure_row["encounter_ids"])) - med_encounters = set(eval(medication_row["encounter_ids"])) - lab_encounters = set(eval(lab_row["encounter_ids"])) + proc_encounters = set(literal_eval(procedure_row["encounter_ids"])) + med_encounters = set(literal_eval(medication_row["encounter_ids"])) + lab_encounters = set(literal_eval(lab_row["encounter_ids"])) valid_encounters = proc_encounters.union(med_encounters, lab_encounters) for c_name in ["encounter_ids", "starts", "ends"]: - encounter_row[c_name] = eval(encounter_row[c_name]) + encounter_row[c_name] = literal_eval(encounter_row[c_name]) encounter_ids = encounter_row["encounter_ids"] encounter_starts = encounter_row["starts"] encounter_ends = encounter_row["ends"] @@ -348,13 +356,13 @@ def _edit_datetimes( "lab_values", "lab_units", ]: - row[name] = eval(row[name]) + row[name] = literal_eval(row[name]) elif concept_name == "med": for name in ["encounter_ids", "med_dates", "med_codes"]: - row[name] = eval(row[name]) + row[name] = literal_eval(row[name]) elif concept_name == "proc": for name in ["encounter_ids", "proc_dates", "proc_codes"]: - row[name] = eval(row[name]) + row[name] = literal_eval(row[name]) if row["length"] == 0: return row dates = [] @@ -743,7 +751,7 @@ def _get_mortality_label( death_code = death_date - last_code_date if death_code.days < 0: - print("after death events", row["patient_id"]) + LOGGER.info(f"After death events {row['patient_id']}") self.after_death_events.append(row["patient_id"]) row["deceased"] = 1 @@ -978,163 +986,153 @@ def create_patient_sequence( f"{self.data_dir}/conditions.csv", ] rounds = 0 + more_chunks = True readers = [pd.read_csv(path, chunksize=chunksize) for path in file_paths] - while True: + while more_chunks: try: - # read dataframes - if rounds <= 8: - dataframes = [ - next(reader).reset_index(drop=True) for reader in readers - ] - rounds += 1 - continue dataframes = [next(reader).reset_index(drop=True) for reader in readers] - patients, encounters, procedures, medications, labs, conditions = ( - dataframes - ) - - ## process encounters - # keep valid encounters - encounters = encounters.apply( - lambda row: self._get_valid_encounters( - row, - procedures.iloc[row.name], - medications.iloc[row.name], - labs.iloc[row.name], - ), - axis=1, - ) - # keep valid rows with at least one encounter - valid_indices = encounters[encounters["length"] > 0].index - encounters = encounters.loc[valid_indices].reset_index(drop=True) - patients = patients.loc[valid_indices].reset_index(drop=True) - # sort encounters by start time - encounters = encounters.apply(self._sort_encounters, axis=1) - # get ages of the patients at the time of the encounters - encounters = encounters.apply( - lambda row: self._get_encounters_age(row, patients.iloc[row.name]), - axis=1, - ) - # get time of the encounters in weeks with respect to a reference time - encounters = encounters.apply( - lambda row: self._get_encounters_time(row), - axis=1, - ) - # calculate intervals between encounters - encounters = encounters.apply(self._calculate_intervals, axis=1) - ## process events - # keep valid rows and edit the datetimes 
of the events - # procedures - procedures = procedures.loc[valid_indices].reset_index(drop=True) - procedures = procedures.apply( - lambda row: self._edit_datetimes( - row, - encounters.iloc[row.name], - "proc", - ), - axis=1, - ) - # medications - medications = medications.loc[valid_indices].reset_index(drop=True) - medications = medications.apply( - lambda row: self._edit_datetimes( - row, - encounters.iloc[row.name], - "med", - ), - axis=1, - ) - # labs - labs = labs.loc[valid_indices].reset_index(drop=True) - labs = labs.apply( - lambda row: self._edit_datetimes( - row, - encounters.iloc[row.name], - "lab", - ), - axis=1, - ) - # conditions - conditions = conditions.loc[valid_indices].reset_index(drop=True) - conditions["encounter_conditions"] = conditions[ - "encounter_conditions" - ].apply(ast.literal_eval) - - # combine events - combined_events = self._concat_concepts( - procedures, - medications, - labs, - encounters, - conditions=conditions if condition_per_encounter else None, - ) - # filter patients based on min_events - combined_events = combined_events[ - combined_events["length"] > min_events - ] - # add elapsed time after admission for all events - combined_events = combined_events.apply( - lambda row: self._time_after_admission( - row, - encounters.iloc[row.name], - ), - axis=1, - ) - # add special tokens to the events - combined_events = combined_events.apply( - lambda row: self._add_tokens(row, encounters.iloc[row.name]), - axis=1, - ) - # filter patients based on min_visits - combined_events = combined_events[ - combined_events["num_visits"] > min_visits - ] - # get mortality label - combined_events = combined_events.apply( - lambda row: self._get_mortality_label( - row, - patients.iloc[row.name], - encounters.iloc[row.name], - ), - axis=1, - ) - combined_events = combined_events[ - ~combined_events["patient_id"].isin(self.after_death_events) - ] - combined_events = combined_events.apply( - lambda row: self._truncate_or_pad(row, pad_events=pad_events), - axis=1, - ) - # get condition label for common and rare conditions - combined_events = combined_events.apply( - lambda row: self._get_condition_label( - row, - conditions.iloc[row.name], - ), - axis=1, - ) - # drop rows with nan values for events if any - combined_events = combined_events.dropna( - subset=[f"event_tokens_{self.max_seq_length}"], - ) - # save the combined events - combined_events_all = combined_events[self.get_all_column_names] - combined_events_max = combined_events[self.get_max_column_names] - - combined_events_all.to_parquet( - self.all_dir + f"/patient_sequences_{rounds}.parquet", - engine="pyarrow", - ) - combined_events_max.to_parquet( - self.max_dir - + f"/patient_sequences_{self.max_seq_length}_{rounds}.parquet", - engine="pyarrow", - ) - print(f"Round {rounds} done") - rounds += 1 - except StopIteration: + more_chunks = False break + patients, encounters, procedures, medications, labs, conditions = dataframes + ## Process encounters + # Keep valid encounters + encounters = encounters.apply( + lambda row: self._get_valid_encounters( + row, + procedures.iloc[row.name], + medications.iloc[row.name], + labs.iloc[row.name], + ), + axis=1, + ) + # keep valid rows with at least one encounter + start = time.time() + valid_indices = encounters[encounters["length"] > 0].index + encounters = encounters.loc[valid_indices].reset_index(drop=True) + patients = patients.loc[valid_indices].reset_index(drop=True) + # sort encounters by start time + encounters = encounters.apply(self._sort_encounters, axis=1) + # 
get ages of the patients at the time of the encounters + encounters = encounters.apply( + lambda row: self._get_encounters_age(row, patients.iloc[row.name]), + axis=1, + ) + # get time of the encounters in weeks with respect to a reference time + encounters = encounters.apply( + lambda row: self._get_encounters_time(row), + axis=1, + ) + # calculate intervals between encounters + encounters = encounters.apply(self._calculate_intervals, axis=1) + ## process events + # keep valid rows and edit the datetimes of the events + # procedures + procedures = procedures.loc[valid_indices].reset_index(drop=True) + procedures = procedures.apply( + lambda row: self._edit_datetimes( + row, + encounters.iloc[row.name], + "proc", + ), + axis=1, + ) + # medications + medications = medications.loc[valid_indices].reset_index(drop=True) + medications = medications.apply( + lambda row: self._edit_datetimes( + row, + encounters.iloc[row.name], + "med", + ), + axis=1, + ) + # labs + labs = labs.loc[valid_indices].reset_index(drop=True) + labs = labs.apply( + lambda row: self._edit_datetimes( + row, + encounters.iloc[row.name], + "lab", + ), + axis=1, + ) + # conditions + conditions = conditions.loc[valid_indices].reset_index(drop=True) + conditions["encounter_conditions"] = conditions[ + "encounter_conditions" + ].apply(literal_eval) + + # combine events + combined_events = self._concat_concepts( + procedures, + medications, + labs, + encounters, + conditions=conditions if condition_per_encounter else None, + ) + # filter patients based on min_events + combined_events = combined_events[combined_events["length"] > min_events] + # add elapsed time after admission for all events + combined_events = combined_events.apply( + lambda row: self._time_after_admission( + row, + encounters.iloc[row.name], + ), + axis=1, + ) + # add special tokens to the events + combined_events = combined_events.apply( + lambda row: self._add_tokens(row, encounters.iloc[row.name]), + axis=1, + ) + # filter patients based on min_visits + combined_events = combined_events[ + combined_events["num_visits"] > min_visits + ] + # get mortality label + combined_events = combined_events.apply( + lambda row: self._get_mortality_label( + row, + patients.iloc[row.name], + encounters.iloc[row.name], + ), + axis=1, + ) + combined_events = combined_events[ + ~combined_events["patient_id"].isin(self.after_death_events) + ] + combined_events = combined_events.apply( + lambda row: self._truncate_or_pad(row, pad_events=pad_events), + axis=1, + ) + # get condition label for common and rare conditions + combined_events = combined_events.apply( + lambda row: self._get_condition_label( + row, + conditions.iloc[row.name], + ), + axis=1, + ) + # drop rows with nan values for events if any + combined_events = combined_events.dropna( + subset=[f"event_tokens_{self.max_seq_length}"], + ) + # save the combined events + combined_events_all = combined_events[self.get_all_column_names] + combined_events_max = combined_events[self.get_max_column_names] + + combined_events_all.to_parquet( + self.all_dir + f"/patient_sequences_{rounds}.parquet", + engine="pyarrow", + ) + combined_events_max.to_parquet( + self.max_dir + + f"/patient_sequences_{self.max_seq_length}_{rounds}.parquet", + engine="pyarrow", + ) + LOGGER.info(f"Round {rounds} done") + rounds += 1 def reapply_truncation( self, @@ -1176,9 +1174,9 @@ def reapply_truncation( if __name__ == "__main__": generator = SequenceGenerator( max_seq_length=2048, - data_dir="/mnt/data/odyssey/mimiciv_fhir/csv_files", - 
json_dir="/mnt/data/odyssey/mimiciv_fhir/json_files", - save_dir="/mnt/data/odyssey/mimiciv_fhir/parquet_files", + data_dir="/mnt/data/odyssey/mimiciv_fhir1/csv_files", + json_dir="/mnt/data/odyssey/mimiciv_fhir1/vocab", + save_dir="/mnt/data/odyssey/mimiciv_fhir1/parquet_files", ) generator.create_patient_sequence( chunksize=20000, diff --git a/lib/tokenizer.py b/odyssey/data/tokenizer.py similarity index 59% rename from lib/tokenizer.py rename to odyssey/data/tokenizer.py index a2f9619..f5b16ce 100644 --- a/lib/tokenizer.py +++ b/odyssey/data/tokenizer.py @@ -1,8 +1,4 @@ -""" -file: tokenizer.py. - -Custom HuggingFace tokenizer for medical concepts in MIMIC-IV FHIR dataset. -""" +"""Tokenizer module.""" import glob import json @@ -15,7 +11,47 @@ class ConceptTokenizer: - """Tokenizer for event concepts using HuggingFace Library.""" + """Tokenizer for event concepts using HuggingFace Library. + + Parameters + ---------- + pad_token: str + Padding token. + mask_token: str + Mask token. + start_token: str + Sequence Start token. + end_token: str + Sequence End token. + class_token: str + Class token. + reg_token: str + Registry token. + unknown_token: str + Unknown token. + data_dir: str + Directory containing the data. + tokenizer_object: Optional[Tokenizer] + Tokenizer object. + tokenizer: Optional[PreTrainedTokenizerFast] + Tokenizer object. + + Attributes + ---------- + pad_token: str + Padding token. + mask_token: str + Mask token. + unknown_token: str + Unknown token. + special_tokens: List[str] + Special tokens. + tokenizer: PreTrainedTokenizerFast + Tokenizer object. + tokenizer_object: Tokenizer + Tokenizer object. + + """ def __init__( self, @@ -100,7 +136,19 @@ def create_tokenizer( self, tokenizer_obj: Tokenizer, ) -> PreTrainedTokenizerFast: - """Load the tokenizer from a JSON file on disk.""" + """Load the tokenizer from a JSON file on disk. + + Parameters + ---------- + tokenizer_obj: Tokenizer + Tokenizer object. + + Returns + ------- + PreTrainedTokenizerFast + Tokenizer object. + + """ self.tokenizer = PreTrainedTokenizerFast( tokenizer_object=tokenizer_obj, bos_token="[VS]", @@ -121,7 +169,29 @@ def __call__( padding: str = "max_length", max_length: int = 2048, ) -> BatchEncoding: - """Return the tokenized dictionary of input batch.""" + """Return the tokenized dictionary of input batch. + + Parameters + ---------- + batch: Union[str, List[str]] + Input batch. + return_attention_mask: bool + Return attention mask. + return_token_type_ids: bool + Return token type ids. + truncation: bool + Truncate the input. + padding: str + Padding strategy. + max_length: int + Maximum length of the input. + + Returns + ------- + BatchEncoding + Tokenized dictionary of input batch. + + """ return self.tokenizer( batch, return_attention_mask=return_attention_mask, @@ -133,23 +203,83 @@ def __call__( ) def encode(self, concept_tokens: str) -> List[int]: - """Encode the concept tokens into token ids.""" + """Encode the concept tokens into token ids. + + Parameters + ---------- + concept_tokens: str + Concept tokens. + + Returns + ------- + List[int] + Token ids. + + """ return self.tokenizer_object.encode(concept_tokens).ids def decode(self, concept_ids: List[int]) -> str: - """Decode the concept sequence token id into token concept.""" + """Decode the concept sequence token id into token concept. + + Parameters + ---------- + concept_ids: List[int] + Concept ids. + + Returns + ------- + str + Concept sequence. 
+ + """ return self.tokenizer_object.decode(concept_ids) def token_to_id(self, token: str) -> int: - """Return the id corresponding to token.""" + """Return the id corresponding to token. + + Parameters + ---------- + token: str + Token. + + Returns + ------- + int + Token id. + + """ return self.tokenizer_object.token_to_id(token) def id_to_token(self, token_id: int) -> str: - """Return the token corresponding to id.""" + """Return the token corresponding to id. + + Parameters + ---------- + token_id: int + Token id. + + Returns + ------- + str + Token. + + """ return self.tokenizer_object.id_to_token(token_id) def get_all_token_indexes(self, with_special_tokens: bool = True) -> Set[int]: - """Return a set of all possible token ids.""" + """Return a set of all possible token ids. + + Parameters + ---------- + with_special_tokens: bool + Include special tokens. + + Returns + ------- + Set[int] + Set of all token ids. + + """ all_token_ids = set(self.tokenizer_vocab.values()) special_token_ids = set(self.get_special_token_ids()) @@ -158,27 +288,69 @@ def get_all_token_indexes(self, with_special_tokens: bool = True) -> Set[int]: ) def get_first_token_index(self) -> int: - """Return the smallest token id in vocabulary.""" + """Return the smallest token id in vocabulary. + + Returns + ------- + int + First token id. + + """ return min(self.tokenizer_vocab.values()) def get_last_token_index(self) -> int: - """Return the largest token id in vocabulary.""" + """Return the largest token id in vocabulary. + + Returns + ------- + int + Largest token id. + + """ return max(self.tokenizer_vocab.values()) def get_vocab_size(self) -> int: - """Return the number of possible tokens in vocabulary.""" + """Return the number of possible tokens in vocabulary. + + Returns + ------- + int + Number of tokens in vocabulary. + + """ return len(self.tokenizer) def get_pad_token_id(self) -> int: - """Return the token id of PAD token.""" + """Return the token id of PAD token. + + Returns + ------- + int + Token id of PAD token. + + """ return self.token_to_id(self.pad_token) def get_mask_token_id(self) -> int: - """Return the token id of MASK token.""" + """Return the token id of MASK token. + + Returns + ------- + int + Token id of MASK token. + + """ return self.token_to_id(self.mask_token) def get_special_token_ids(self) -> List[int]: - """Get a list of ids representing special tokens.""" + """Get a list of ids representing special tokens. + + Returns + ------- + List[int] + List of special token ids. + + """ self.special_token_ids = [] for special_token in self.special_tokens: @@ -188,5 +360,12 @@ def get_special_token_ids(self) -> List[int]: return self.special_token_ids def save_tokenizer_to_disk(self, save_dir: str) -> None: - """Save the tokenizer object to disk as a JSON file.""" + """Save the tokenizer object to disk as a JSON file. + + Parameters + ---------- + save_dir: str + Directory to save the tokenizer. 
+ + """ self.tokenizer.save(path=save_dir) diff --git a/odyssey/models/__init__.py b/odyssey/models/__init__.py new file mode 100644 index 0000000..9e94c8d --- /dev/null +++ b/odyssey/models/__init__.py @@ -0,0 +1 @@ +"""Models sub-package.""" diff --git a/models/baseline/Bi-LSTM.ipynb b/odyssey/models/baseline/Bi-LSTM.ipynb similarity index 100% rename from models/baseline/Bi-LSTM.ipynb rename to odyssey/models/baseline/Bi-LSTM.ipynb diff --git a/models/baseline/Bi-LSTM.py b/odyssey/models/baseline/Bi-LSTM.py similarity index 97% rename from models/baseline/Bi-LSTM.py rename to odyssey/models/baseline/Bi-LSTM.py index 93739f6..8e20a38 100644 --- a/models/baseline/Bi-LSTM.py +++ b/odyssey/models/baseline/Bi-LSTM.py @@ -1,8 +1,4 @@ -""" -File: Bi-LSTM.ipynb. - -Code to train and evaluate a bi-directional LSTM model on MIMIC-IV FHIR dataset. -""" +"""Bi-directional LSTM model implementation.""" import os import sys @@ -21,9 +17,9 @@ from torch.utils.data import DataLoader, Dataset from tqdm import tqdm -from models.big_bird_cehr.data import FinetuneDataset -from models.big_bird_cehr.embeddings import Embeddings -from models.big_bird_cehr.tokenizer import HuggingFaceConceptTokenizer +from odyssey.models.big_bird_cehr.data import FinetuneDataset +from odyssey.models.big_bird_cehr.embeddings import Embeddings +from odyssey.models.big_bird_cehr.tokenizer import HuggingFaceConceptTokenizer ROOT = "/fs01/home/afallah/odyssey/odyssey" diff --git a/models/baseline/XGBoost.ipynb b/odyssey/models/baseline/XGBoost.ipynb similarity index 100% rename from models/baseline/XGBoost.ipynb rename to odyssey/models/baseline/XGBoost.ipynb diff --git a/models/cehr_bert/embeddings.py b/odyssey/models/cehr_bert/embeddings.py similarity index 100% rename from models/cehr_bert/embeddings.py rename to odyssey/models/cehr_bert/embeddings.py diff --git a/models/cehr_bert/model.py b/odyssey/models/cehr_bert/model.py similarity index 99% rename from models/cehr_bert/model.py rename to odyssey/models/cehr_bert/model.py index a1334dd..9af880d 100644 --- a/models/cehr_bert/model.py +++ b/odyssey/models/cehr_bert/model.py @@ -22,7 +22,7 @@ BertPooler, ) -from .embeddings import Embeddings +from odyssey.models.cehr_bert.embeddings import Embeddings class BertPretrain(pl.LightningModule): diff --git a/odyssey/models/cehr_big_bird/__init__.py b/odyssey/models/cehr_big_bird/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/big_bird_cehr/embeddings.py b/odyssey/models/cehr_big_bird/embeddings.py similarity index 97% rename from models/big_bird_cehr/embeddings.py rename to odyssey/models/cehr_big_bird/embeddings.py index 2e9f328..da43b44 100644 --- a/models/big_bird_cehr/embeddings.py +++ b/odyssey/models/cehr_big_bird/embeddings.py @@ -266,8 +266,21 @@ def cache_input( visit_orders: torch.Tensor, visit_segments: torch.Tensor, ) -> None: - """Cache values for time_stamps, ages, visit_orders & visit_segments inside the class object. + """Cache values for time_stamps, ages, visit_orders & visit_segments. + These values will be used by the forward pass to change the final embedding. + + Parameters + ---------- + time_stamps : torch.Tensor + Time stamps of the input data. + ages : torch.Tensor + Ages of the input data. + visit_orders : torch.Tensor + Visit orders of the input data. + visit_segments : torch.Tensor + Visit segments of the input data. 
+ """ self.time_stamps = time_stamps self.ages = ages diff --git a/models/big_bird_cehr/model.py b/odyssey/models/cehr_big_bird/model.py similarity index 98% rename from models/big_bird_cehr/model.py rename to odyssey/models/cehr_big_bird/model.py index e8ee05b..2c5cf67 100644 --- a/models/big_bird_cehr/model.py +++ b/odyssey/models/cehr_big_bird/model.py @@ -1,8 +1,4 @@ -""" -File: model.py. - -Implement BigBird using HuggingFace for pretraining and finetuning. -""" +"""Big Bird transformer model.""" from typing import Any, Dict, Optional, Tuple, Union @@ -25,7 +21,7 @@ BigBirdForSequenceClassification, ) -from .embeddings import BigBirdEmbeddingsForCEHR +from odyssey.models.cehr_big_bird.embeddings import BigBirdEmbeddingsForCEHR class BigBirdPretrain(pl.LightningModule): @@ -305,7 +301,10 @@ def forward( """Forward pass for the model.""" concept_ids, type_ids, time_stamps, ages, visit_orders, visit_segments = inputs self.model.bert.embeddings.cache_input( - time_stamps, ages, visit_orders, visit_segments + time_stamps, + ages, + visit_orders, + visit_segments, ) if attention_mask is None: diff --git a/models/big_bird_cehr/playground.ipynb b/odyssey/models/cehr_big_bird/playground.ipynb similarity index 100% rename from models/big_bird_cehr/playground.ipynb rename to odyssey/models/cehr_big_bird/playground.ipynb diff --git a/models/big_bird_cehr/tokenizer.ipynb b/odyssey/models/cehr_big_bird/tokenizer.ipynb similarity index 100% rename from models/big_bird_cehr/tokenizer.ipynb rename to odyssey/models/cehr_big_bird/tokenizer.ipynb diff --git a/models/configs/cehr_bert.yaml b/odyssey/models/configs/cehr_bert.yaml similarity index 100% rename from models/configs/cehr_bert.yaml rename to odyssey/models/configs/cehr_bert.yaml diff --git a/models/configs/cehr_bigbird.yaml b/odyssey/models/configs/cehr_bigbird.yaml similarity index 100% rename from models/configs/cehr_bigbird.yaml rename to odyssey/models/configs/cehr_bigbird.yaml diff --git a/models/exbehrt/playground.ipynb b/odyssey/models/exbehrt/playground.ipynb similarity index 100% rename from models/exbehrt/playground.ipynb rename to odyssey/models/exbehrt/playground.ipynb diff --git a/lib/prediction.py b/odyssey/models/prediction.py similarity index 93% rename from lib/prediction.py rename to odyssey/models/prediction.py index 630ee37..2d66e4f 100644 --- a/lib/prediction.py +++ b/odyssey/models/prediction.py @@ -4,8 +4,8 @@ import torch -from lib.tokenizer import ConceptTokenizer -from models.big_bird_cehr.model import BigBirdFinetune, BigBirdPretrain +from odyssey.data.tokenizer import ConceptTokenizer +from odyssey.models.cehr_big_bird.model import BigBirdFinetune, BigBirdPretrain def load_finetuned_model( @@ -23,7 +23,8 @@ Parameters ---------- - model_path: Path to the finetuned model to load + model_path: str +        Path to the finetuned model to load tokenizer: Loaded tokenizer object pre_model_config: Optional config to override default values of a pretrained model fine_model_config: Optional config to override default values of a finetuned model diff --git a/lib/utils.py b/odyssey/models/utils.py similarity index 100% rename from lib/utils.py rename to odyssey/models/utils.py diff --git a/odyssey/utils/log.py b/odyssey/utils/log.py new file mode 100644 index 0000000..05e8e69 --- /dev/null +++ b/odyssey/utils/log.py @@ -0,0 +1,138 @@ +"""Python logging function.""" + +import logging +from typing import Optional, Union + + +LOG_FORMAT = "%(asctime)-15s %(levelname)-5s %(name)-15s - %(message)s" + +#
https://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output +BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) +COLORS = { + "WARNING": YELLOW, + "INFO": WHITE, + "DEBUG": BLUE, + "CRITICAL": YELLOW, + "ERROR": RED, +} + +# The background is set with 40 plus the number of the color, and the foreground 30. +# These are the sequences need to get colored output. +RESET_SEQ = "\033[0m" +COLOR_SEQ = "\033[1;%dm" +BOLD_SEQ = "\033[1m" + + +def formatter_message(message: str, use_color: Optional[bool] = True) -> str: + """Format message. + + Parameters + ---------- + message: str + Message format. + use_color: bool, optional + Use colored logging. + + Returns + ------- + str + Formatted message. + + """ + if use_color: + message = message.replace("$RESET", RESET_SEQ).replace("$BOLD", BOLD_SEQ) + else: + message = message.replace("$RESET", "").replace("$BOLD", "") + return message + + +class Formatter(logging.Formatter): + """Formatter. + + Attributes + ---------- + use_color: bool + Flag to toggle use of color. + + """ + + def __init__( + self, + msg: Optional[str] = None, + use_color: bool = True, + ) -> None: + """Instantiate. + + Parameters + ---------- + msg: str + Message format. + use_color: bool + Flag to set using colored formatting. + + """ + if msg is None: + msg = formatter_message(LOG_FORMAT, True) + logging.Formatter.__init__(self, msg) + self.use_color = use_color + + def format(self, record: logging.LogRecord) -> str: # noqa: A003 + """Apply formatting. + + Parameters + ---------- + record: logging.LogRecord + Record object for logging. + + Returns + ------- + str + Formatted string for log. + + """ + levelname = record.levelname + if self.use_color and levelname in COLORS: + levelname_color = ( + COLOR_SEQ % (30 + COLORS[levelname]) + levelname + RESET_SEQ + ) + record.levelname = levelname_color + return logging.Formatter.format(self, record) + + +def setup_logging( + log_path: Optional[str] = None, + log_level: Union[int, str] = "DEBUG", + print_level: Union[int, str] = "INFO", + logger: Optional[logging.Logger] = None, + use_color: bool = True, +) -> None: + """Create logger, and set it up. + + Parameters + ---------- + log_path : str, optional + Path to output log file. + log_level : str, optional + Log level for logging, defaults to DEBUG. + print_level : str, optional + Print level for logging, defaults to INFO. + logger : logging.Logger, optional + Pass logger if already exists, else a new logger object is created. + use_color: bool, optional + Use colored logging. + + """ + fmt = formatter_message(LOG_FORMAT, use_color) + logger = logger if logger else logging.getLogger() + logger.setLevel(log_level) + logger.handlers = [] + + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(Formatter(fmt, use_color=use_color)) + stream_handler.setLevel(print_level) + logger.addHandler(stream_handler) + + if log_path: + file_handler = logging.FileHandler(log_path) + file_handler.setFormatter(Formatter(fmt, use_color=use_color)) + logger.addHandler(file_handler) diff --git a/poetry.lock b/poetry.lock index 5e0dc20..df9c043 100644 --- a/poetry.lock +++ b/poetry.lock @@ -588,18 +588,18 @@ test = ["coverage", "pytest (>5.4.0)", "pytest-cov (>=2.10.0)", "pytest-runner"] [[package]] name = "filelock" -version = "3.13.1" +version = "3.13.3" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"}, - {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"}, + {file = "filelock-3.13.3-py3-none-any.whl", hash = "sha256:5ffa845303983e7a0b7ae17636509bc97997d58afeafa72fb141a17b152284cb"}, + {file = "filelock-3.13.3.tar.gz", hash = "sha256:a79895a25bbefdf55d1a2a0a80968f7dbb28edcd6d4234a0afb3f37ecde4b546"}, ] [package.extras] -docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] typing = ["typing-extensions (>=4.8)"] [[package]] @@ -706,13 +706,13 @@ files = [ [[package]] name = "fsspec" -version = "2024.3.0" +version = "2024.3.1" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2024.3.0-py3-none-any.whl", hash = "sha256:779001bd0122c9c4975cf03827d5e86c3afb914a3ae27040f15d341ab506a693"}, - {file = "fsspec-2024.3.0.tar.gz", hash = "sha256:f13a130c0ed07e15c4e1aeb0472a823e9c426b0b5792a1f40d902b0a71972d43"}, + {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"}, + {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"}, ] [package.dependencies] @@ -1131,13 +1131,13 @@ test = ["click (==8.1.7)", "cloudpickle (>=1.3,<3.0)", "coverage (==7.3.1)", "fa [[package]] name = "lightning-utilities" -version = "0.10.1" +version = "0.11.2" description = "Lightning toolbox for across the our ecosystem." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "lightning-utilities-0.10.1.tar.gz", hash = "sha256:362755023dcf93b8fa519bc002ae41794943a3ffbc5318e40804d36aa14bf1fd"}, - {file = "lightning_utilities-0.10.1-py3-none-any.whl", hash = "sha256:e67be3f328b1c14f2b36cc09e84642db5b50afeab94e7704969b2130fe6a3bda"}, + {file = "lightning-utilities-0.11.2.tar.gz", hash = "sha256:adf4cf9c5d912fe505db4729e51d1369c6927f3a8ac55a9dff895ce5c0da08d9"}, + {file = "lightning_utilities-0.11.2-py3-none-any.whl", hash = "sha256:541f471ed94e18a28d72879338c8c52e873bb46f4c47644d89228faeb6751159"}, ] [package.dependencies] @@ -1409,13 +1409,13 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] [[package]] name = "nbqa" -version = "1.8.4" +version = "1.8.5" description = "Run any standard Python code quality tool on a Jupyter Notebook" optional = false python-versions = ">=3.8.0" files = [ - {file = "nbqa-1.8.4-py3-none-any.whl", hash = "sha256:0e2acd73320ad1aa56f15200f9ea517c0ecb5ac388d217aee97fab66272c604b"}, - {file = "nbqa-1.8.4.tar.gz", hash = "sha256:ca983e115d81f5cf149f70c4bf5b8baa36694a3eaf0783fe508dbf05b9767e07"}, + {file = "nbqa-1.8.5-py3-none-any.whl", hash = "sha256:fe59ccb66f29bda2912c75cacf9cdbd34504923effb58ae1c88211d075213eff"}, + {file = "nbqa-1.8.5.tar.gz", hash = "sha256:91624e9c747bbe38ff14ebf75d17cfb838b5c0432b039bcb7e8ad0bb423ef7ef"}, ] [package.dependencies] @@ -1620,7 +1620,10 @@ files = [ ] [package.dependencies] -numpy = {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""} +numpy = [ + {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, +] python-dateutil = ">=2.8.2" pytz = ">=2020.1" tzdata = ">=2022.7" @@ -1864,47 +1867,47 @@ tests = ["pytest"] [[package]] name = "pyarrow" -version = "15.0.1" +version = "15.0.2" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" files = [ - {file = "pyarrow-15.0.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:c2ddb3be5ea938c329a84171694fc230b241ce1b6b0ff1a0280509af51c375fa"}, - {file = "pyarrow-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7543ea88a0ff72f8e6baaf9bfdbec2c62aeabdbede9e4a571c71cc3bc43b6302"}, - {file = "pyarrow-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1519e218a6941fc074e4501088d891afcb2adf77c236e03c34babcf3d6a0d1c7"}, - {file = "pyarrow-15.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28cafa86e1944761970d3b3fc0411b14ff9b5c2b73cd22aaf470d7a3976335f5"}, - {file = "pyarrow-15.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:be5c3d463e33d03eab496e1af7916b1d44001c08f0f458ad27dc16093a020638"}, - {file = "pyarrow-15.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:47b1eda15d3aa3f49a07b1808648e1397e5dc6a80a30bf87faa8e2d02dad7ac3"}, - {file = "pyarrow-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:e524a31be7db22deebbbcf242b189063ab9a7652c62471d296b31bc6e3cae77b"}, - {file = "pyarrow-15.0.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:a476fefe8bdd56122fb0d4881b785413e025858803cc1302d0d788d3522b374d"}, - {file = "pyarrow-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:309e6191be385f2e220586bfdb643f9bb21d7e1bc6dd0a6963dc538e347b2431"}, - {file = "pyarrow-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83bc586903dbeb4365cbc72b602f99f70b96c5882e5dfac5278813c7d624ca3c"}, - {file = 
"pyarrow-15.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07e652daac6d8b05280cd2af31c0fb61a4490ec6a53dc01588014d9fa3fdbee9"}, - {file = "pyarrow-15.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:abad2e08652df153a72177ce20c897d083b0c4ebeec051239e2654ddf4d3c996"}, - {file = "pyarrow-15.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cde663352bc83ad75ba7b3206e049ca1a69809223942362a8649e37bd22f9e3b"}, - {file = "pyarrow-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:1b6e237dd7a08482a8b8f3f6512d258d2460f182931832a8c6ef3953203d31e1"}, - {file = "pyarrow-15.0.1-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:7bd167536ee23192760b8c731d39b7cfd37914c27fd4582335ffd08450ff799d"}, - {file = "pyarrow-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7c08bb31eb2984ba5c3747d375bb522e7e536b8b25b149c9cb5e1c49b0ccb736"}, - {file = "pyarrow-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0f9c1d630ed2524bd1ddf28ec92780a7b599fd54704cd653519f7ff5aec177a"}, - {file = "pyarrow-15.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5186048493395220550bca7b524420471aac2d77af831f584ce132680f55c3df"}, - {file = "pyarrow-15.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:31dc30c7ec8958da3a3d9f31d6c3630429b2091ede0ecd0d989fd6bec129f0e4"}, - {file = "pyarrow-15.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:3f111a014fb8ac2297b43a74bf4495cc479a332908f7ee49cb7cbd50714cb0c1"}, - {file = "pyarrow-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:a6d1f7c15d7f68f08490d0cb34611497c74285b8a6bbeab4ef3fc20117310983"}, - {file = "pyarrow-15.0.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:9ad931b996f51c2f978ed517b55cb3c6078272fb4ec579e3da5a8c14873b698d"}, - {file = "pyarrow-15.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:738f6b53ab1c2f66b2bde8a1d77e186aeaab702d849e0dfa1158c9e2c030add3"}, - {file = "pyarrow-15.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c1c3fc16bc74e33bf8f1e5a212938ed8d88e902f372c4dac6b5bad328567d2f"}, - {file = "pyarrow-15.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1fa92512128f6c1b8dde0468c1454dd70f3bff623970e370d52efd4d24fd0be"}, - {file = "pyarrow-15.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:b4157f307c202cbbdac147d9b07447a281fa8e63494f7fc85081da351ec6ace9"}, - {file = "pyarrow-15.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:b75e7da26f383787f80ad76143b44844ffa28648fcc7099a83df1538c078d2f2"}, - {file = "pyarrow-15.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:3a99eac76ae14096c209850935057b9e8ce97a78397c5cde8724674774f34e5d"}, - {file = "pyarrow-15.0.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:dd532d3177e031e9b2d2df19fd003d0cc0520d1747659fcabbd4d9bb87de508c"}, - {file = "pyarrow-15.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ce8c89848fd37e5313fc2ce601483038ee5566db96ba0808d5883b2e2e55dc53"}, - {file = "pyarrow-15.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:862eac5e5f3b6477f7a92b2f27e560e1f4e5e9edfca9ea9da8a7478bb4abd5ce"}, - {file = "pyarrow-15.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f0ea3a29cd5cb99bf14c1c4533eceaa00ea8fb580950fb5a89a5c771a994a4e"}, - {file = "pyarrow-15.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:bb902f780cfd624b2e8fd8501fadab17618fdb548532620ef3d91312aaf0888a"}, - {file = 
"pyarrow-15.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:4f87757f02735a6bb4ad2e1b98279ac45d53b748d5baf52401516413007c6999"}, - {file = "pyarrow-15.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:efd3816c7fbfcbd406ac0f69873cebb052effd7cdc153ae5836d1b00845845d7"}, - {file = "pyarrow-15.0.1.tar.gz", hash = "sha256:21d812548d39d490e0c6928a7c663f37b96bf764034123d4b4ab4530ecc757a9"}, + {file = "pyarrow-15.0.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:88b340f0a1d05b5ccc3d2d986279045655b1fe8e41aba6ca44ea28da0d1455d8"}, + {file = "pyarrow-15.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eaa8f96cecf32da508e6c7f69bb8401f03745c050c1dd42ec2596f2e98deecac"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c6753ed4f6adb8461e7c383e418391b8d8453c5d67e17f416c3a5d5709afbd"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f639c059035011db8c0497e541a8a45d98a58dbe34dc8fadd0ef128f2cee46e5"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:290e36a59a0993e9a5224ed2fb3e53375770f07379a0ea03ee2fce2e6d30b423"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06c2bb2a98bc792f040bef31ad3e9be6a63d0cb39189227c08a7d955db96816e"}, + {file = "pyarrow-15.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:f7a197f3670606a960ddc12adbe8075cea5f707ad7bf0dffa09637fdbb89f76c"}, + {file = "pyarrow-15.0.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5f8bc839ea36b1f99984c78e06e7a06054693dc2af8920f6fb416b5bca9944e4"}, + {file = "pyarrow-15.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f5e81dfb4e519baa6b4c80410421528c214427e77ca0ea9461eb4097c328fa33"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4f240852b302a7af4646c8bfe9950c4691a419847001178662a98915fd7ee7"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e7d9cfb5a1e648e172428c7a42b744610956f3b70f524aa3a6c02a448ba853e"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2d4f905209de70c0eb5b2de6763104d5a9a37430f137678edfb9a675bac9cd98"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:90adb99e8ce5f36fbecbbc422e7dcbcbed07d985eed6062e459e23f9e71fd197"}, + {file = "pyarrow-15.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:b116e7fd7889294cbd24eb90cd9bdd3850be3738d61297855a71ac3b8124ee38"}, + {file = "pyarrow-15.0.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:25335e6f1f07fdaa026a61c758ee7d19ce824a866b27bba744348fa73bb5a440"}, + {file = "pyarrow-15.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:90f19e976d9c3d8e73c80be84ddbe2f830b6304e4c576349d9360e335cd627fc"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a22366249bf5fd40ddacc4f03cd3160f2d7c247692945afb1899bab8a140ddfb"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2a335198f886b07e4b5ea16d08ee06557e07db54a8400cc0d03c7f6a22f785f"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e6d459c0c22f0b9c810a3917a1de3ee704b021a5fb8b3bacf968eece6df098f"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:033b7cad32198754d93465dcfb71d0ba7cb7cd5c9afd7052cab7214676eec38b"}, + {file = "pyarrow-15.0.2-cp312-cp312-win_amd64.whl", hash = 
"sha256:29850d050379d6e8b5a693098f4de7fd6a2bea4365bfd073d7c57c57b95041ee"}, + {file = "pyarrow-15.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:7167107d7fb6dcadb375b4b691b7e316f4368f39f6f45405a05535d7ad5e5058"}, + {file = "pyarrow-15.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e85241b44cc3d365ef950432a1b3bd44ac54626f37b2e3a0cc89c20e45dfd8bf"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:248723e4ed3255fcd73edcecc209744d58a9ca852e4cf3d2577811b6d4b59818"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ff3bdfe6f1b81ca5b73b70a8d482d37a766433823e0c21e22d1d7dde76ca33f"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f3d77463dee7e9f284ef42d341689b459a63ff2e75cee2b9302058d0d98fe142"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:8c1faf2482fb89766e79745670cbca04e7018497d85be9242d5350cba21357e1"}, + {file = "pyarrow-15.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:28f3016958a8e45a1069303a4a4f6a7d4910643fc08adb1e2e4a7ff056272ad3"}, + {file = "pyarrow-15.0.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:89722cb64286ab3d4daf168386f6968c126057b8c7ec3ef96302e81d8cdb8ae4"}, + {file = "pyarrow-15.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cd0ba387705044b3ac77b1b317165c0498299b08261d8122c96051024f953cd5"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad2459bf1f22b6a5cdcc27ebfd99307d5526b62d217b984b9f5c974651398832"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58922e4bfece8b02abf7159f1f53a8f4d9f8e08f2d988109126c17c3bb261f22"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:adccc81d3dc0478ea0b498807b39a8d41628fa9210729b2f718b78cb997c7c91"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8bd2baa5fe531571847983f36a30ddbf65261ef23e496862ece83bdceb70420d"}, + {file = "pyarrow-15.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6669799a1d4ca9da9c7e06ef48368320f5856f36f9a4dd31a11839dda3f6cc8c"}, + {file = "pyarrow-15.0.2.tar.gz", hash = "sha256:9c9bc803cb3b7bfacc1e96ffbfd923601065d9d3f911179d81e72d99fd74a3d9"}, ] [package.dependencies] @@ -1961,7 +1964,10 @@ files = [ [package.dependencies] astroid = ">=3.1.0,<=3.2.0-dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -dill = {version = ">=0.2", markers = "python_version < \"3.11\""} +dill = [ + {version = ">=0.2", markers = "python_version < \"3.11\""}, + {version = ">=0.3.6", markers = "python_version >= \"3.11\""}, +] isort = ">=4.2.5,<5.13.0 || >5.13.0,<6" mccabe = ">=0.6,<0.8" platformdirs = ">=2.2.0" @@ -2071,13 +2077,13 @@ files = [ [[package]] name = "pyupgrade" -version = "3.15.1" +version = "3.15.2" description = "A tool to automatically upgrade syntax for newer versions." 
optional = false python-versions = ">=3.8.1" files = [ - {file = "pyupgrade-3.15.1-py2.py3-none-any.whl", hash = "sha256:c5e005de2805edcd333d1deb04553200ec69da85e4bc9db37b16345ed9e27ed9"}, - {file = "pyupgrade-3.15.1.tar.gz", hash = "sha256:7690857cae0f6253f39241dcd2e57118c333c438b78609fc3c17a5aa61227b7d"}, + {file = "pyupgrade-3.15.2-py2.py3-none-any.whl", hash = "sha256:ce309e0ff8ecb73f56a45f12570be84bbbde9540d13697cacb261a7f595fb1f5"}, + {file = "pyupgrade-3.15.2.tar.gz", hash = "sha256:c488d6896c546d25845712ef6402657123008d56c1063174e27aabe15bd6b4e5"}, ] [package.dependencies] @@ -2311,28 +2317,28 @@ files = [ [[package]] name = "ruff" -version = "0.3.3" +version = "0.3.4" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.3.3-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:973a0e388b7bc2e9148c7f9be8b8c6ae7471b9be37e1cc732f8f44a6f6d7720d"}, - {file = "ruff-0.3.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cfa60d23269d6e2031129b053fdb4e5a7b0637fc6c9c0586737b962b2f834493"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1eca7ff7a47043cf6ce5c7f45f603b09121a7cc047447744b029d1b719278eb5"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7d3f6762217c1da954de24b4a1a70515630d29f71e268ec5000afe81377642d"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b24c19e8598916d9c6f5a5437671f55ee93c212a2c4c569605dc3842b6820386"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5a6cbf216b69c7090f0fe4669501a27326c34e119068c1494f35aaf4cc683778"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:352e95ead6964974b234e16ba8a66dad102ec7bf8ac064a23f95371d8b198aab"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d6ab88c81c4040a817aa432484e838aaddf8bfd7ca70e4e615482757acb64f8"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79bca3a03a759cc773fca69e0bdeac8abd1c13c31b798d5bb3c9da4a03144a9f"}, - {file = "ruff-0.3.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2700a804d5336bcffe063fd789ca2c7b02b552d2e323a336700abb8ae9e6a3f8"}, - {file = "ruff-0.3.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd66469f1a18fdb9d32e22b79f486223052ddf057dc56dea0caaf1a47bdfaf4e"}, - {file = "ruff-0.3.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:45817af234605525cdf6317005923bf532514e1ea3d9270acf61ca2440691376"}, - {file = "ruff-0.3.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0da458989ce0159555ef224d5b7c24d3d2e4bf4c300b85467b08c3261c6bc6a8"}, - {file = "ruff-0.3.3-py3-none-win32.whl", hash = "sha256:f2831ec6a580a97f1ea82ea1eda0401c3cdf512cf2045fa3c85e8ef109e87de0"}, - {file = "ruff-0.3.3-py3-none-win_amd64.whl", hash = "sha256:be90bcae57c24d9f9d023b12d627e958eb55f595428bafcb7fec0791ad25ddfc"}, - {file = "ruff-0.3.3-py3-none-win_arm64.whl", hash = "sha256:0171aab5fecdc54383993389710a3d1227f2da124d76a2784a7098e818f92d61"}, - {file = "ruff-0.3.3.tar.gz", hash = "sha256:38671be06f57a2f8aba957d9f701ea889aa5736be806f18c0cd03d6ff0cbca8d"}, + {file = "ruff-0.3.4-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:60c870a7d46efcbc8385d27ec07fe534ac32f3b251e4fc44b3cbfd9e09609ef4"}, + {file = 
"ruff-0.3.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6fc14fa742e1d8f24910e1fff0bd5e26d395b0e0e04cc1b15c7c5e5fe5b4af91"}, + {file = "ruff-0.3.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3ee7880f653cc03749a3bfea720cf2a192e4f884925b0cf7eecce82f0ce5854"}, + {file = "ruff-0.3.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cf133dd744f2470b347f602452a88e70dadfbe0fcfb5fd46e093d55da65f82f7"}, + {file = "ruff-0.3.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f3860057590e810c7ffea75669bdc6927bfd91e29b4baa9258fd48b540a4365"}, + {file = "ruff-0.3.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:986f2377f7cf12efac1f515fc1a5b753c000ed1e0a6de96747cdf2da20a1b369"}, + {file = "ruff-0.3.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fd98e85869603e65f554fdc5cddf0712e352fe6e61d29d5a6fe087ec82b76c"}, + {file = "ruff-0.3.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64abeed785dad51801b423fa51840b1764b35d6c461ea8caef9cf9e5e5ab34d9"}, + {file = "ruff-0.3.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df52972138318bc7546d92348a1ee58449bc3f9eaf0db278906eb511889c4b50"}, + {file = "ruff-0.3.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:98e98300056445ba2cc27d0b325fd044dc17fcc38e4e4d2c7711585bd0a958ed"}, + {file = "ruff-0.3.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:519cf6a0ebed244dce1dc8aecd3dc99add7a2ee15bb68cf19588bb5bf58e0488"}, + {file = "ruff-0.3.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:bb0acfb921030d00070539c038cd24bb1df73a2981e9f55942514af8b17be94e"}, + {file = "ruff-0.3.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cf187a7e7098233d0d0c71175375c5162f880126c4c716fa28a8ac418dcf3378"}, + {file = "ruff-0.3.4-py3-none-win32.whl", hash = "sha256:af27ac187c0a331e8ef91d84bf1c3c6a5dea97e912a7560ac0cef25c526a4102"}, + {file = "ruff-0.3.4-py3-none-win_amd64.whl", hash = "sha256:de0d5069b165e5a32b3c6ffbb81c350b1e3d3483347196ffdf86dc0ef9e37dd6"}, + {file = "ruff-0.3.4-py3-none-win_arm64.whl", hash = "sha256:6810563cc08ad0096b57c717bd78aeac888a1bfd38654d9113cb3dc4d3f74232"}, + {file = "ruff-0.3.4.tar.gz", hash = "sha256:f0f4484c6541a99862b693e13a151435a279b271cff20e37101116a21e2a1ad1"}, ] [[package]] @@ -2421,13 +2427,13 @@ test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", [[package]] name = "sentry-sdk" -version = "1.42.0" +version = "1.43.0" description = "Python client for Sentry (https://sentry.io)" optional = false python-versions = "*" files = [ - {file = "sentry-sdk-1.42.0.tar.gz", hash = "sha256:4a8364b8f7edbf47f95f7163e48334c96100d9c098f0ae6606e2e18183c223e6"}, - {file = "sentry_sdk-1.42.0-py2.py3-none-any.whl", hash = "sha256:a654ee7e497a3f5f6368b36d4f04baeab1fe92b3105f7f6965d6ef0de35a9ba4"}, + {file = "sentry-sdk-1.43.0.tar.gz", hash = "sha256:41df73af89d22921d8733714fb0fc5586c3461907e06688e6537d01a27e0e0f6"}, + {file = "sentry_sdk-1.43.0-py2.py3-none-any.whl", hash = "sha256:8d768724839ca18d7b4c7463ef7528c40b7aa2bfbf7fe554d5f9a7c044acfd36"}, ] [package.dependencies] @@ -2441,6 +2447,7 @@ asyncpg = ["asyncpg (>=0.23)"] beam = ["apache-beam (>=2.12)"] bottle = ["bottle (>=0.12.13)"] celery = ["celery (>=3)"] +celery-redbeat = ["celery-redbeat (>=2)"] chalice = ["chalice (>=1.16.0)"] clickhouse-driver = ["clickhouse-driver (>=0.2.0)"] django = ["django (>=1.8)"] @@ -2605,60 +2612,60 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.28" 
+version = "2.0.29" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.28-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0b148ab0438f72ad21cb004ce3bdaafd28465c4276af66df3b9ecd2037bf252"}, - {file = "SQLAlchemy-2.0.28-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bbda76961eb8f27e6ad3c84d1dc56d5bc61ba8f02bd20fcf3450bd421c2fcc9c"}, - {file = "SQLAlchemy-2.0.28-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feea693c452d85ea0015ebe3bb9cd15b6f49acc1a31c28b3c50f4db0f8fb1e71"}, - {file = "SQLAlchemy-2.0.28-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5da98815f82dce0cb31fd1e873a0cb30934971d15b74e0d78cf21f9e1b05953f"}, - {file = "SQLAlchemy-2.0.28-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4a5adf383c73f2d49ad15ff363a8748319ff84c371eed59ffd0127355d6ea1da"}, - {file = "SQLAlchemy-2.0.28-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:56856b871146bfead25fbcaed098269d90b744eea5cb32a952df00d542cdd368"}, - {file = "SQLAlchemy-2.0.28-cp310-cp310-win32.whl", hash = "sha256:943aa74a11f5806ab68278284a4ddd282d3fb348a0e96db9b42cb81bf731acdc"}, - {file = "SQLAlchemy-2.0.28-cp310-cp310-win_amd64.whl", hash = "sha256:c6c4da4843e0dabde41b8f2e8147438330924114f541949e6318358a56d1875a"}, - {file = "SQLAlchemy-2.0.28-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46a3d4e7a472bfff2d28db838669fc437964e8af8df8ee1e4548e92710929adc"}, - {file = "SQLAlchemy-2.0.28-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0d3dd67b5d69794cfe82862c002512683b3db038b99002171f624712fa71aeaa"}, - {file = "SQLAlchemy-2.0.28-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c61e2e41656a673b777e2f0cbbe545323dbe0d32312f590b1bc09da1de6c2a02"}, - {file = "SQLAlchemy-2.0.28-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0315d9125a38026227f559488fe7f7cee1bd2fbc19f9fd637739dc50bb6380b2"}, - {file = "SQLAlchemy-2.0.28-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af8ce2d31679006e7b747d30a89cd3ac1ec304c3d4c20973f0f4ad58e2d1c4c9"}, - {file = "SQLAlchemy-2.0.28-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:81ba314a08c7ab701e621b7ad079c0c933c58cdef88593c59b90b996e8b58fa5"}, - {file = "SQLAlchemy-2.0.28-cp311-cp311-win32.whl", hash = "sha256:1ee8bd6d68578e517943f5ebff3afbd93fc65f7ef8f23becab9fa8fb315afb1d"}, - {file = "SQLAlchemy-2.0.28-cp311-cp311-win_amd64.whl", hash = "sha256:ad7acbe95bac70e4e687a4dc9ae3f7a2f467aa6597049eeb6d4a662ecd990bb6"}, - {file = "SQLAlchemy-2.0.28-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d3499008ddec83127ab286c6f6ec82a34f39c9817f020f75eca96155f9765097"}, - {file = "SQLAlchemy-2.0.28-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9b66fcd38659cab5d29e8de5409cdf91e9986817703e1078b2fdaad731ea66f5"}, - {file = "SQLAlchemy-2.0.28-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bea30da1e76cb1acc5b72e204a920a3a7678d9d52f688f087dc08e54e2754c67"}, - {file = "SQLAlchemy-2.0.28-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:124202b4e0edea7f08a4db8c81cc7859012f90a0d14ba2bf07c099aff6e96462"}, - {file = "SQLAlchemy-2.0.28-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e23b88c69497a6322b5796c0781400692eca1ae5532821b39ce81a48c395aae9"}, - {file = "SQLAlchemy-2.0.28-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b6303bfd78fb3221847723104d152e5972c22367ff66edf09120fcde5ddc2e2"}, - {file = 
"SQLAlchemy-2.0.28-cp312-cp312-win32.whl", hash = "sha256:a921002be69ac3ab2cf0c3017c4e6a3377f800f1fca7f254c13b5f1a2f10022c"}, - {file = "SQLAlchemy-2.0.28-cp312-cp312-win_amd64.whl", hash = "sha256:b4a2cf92995635b64876dc141af0ef089c6eea7e05898d8d8865e71a326c0385"}, - {file = "SQLAlchemy-2.0.28-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e91b5e341f8c7f1e5020db8e5602f3ed045a29f8e27f7f565e0bdee3338f2c7"}, - {file = "SQLAlchemy-2.0.28-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45c7b78dfc7278329f27be02c44abc0d69fe235495bb8e16ec7ef1b1a17952db"}, - {file = "SQLAlchemy-2.0.28-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3eba73ef2c30695cb7eabcdb33bb3d0b878595737479e152468f3ba97a9c22a4"}, - {file = "SQLAlchemy-2.0.28-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5df5d1dafb8eee89384fb7a1f79128118bc0ba50ce0db27a40750f6f91aa99d5"}, - {file = "SQLAlchemy-2.0.28-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2858bbab1681ee5406650202950dc8f00e83b06a198741b7c656e63818633526"}, - {file = "SQLAlchemy-2.0.28-cp37-cp37m-win32.whl", hash = "sha256:9461802f2e965de5cff80c5a13bc945abea7edaa1d29360b485c3d2b56cdb075"}, - {file = "SQLAlchemy-2.0.28-cp37-cp37m-win_amd64.whl", hash = "sha256:a6bec1c010a6d65b3ed88c863d56b9ea5eeefdf62b5e39cafd08c65f5ce5198b"}, - {file = "SQLAlchemy-2.0.28-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:843a882cadebecc655a68bd9a5b8aa39b3c52f4a9a5572a3036fb1bb2ccdc197"}, - {file = "SQLAlchemy-2.0.28-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:dbb990612c36163c6072723523d2be7c3eb1517bbdd63fe50449f56afafd1133"}, - {file = "SQLAlchemy-2.0.28-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7e4baf9161d076b9a7e432fce06217b9bd90cfb8f1d543d6e8c4595627edb9"}, - {file = "SQLAlchemy-2.0.28-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0a5354cb4de9b64bccb6ea33162cb83e03dbefa0d892db88a672f5aad638a75"}, - {file = "SQLAlchemy-2.0.28-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:fffcc8edc508801ed2e6a4e7b0d150a62196fd28b4e16ab9f65192e8186102b6"}, - {file = "SQLAlchemy-2.0.28-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aca7b6d99a4541b2ebab4494f6c8c2f947e0df4ac859ced575238e1d6ca5716b"}, - {file = "SQLAlchemy-2.0.28-cp38-cp38-win32.whl", hash = "sha256:8c7f10720fc34d14abad5b647bc8202202f4948498927d9f1b4df0fb1cf391b7"}, - {file = "SQLAlchemy-2.0.28-cp38-cp38-win_amd64.whl", hash = "sha256:243feb6882b06a2af68ecf4bec8813d99452a1b62ba2be917ce6283852cf701b"}, - {file = "SQLAlchemy-2.0.28-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fc4974d3684f28b61b9a90fcb4c41fb340fd4b6a50c04365704a4da5a9603b05"}, - {file = "SQLAlchemy-2.0.28-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87724e7ed2a936fdda2c05dbd99d395c91ea3c96f029a033a4a20e008dd876bf"}, - {file = "SQLAlchemy-2.0.28-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68722e6a550f5de2e3cfe9da6afb9a7dd15ef7032afa5651b0f0c6b3adb8815d"}, - {file = "SQLAlchemy-2.0.28-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:328529f7c7f90adcd65aed06a161851f83f475c2f664a898af574893f55d9e53"}, - {file = "SQLAlchemy-2.0.28-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:df40c16a7e8be7413b885c9bf900d402918cc848be08a59b022478804ea076b8"}, - {file = "SQLAlchemy-2.0.28-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:426f2fa71331a64f5132369ede5171c52fd1df1bd9727ce621f38b5b24f48750"}, - {file = "SQLAlchemy-2.0.28-cp39-cp39-win32.whl", hash = 
"sha256:33157920b233bc542ce497a81a2e1452e685a11834c5763933b440fedd1d8e2d"}, - {file = "SQLAlchemy-2.0.28-cp39-cp39-win_amd64.whl", hash = "sha256:2f60843068e432311c886c5f03c4664acaef507cf716f6c60d5fde7265be9d7b"}, - {file = "SQLAlchemy-2.0.28-py3-none-any.whl", hash = "sha256:78bb7e8da0183a8301352d569900d9d3594c48ac21dc1c2ec6b3121ed8b6c986"}, - {file = "SQLAlchemy-2.0.28.tar.gz", hash = "sha256:dd53b6c4e6d960600fd6532b79ee28e2da489322fcf6648738134587faf767b6"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c142852ae192e9fe5aad5c350ea6befe9db14370b34047e1f0f7cf99e63c63b"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:99a1e69d4e26f71e750e9ad6fdc8614fbddb67cfe2173a3628a2566034e223c7"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ef3fbccb4058355053c51b82fd3501a6e13dd808c8d8cd2561e610c5456013c"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d6753305936eddc8ed190e006b7bb33a8f50b9854823485eed3a886857ab8d1"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0f3ca96af060a5250a8ad5a63699180bc780c2edf8abf96c58af175921df847a"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c4520047006b1d3f0d89e0532978c0688219857eb2fee7c48052560ae76aca1e"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-win32.whl", hash = "sha256:b2a0e3cf0caac2085ff172c3faacd1e00c376e6884b5bc4dd5b6b84623e29e4f"}, + {file = "SQLAlchemy-2.0.29-cp310-cp310-win_amd64.whl", hash = "sha256:01d10638a37460616708062a40c7b55f73e4d35eaa146781c683e0fa7f6c43fb"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:308ef9cb41d099099fffc9d35781638986870b29f744382904bf9c7dadd08513"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:296195df68326a48385e7a96e877bc19aa210e485fa381c5246bc0234c36c78e"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a13b917b4ffe5a0a31b83d051d60477819ddf18276852ea68037a144a506efb9"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f6d971255d9ddbd3189e2e79d743ff4845c07f0633adfd1de3f63d930dbe673"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:61405ea2d563407d316c63a7b5271ae5d274a2a9fbcd01b0aa5503635699fa1e"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:de7202ffe4d4a8c1e3cde1c03e01c1a3772c92858837e8f3879b497158e4cb44"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-win32.whl", hash = "sha256:b5d7ed79df55a731749ce65ec20d666d82b185fa4898430b17cb90c892741520"}, + {file = "SQLAlchemy-2.0.29-cp311-cp311-win_amd64.whl", hash = "sha256:205f5a2b39d7c380cbc3b5dcc8f2762fb5bcb716838e2d26ccbc54330775b003"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d96710d834a6fb31e21381c6d7b76ec729bd08c75a25a5184b1089141356171f"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:52de4736404e53c5c6a91ef2698c01e52333988ebdc218f14c833237a0804f1b"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c7b02525ede2a164c5fa5014915ba3591730f2cc831f5be9ff3b7fd3e30958e"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0dfefdb3e54cd15f5d56fd5ae32f1da2d95d78319c1f6dfb9bcd0eb15d603d5d"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a88913000da9205b13f6f195f0813b6ffd8a0c0c2bd58d499e00a30eb508870c"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fecd5089c4be1bcc37c35e9aa678938d2888845a134dd016de457b942cf5a758"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-win32.whl", hash = "sha256:8197d6f7a3d2b468861ebb4c9f998b9df9e358d6e1cf9c2a01061cb9b6cf4e41"}, + {file = "SQLAlchemy-2.0.29-cp312-cp312-win_amd64.whl", hash = "sha256:9b19836ccca0d321e237560e475fd99c3d8655d03da80c845c4da20dda31b6e1"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:87a1d53a5382cdbbf4b7619f107cc862c1b0a4feb29000922db72e5a66a5ffc0"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a0732dffe32333211801b28339d2a0babc1971bc90a983e3035e7b0d6f06b93"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90453597a753322d6aa770c5935887ab1fc49cc4c4fdd436901308383d698b4b"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ea311d4ee9a8fa67f139c088ae9f905fcf0277d6cd75c310a21a88bf85e130f5"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5f20cb0a63a3e0ec4e169aa8890e32b949c8145983afa13a708bc4b0a1f30e03"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-win32.whl", hash = "sha256:e5bbe55e8552019c6463709b39634a5fc55e080d0827e2a3a11e18eb73f5cdbd"}, + {file = "SQLAlchemy-2.0.29-cp37-cp37m-win_amd64.whl", hash = "sha256:c2f9c762a2735600654c654bf48dad388b888f8ce387b095806480e6e4ff6907"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7e614d7a25a43a9f54fcce4675c12761b248547f3d41b195e8010ca7297c369c"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:471fcb39c6adf37f820350c28aac4a7df9d3940c6548b624a642852e727ea586"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:988569c8732f54ad3234cf9c561364221a9e943b78dc7a4aaf35ccc2265f1930"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dddaae9b81c88083e6437de95c41e86823d150f4ee94bf24e158a4526cbead01"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:334184d1ab8f4c87f9652b048af3f7abea1c809dfe526fb0435348a6fef3d380"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:38b624e5cf02a69b113c8047cf7f66b5dfe4a2ca07ff8b8716da4f1b3ae81567"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-win32.whl", hash = "sha256:bab41acf151cd68bc2b466deae5deeb9e8ae9c50ad113444151ad965d5bf685b"}, + {file = "SQLAlchemy-2.0.29-cp38-cp38-win_amd64.whl", hash = "sha256:52c8011088305476691b8750c60e03b87910a123cfd9ad48576d6414b6ec2a1d"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3071ad498896907a5ef756206b9dc750f8e57352113c19272bdfdc429c7bd7de"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dba622396a3170974f81bad49aacebd243455ec3cc70615aeaef9e9613b5bca5"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b184e3de58009cc0bf32e20f137f1ec75a32470f5fede06c58f6c355ed42a72"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8c37f1050feb91f3d6c32f864d8e114ff5545a4a7afe56778d76a9aec62638ba"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bda7ce59b06d0f09afe22c56714c65c957b1068dee3d5e74d743edec7daba552"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:25664e18bef6dc45015b08f99c63952a53a0a61f61f2e48a9e70cec27e55f699"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-win32.whl", hash = "sha256:77d29cb6c34b14af8a484e831ab530c0f7188f8efed1c6a833a2c674bf3c26ec"}, + {file = "SQLAlchemy-2.0.29-cp39-cp39-win_amd64.whl", hash = "sha256:04c487305ab035a9548f573763915189fc0fe0824d9ba28433196f8436f1449c"}, + {file = "SQLAlchemy-2.0.29-py3-none-any.whl", hash = "sha256:dc4ee2d4ee43251905f88637d5281a8d52e916a021384ec10758826f5cbae305"}, + {file = "SQLAlchemy-2.0.29.tar.gz", hash = "sha256:bd9566b8e58cabd700bc367b60e90d9349cd16f0984973f98a9a09f9c64e86f0"}, ] [package.dependencies] @@ -2725,13 +2732,13 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] [[package]] name = "threadpoolctl" -version = "3.3.0" +version = "3.4.0" description = "threadpoolctl" optional = false python-versions = ">=3.8" files = [ - {file = "threadpoolctl-3.3.0-py3-none-any.whl", hash = "sha256:6155be1f4a39f31a18ea70f94a77e0ccd57dced08122ea61109e7da89883781e"}, - {file = "threadpoolctl-3.3.0.tar.gz", hash = "sha256:5dac632b4fa2d43f42130267929af3ba01399ef4bd1882918e92dbc30365d30c"}, + {file = "threadpoolctl-3.4.0-py3-none-any.whl", hash = "sha256:8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262"}, + {file = "threadpoolctl-3.4.0.tar.gz", hash = "sha256:f11b491a03661d6dd7ef692dd422ab34185d982466c49c8f98c8f716b5c93196"}, ] [[package]] @@ -2820,13 +2827,13 @@ opt-einsum = ["opt-einsum (>=3.3)"] [[package]] name = "torchmetrics" -version = "1.3.1" +version = "1.3.2" description = "PyTorch native Metrics" optional = false python-versions = ">=3.8" files = [ - {file = "torchmetrics-1.3.1-py3-none-any.whl", hash = "sha256:a44bd1edee629bbf463eb81bfba8300b3785d8b3b8d758bdcafa862b80955b4f"}, - {file = "torchmetrics-1.3.1.tar.gz", hash = "sha256:8d371f7597a1a5eb02d5f2ed59642d6fef09093926997ce91e18b1147cc8defa"}, + {file = "torchmetrics-1.3.2-py3-none-any.whl", hash = "sha256:44ca3a9f86dc050cb3f554836ef291698ea797778457195b4f685fce8e2e64a3"}, + {file = "torchmetrics-1.3.2.tar.gz", hash = "sha256:0a67694a4c4265eeb54cda741eaf5cb1f3a71da74b7e7e6215ad156c9f2379f6"}, ] [package.dependencies] @@ -2836,14 +2843,14 @@ packaging = ">17.1" torch = ">=1.10.0" [package.extras] -all = ["SciencePlots (>=2.0.0)", "ipadic (>=1.0.0)", "matplotlib (>=3.3.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "mypy (==1.8.0)", "nltk (>=3.6)", "piq (<=0.8.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "regex (>=2021.9.24)", "scipy (>1.0.0)", "sentencepiece (>=0.1.98)", "torch (==2.2.0)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +all = ["SciencePlots (>=2.0.0)", "ipadic (>=1.0.0)", "matplotlib (>=3.3.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "mypy (==1.8.0)", "nltk (>=3.6)", "piq (<=0.8.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "regex (>=2021.9.24)", "scipy (>1.0.0)", "sentencepiece (>=0.2.0)", "torch (==2.2.1)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", 
"transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] audio = ["pystoi (>=0.3.0)", "torchaudio (>=0.10.0)"] detection = ["pycocotools (>2.0.0)", "torchvision (>=0.8)"] -dev = ["SciencePlots (>=2.0.0)", "bert-score (==0.3.13)", "dython (<=0.7.5)", "fairlearn", "fast-bss-eval (>=0.1.0)", "faster-coco-eval (>=1.3.3)", "huggingface-hub (<0.21)", "ipadic (>=1.0.0)", "jiwer (>=2.3.0)", "kornia (>=0.6.7)", "lpips (<=0.1.4)", "matplotlib (>=3.3.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "mir-eval (>=0.6)", "monai (==1.3.0)", "mypy (==1.8.0)", "netcal (>1.0.0)", "nltk (>=3.6)", "numpy (<1.25.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "piq (<=0.8.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "pytorch-msssim (==1.0.0)", "regex (>=2021.9.24)", "rouge-score (>0.1.0)", "sacrebleu (>=2.3.0)", "scikit-image (>=0.19.0)", "scipy (>1.0.0)", "sentencepiece (>=0.1.98)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch (==2.2.0)", "torch-complex (<=0.4.3)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +dev = ["SciencePlots (>=2.0.0)", "bert-score (==0.3.13)", "dython (<=0.7.5)", "fairlearn", "fast-bss-eval (>=0.1.0)", "faster-coco-eval (>=1.3.3)", "huggingface-hub (<0.22)", "ipadic (>=1.0.0)", "jiwer (>=2.3.0)", "kornia (>=0.6.7)", "lpips (<=0.1.4)", "matplotlib (>=3.3.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "mir-eval (>=0.6)", "monai (==1.3.0)", "mypy (==1.8.0)", "netcal (>1.0.0)", "nltk (>=3.6)", "numpy (<1.25.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "piq (<=0.8.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "pytorch-msssim (==1.0.0)", "regex (>=2021.9.24)", "rouge-score (>0.1.0)", "sacrebleu (>=2.3.0)", "scikit-image (>=0.19.0)", "scipy (>1.0.0)", "sentencepiece (>=0.2.0)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch (==2.2.1)", "torch-complex (<=0.4.3)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] image = ["scipy (>1.0.0)", "torch-fidelity (<=0.4.0)", "torchvision (>=0.8)"] multimodal = ["piq (<=0.8.0)", "transformers (>=4.10.0)"] -text = ["ipadic (>=1.0.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "nltk (>=3.6)", "regex (>=2021.9.24)", "sentencepiece (>=0.1.98)", "tqdm (>=4.41.0)", "transformers (>4.4.0)"] -typing = ["mypy (==1.8.0)", "torch (==2.2.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +text = ["ipadic (>=1.0.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "nltk (>=3.6)", "regex (>=2021.9.24)", "sentencepiece (>=0.2.0)", "tqdm (>=4.41.0)", "transformers (>4.4.0)"] +typing = ["mypy (==1.8.0)", "torch (==2.2.1)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] visual = ["SciencePlots (>=2.0.0)", "matplotlib (>=3.3.0)"] [[package]] @@ -3111,5 +3118,5 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" -python-versions = ">=3.9, <3.11" -content-hash 
= "975625efdd8f132df288096d8e51b302f49513b35566fc1e7cd1ae3fdb5ef741" +python-versions = ">=3.9, <3.12" +content-hash = "c46f4647f117a5280e8b74844b45f4ad00d5d93b7b95640b0b1a58728b8ec71d" diff --git a/pretrain.py b/pretrain.py index 55374f8..c123fd4 100644 --- a/pretrain.py +++ b/pretrain.py @@ -13,16 +13,16 @@ from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader -from lib.data import PretrainDataset -from lib.tokenizer import ConceptTokenizer -from lib.utils import ( +from odyssey.data.dataset import PretrainDataset +from odyssey.data.tokenizer import ConceptTokenizer +from odyssey.models.cehr_big_bird.model import BigBirdPretrain +from odyssey.models.cehr_bert.model import BertPretrain +from odyssey.models.utils import ( get_run_id, load_config, load_pretrain_data, seed_everything, ) -from models.big_bird_cehr.model import BigBirdPretrain -from models.cehr_bert.model import BertPretrain def main(args: Dict[str, Any], model_config: Dict[str, Any]) -> None: diff --git a/pyproject.toml b/pyproject.toml index bb1ad82..f0b1939 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,15 +1,18 @@ [tool.poetry] name = "odyssey" version = "0.1.0" -description = "A library for developing generalizable patient representation models" -authors = ["Vector AI Engineering "] +description = "A toolkit to develop clinical foundation models using electronic health records" +authors = ["Vector AI Engineering "] license = "Apache-2.0" repository = "https://github.com/VectorInstitute/odyssey" documentation = "https://vectorinstitute.github.io/odyssey/" +packages = [ + { include = "odyssey" }, +] readme = "README.md" [tool.poetry.dependencies] -python = ">=3.9, <3.11" +python = ">=3.9, <3.12" keras = "2.15.0" lightning = "2.2.0" scikit-learn = "^1.2.2" @@ -102,7 +105,7 @@ ignore = [ "__init__.py" = ["E402", "F401", "F403", "F811"] [tool.ruff.lint.pep8-naming] -ignore-names = ["X*", "setUp"] +ignore-names = ["X*", "setUp", "tearDown"] [tool.ruff.lint.isort] lines-after-imports = 2 @@ -113,6 +116,11 @@ convention = "numpy" [tool.ruff.lint.pycodestyle] max-doc-length = 88 +[tool.coverage] + [tool.coverage.run] + source=["odyssey"] + omit=["tests/*", "*__init__.py"] + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" diff --git a/tests/data/test_collect.py b/tests/odyssey/data/mimiciv/test_collect.py similarity index 89% rename from tests/data/test_collect.py rename to tests/odyssey/data/mimiciv/test_collect.py index 226c76d..31852c0 100644 --- a/tests/data/test_collect.py +++ b/tests/odyssey/data/mimiciv/test_collect.py @@ -4,7 +4,11 @@ import shutil from unittest import TestCase -from data.collect import DATA_COLLECTION_CONFIG, PATIENT, FHIRDataCollector +from odyssey.data.mimiciv.collect import ( + DATA_COLLECTION_CONFIG, + PATIENT, + FHIRDataCollector, +) class TestFHIRDataCollector(TestCase): @@ -20,7 +24,7 @@ def setUp(self) -> None: buffer_size=10, ) - def tearDown(self) -> None: + def tearDown(self) -> None: # noqa: N802 """Tear down FHIRDataCollector.""" if os.path.exists(self.save_dir): shutil.rmtree(self.save_dir)