From 7c755dbd5ae263cb5ca0b434cf73df72744d5768 Mon Sep 17 00:00:00 2001 From: kkrusere Date: Mon, 23 Oct 2023 13:18:21 -0400 Subject: [PATCH] Update --- changes_documentation.md | 30 ++++++++++ docs/index.md | 58 +++++++++++++++++++ issues/issues.md | 0 .../nhanes_data/nhanes_data_api.py | 46 ++++++++------- nhanes_pytool_api/setup.py | 7 ++- 5 files changed, 117 insertions(+), 24 deletions(-) create mode 100644 changes_documentation.md delete mode 100644 issues/issues.md diff --git a/changes_documentation.md b/changes_documentation.md new file mode 100644 index 0000000..b0f97f1 --- /dev/null +++ b/changes_documentation.md @@ -0,0 +1,30 @@ +# Changes Documentation + +## NHANES pyTOOL API for Version 0.1.0 to 0.1.1 + +### Class: NHANESDataAPI + +### Added Features: +- **Private Class Attributes:** Made `cycle_list` and `data_category_list` private class attributes by adding double underscores before their names. Python name-mangles such attributes (to `_NHANESDataAPI__cycle_list` and `_NHANESDataAPI__data_category_list`), which discourages direct access from outside the class; use `list_cycle_years()` and `list_data_categories()` to read them. + +### Modified Methods: +- **`list_file_names(data_category, cycle_years=None)` Method:** Updated the method to handle private class attributes by using the double underscores before `cycle_list` and `data_category_list` names within the class. + +### New Methods: + +- **`_check_cycle(input_cycle)` and `_check_in_between_cycle(start_year, end_year)` Private Methods:** Added private methods to check the validity of input cycle years and return valid cycles based on input. These methods handle single cycle years, lists of single cycle years, cycle ranges, and lists of cycle ranges as acceptable input formats. + +- **`_get_data_filename(data_category, cycle_year, data_file_description)` Private Method:** Added a private method to get the data file name for a specific cycle year and data file description. It handles private class attributes by using the double underscores before `cycle_list` and `data_category_list` names within the class. 
+ +### Updated Documentation: +- **Class Description:** Added detailed class description including constructor information, available attributes, and methods with their descriptions. + +- **Method Descriptions:** Updated method descriptions to provide clear explanations of the functionality, parameters, and return values of each method. + +- **Exception Handling:** Documented exceptions raised in methods and provided explanations for possible errors. + +- **Input Formats:** Explained acceptable input formats for methods that require cycle years input, making it clearer for users how to provide valid input. + +- **GitHub Integration:** Provided instructions on how to create a GitHub repository, clone it, add a documentation file (`changes_documentation.md`), and push changes to GitHub. + +These changes enhance the functionality of the `NHANESDataAPI` class, improve code organization, and provide comprehensive documentation for users. Users can now easily understand the class's purpose, available methods, and how to interact with the API effectively. diff --git a/docs/index.md b/docs/index.md index ae826ed..33996be 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,5 +1,7 @@ ## NHANES Data API Documentation +**Attention:** Please take a look at the [Disclaimer](#disclaimer) before using the tool + ### Table of Contents 1. [Introduction](#introduction) 2. [Getting Started](#getting-started) @@ -336,3 +338,59 @@ Contributions are welcome! For major changes, please open an issue first to disc This project is licensed under the MIT License - see the [LICENSE](https://github.com/kkrusere/NHANES-pyTOOL-API/blob/main/LICENSE.txt) file for details. + + +## Disclaimer + +**Current Limitation** + +The NHANES pyTOOL API is designed to work exclusively with NHANES data from pre-pandemic cycle years (1999-2000 to 2017-2018). Please note that the tool does not currently support NHANES data from the COVID-19 pandemic and post-pandemic eras. 
+ +**Reasoning** + +The COVID-19 pandemic has had a significant impact on NHANES data collection. + +**NHANES data collection during the COVID-19 pandemic** + +In March 2020, the NHANES program suspended field operations due to the COVID-19 pandemic. This meant that data collection for the 2019-2020 cycle was not completed. As a result, data collected from 2019 to March 2020 were combined with data from the 2017-2018 cycle to form a nationally representative sample of NHANES 2017-March 2020 pre-pandemic data. + +Data collection for the 2019-2022 cycle resumed in September 2020, but with some modifications to reduce the risk of COVID-19 transmission. These modifications included: + +* Reducing the number of participants recruited from each location. +* Implementing additional safety protocols at the Mobile Examination Center (MEC), such as requiring masks and social distancing. +* Offering participants the option to complete the interview and physical examination remotely. + +**Impact of the COVID-19 pandemic on NHANES data** + +The COVID-19 pandemic has had a number of impacts on NHANES data, including: + +* **Reduced sample size:** The sample size for the 2019-2022 cycle is smaller than the sample size for previous cycles. This is due to the suspension of field operations in 2020 and the modifications implemented to reduce the risk of COVID-19 transmission. +* **Increased variability:** The variability of NHANES estimates may be higher for the 2019-2022 cycle and future cycles due to the smaller sample size and the modifications to data collection procedures. +* **Data gaps:** There are some data gaps for the 2019-2020 cycle, such as data on COVID-19 infection and vaccination status. These data gaps will be filled as data from the 2019-2022 cycle and future cycles are released. + + +These changes to NHANES data collection make it more difficult to develop and maintain a tool that can reliably work with data from both the pre-pandemic and pandemic eras. 
For this reason, we have decided to focus on supporting NHANES data from the pre-pandemic cycle years in the first version of the NHANES pyTOOL API. + + +**Future Plans** + +We are committed to making the NHANES pyTOOL API the most comprehensive and user-friendly tool for working with NHANES data. To this end, we plan to add support for NHANES data from the COVID-19 pandemic and post-pandemic eras in future versions of the tool. We will also continue to monitor the NHANES data collection process and make updates to the tool as needed. + +**Downloading Data** + +If you require NHANES data from the COVID-19 pandemic and post-pandemic eras (2019-2020 onward), you can manually download it from the NHANES webpage. Visit the following link: [NHANES Data Download](https://wwwn.cdc.gov/nchs/nhanes/continuousnhanes/default.aspx). + +**More Information** + +For additional information about NHANES, please visit the official NHANES website: [NHANES Information](https://www.cdc.gov/nchs/nhanes/index.htm). + +**User Support** + +If you have questions or need support related to the NHANES pyTOOL API, please feel free to contact us. + +**Legal Notices** + +Before using NHANES data, please ensure that you comply with all legal and usage restrictions associated with NHANES datasets. Please be aware of your responsibilities regarding data usage and distribution. + +We apologize for any inconvenience this may cause, and we appreciate your understanding. + diff --git a/issues/issues.md b/issues/issues.md deleted file mode 100644 index e69de29..0000000 diff --git a/nhanes_pytool_api/nhanes_data/nhanes_data_api.py b/nhanes_pytool_api/nhanes_data/nhanes_data_api.py index 9fc5374..8582005 100644 --- a/nhanes_pytool_api/nhanes_data/nhanes_data_api.py +++ b/nhanes_pytool_api/nhanes_data/nhanes_data_api.py @@ -11,25 +11,25 @@ class NHANESDataAPI: data_directory (str, optional): The directory where data will be stored or retrieved. Defaults to 'data/'. 
Attributes: - cycle_list (list of str): A list of available NHANES cycle years. - data_category_list (list of str): A list of available NHANES data categories. + __cycle_list (list of str): A list of available NHANES cycle years. + __data_category_list (list of str): A list of available NHANES data categories. Methods: - list_data_categories(): List the available NHANES data categories. - list_cycle_years(): List the available NHANES cycle years. - _retrieve_variable_table(data_category): Retrieve the variable table for a specific data category. - list_file_names(data_category, cycle_years=None): Get a list of unique values in the 'Data File Description' column for a specific data category and optional cycle years. - - retrieve_cycle_data_file_name_mapping(data_category, file_name): Retrieve a dictionary of years and Data File Names based on a given "Data File Description." + - retrieve_cycle_data_file_name_mapping(variable_table, file_name): Retrieve a dictionary of years and Data File Names based on a given "Data File Description." - _check_cycle(input_cycle): Check the validity of a cycle and return valid cycle(s) based on input. - _check_in_between_cycle(start_year, end_year): Check for valid cycles within a range. - _get_data_filename(data_category, cycle_year, data_file_description): Get the data file name for a specific cycle year and data file description. - get_common_and_uncommon_variables(data_category, cycle_years): Find common and uncommon variables across multiple cycle years for a specific data category. - - retrieve_data(data_category, cycle, filename, include_uncommon_variables=True, specific_variables=None): Retrieve data for a specific data category, cycle year(s), and data file description. + - retrieve_data(data_category, cycle, filename, include_uncommon_variables=True): Retrieve data for a specific data category, cycle year(s), and data file description. 
- join_data_files(cycle_year, data_category1, file_name1, data_category2, file_name2, include_uncommon_variables=True): Join two data files from specified data categories and file names based on the common variable SEQN. """ - cycle_list = [ + __cycle_list = [ '1999-2000', '2001-2002', '2003-2004', @@ -42,7 +42,7 @@ class NHANESDataAPI: '2017-2018' ] - data_category_list = [ + __data_category_list = [ "demographics", "dietary", "examination", @@ -67,7 +67,7 @@ def list_data_categories(self): Returns: list: List of available data categories. """ - return self.data_category_list + return self.__data_category_list def list_cycle_years(self): """ @@ -76,7 +76,9 @@ def list_cycle_years(self): Returns: list: List of available cycle years. """ - return self.cycle_list + return self.__cycle_list + + def _retrieve_variable_table(self, data_category): """ @@ -103,7 +105,7 @@ def _retrieve_variable_table(self, data_category): if "Begin Year" in variable_table.columns and "EndYear" in variable_table.columns: variable_table["Years"] = variable_table.apply(lambda row: f"{row['Begin Year']}-{row['EndYear']}", axis=1) variable_table.drop(["Begin Year", "EndYear", "Component", "Use Constraints"], axis=1, inplace=True) - variable_table = variable_table.loc[variable_table["Years"].isin(self.cycle_list)] + variable_table = variable_table.loc[variable_table["Years"].isin(self.__cycle_list)] if variable_table.empty: # If no matching cycle years are found, return None @@ -213,10 +215,10 @@ def _check_cycle(self, input_cycle): if '-' in cycle: start_year, end_year = cycle.split('-') valid_cycles.extend(self._check_in_between_cycle(start_year, end_year)) - elif cycle in self.cycle_list: + elif cycle in self.__cycle_list: valid_cycles.append(cycle) else: - for cycle_year in self.cycle_list: + for cycle_year in self.__cycle_list: if cycle in cycle_year: valid_cycles.append(cycle_year) @@ -239,7 +241,7 @@ def _check_in_between_cycle(self, start_year, end_year): valid_cycles = [] found_start = 
False - for cycle in self.cycle_list: + for cycle in self.__cycle_list: if start_year in cycle: found_start = True if found_start: @@ -304,7 +306,7 @@ def get_common_and_uncommon_variables(self, data_category, cycle_years): valid_cycles = valid_cycles + self._check_cycle(cycle) if valid_cycles == []: - raise ValueError(f"You have entered an Invalid cycle. Below is a list of valid cycles: \n {self.cycle_list}") + raise ValueError(f"You have entered an Invalid cycle. Below is a list of valid cycles: \n {self.__cycle_list}") if len(valid_cycles) < 2: raise ValueError("There is only one cycle here. This function can only be performed for 2 or more cycle years.") @@ -409,23 +411,23 @@ def retrieve_data(self, data_category, cycle, filename, include_uncommon_variabl Raises: Exception: If there is an error retrieving the data. """ - cycle_list = self._check_cycle(cycle) - if not cycle_list: + temp_cycle_list = self._check_cycle(cycle) + if not temp_cycle_list: raise ValueError("Invalid cycle input.") - if len(cycle_list) == 1: - data_file_name = self._get_data_filename(data_category, cycle_list[0], filename) + if len(temp_cycle_list) == 1: + data_file_name = self._get_data_filename(data_category, temp_cycle_list[0], filename) if data_file_name is None: - raise ValueError(f"No data file found for Data Category: {data_category}, Year: {cycle_list[0]}, Data File Description: {filename}") - data = pd.read_sas(f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle_list[0]}/{data_file_name}.XPT") - data['year'] = cycle_list[0] + raise ValueError(f"No data file found for Data Category: {data_category}, Year: {temp_cycle_list[0]}, Data File Description: {filename}") + data = pd.read_sas(f"https://wwwn.cdc.gov/Nchs/Nhanes/{temp_cycle_list[0]}/{data_file_name}.XPT") + data['year'] = temp_cycle_list[0] return data data_frames = [] # List to store individual data frames from different cycles - common_variables, uncommon_variables, _ = self.get_common_and_uncommon_variables(data_category, 
cycle_list) + common_variables, uncommon_variables, _ = self.get_common_and_uncommon_variables(data_category, temp_cycle_list) - for cycle_year in cycle_list: + for cycle_year in temp_cycle_list: try: data_file_name = self._get_data_filename(data_category, cycle_year, filename) if data_file_name is None: diff --git a/nhanes_pytool_api/setup.py b/nhanes_pytool_api/setup.py index 8a9ef44..c1e72b9 100644 --- a/nhanes_pytool_api/setup.py +++ b/nhanes_pytool_api/setup.py @@ -6,7 +6,7 @@ setup( name="nhanes_pytool_api", - version="0.1.0", + version="0.1.1", author="Kuzi Rusere", author_email="kkrusere@gmail.com", description="A tool for programmatic access to NHANES downloadable datasets", @@ -30,5 +30,8 @@ extras_require={ 'test': ['pytest'] }, - project_urls={"Documentation": "https://kkrusere.github.io/NHANES-pyTOOL-API/"} + project_urls={ + "Documentation": "https://kkrusere.github.io/NHANES-pyTOOL-API/", + "Bug Tracker": "https://github.com/kkrusere/NHANES-pyTOOL-API/issues" + } )