diff --git a/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py b/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py index 1c658b3d8a..943973a299 100644 --- a/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py +++ b/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py @@ -58,7 +58,9 @@ def main(): """Main function to generate the cleaned csv file.""" + print(sys.argv[1]) file_path = sys.argv[1] + print("filepath--------", file_path) output_file = sys.argv[2] clean_air_quality_data(file_path, output_file) @@ -73,12 +75,9 @@ def clean_air_quality_data(file_path, output_file): """ print("Cleaning file...") data = pd.read_csv(file_path) - if "Ozone" in file_path and "County" in file_path: - data["Month"] = data["Month"].map(MONTH_MAP) - data["date"] = pd.to_datetime(data[["Year", "Month", "Day"]], - yearfirst=True) - else: - data["date"] = pd.to_datetime(data["date"], yearfirst=True) + data["date"] = pd.to_datetime(data["date"], yearfirst=True) + data["statefips"] = data["statefips"].astype(str).str.zfill(2) + if "PM2.5" in file_path: census_tract = "DS_PM" elif "Ozone" in file_path: @@ -96,11 +95,11 @@ def clean_air_quality_data(file_path, output_file): data["dcid"] = "geoId/" + data["ctfips"].astype(str) data['StatisticalVariable'] = data['StatisticalVariable'].map(STATVARS) elif "County" in file_path and "PM" in file_path: - data["countyfips"] = "1200" + data["countyfips"].astype(str) - data["dcid"] = "geoId/" + data["countyfips"].astype(str) + data["countyfips"] = data["countyfips"].astype(str).str.zfill(3) + data["dcid"] = "geoId/" + data["statefips"] + data["countyfips"] elif "County" in file_path and "Ozone" in file_path: - data["countyfips"] = "1200" + data["countyfips"].astype(str) - data["dcid"] = "geoId/" + data["countyfips"].astype(str) + data["countyfips"] = data["countyfips"].astype(str).str.zfill(3) + data["dcid"] = "geoId/" + data["statefips"] + data["countyfips"] data.to_csv(output_file, float_format='%.6f', index=False) print("Finished cleaning file!")