diff --git a/cleaning_extracting_data_for_2015-2019.ipynb b/cleaning_extracting_data_for_2015-2019.ipynb index 23b0cf7..74801d6 100644 --- a/cleaning_extracting_data_for_2015-2019.ipynb +++ b/cleaning_extracting_data_for_2015-2019.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -101,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -111,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -182,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -347,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -359,20 +359,7 @@ "278 TEHAMA DALE 2028-07-09 2018-07-09 856 0 0 \n", "\n", " Deaths_FF Deaths_Civil Duration \n", - "278 0 0 -3652.0 \n", - " County Fire Name Start Contained Acres Strux_Destr Strux_Dmgd \\\n", - "26 MONO VAN DYKE 2015-02-06 2015-02-10 509 0 0 \n", - "0 INYO ROUND 2015-02-06 2015-02-13 7,000 43 5 \n", - "1 RIVERSIDE HIGHWAY 2015-04-18 2015-04-24 1,049 0 0 \n", - "27 SAN DIEGO CARL 2015-04-28 
2015-04-29 4,000 0 0 \n", - "28 SAN DIEGO MORTAR 2015-04-28 2015-04-29 800 0 0 \n", - "\n", - " Deaths_FF Deaths_Civil Duration \n", - "26 0 0 5.0 \n", - "0 0 0 8.0 \n", - "1 0 0 7.0 \n", - "27 0 0 2.0 \n", - "28 0 0 2.0 \n" + "278 0 0 -3652.0 \n" ] } ], @@ -397,16 +384,55 @@ " print(\"No rows found where 'Start' is after 'Cont.'\")\n", "\n", "# Update 'Start' to be equal to 'Cont.' where 'Start' is after 'Cont.' as in it seems like simplie mistype in the year\n", - "fires_2015_2019_filtered.loc[fires_2015_2019_filtered['Start'] > fires_2015_2019_filtered['Contained'], 'Start'] = fires_2015_2019_filtered['Contained']\n", - "\n", + "fires_2015_2019_filtered.loc[fires_2015_2019_filtered['Start'] > fires_2015_2019_filtered['Contained'], 'Start'] = fires_2015_2019_filtered['Contained']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No errors were found in fire duration count.\n", + " County Fire Name Start Contained Acres Strux_Destr Strux_Dmgd \\\n", + "26 MONO VAN DYKE 2015-02-06 2015-02-10 509 0 0 \n", + "0 INYO ROUND 2015-02-06 2015-02-13 7,000 43 5 \n", + "1 RIVERSIDE HIGHWAY 2015-04-18 2015-04-24 1,049 0 0 \n", + "27 SAN DIEGO CARL 2015-04-28 2015-04-29 4,000 0 0 \n", + "28 SAN DIEGO MORTAR 2015-04-28 2015-04-29 800 0 0 \n", + "\n", + " Deaths_FF Deaths_Civil Duration \n", + "26 0 0 4 days \n", + "0 0 0 7 days \n", + "1 0 0 6 days \n", + "27 0 0 1 days \n", + "28 0 0 1 days \n" + ] + } + ], + "source": [ "# Assign the cleaned data to a new variable\n", "fires_2015_2019_cleaned = fires_2015_2019_filtered\n", "\n", + "# Recalculate the fire duration now that the incorrect 'Start' dates have been corrected\n", + "fires_2015_2019_cleaned['Duration'] = fires_2015_2019_cleaned['Contained'] - fires_2015_2019_cleaned['Start']\n", + "\n", + "# Display the rows where 'Duration' is negative\n", + "negative_duration = 
fires_2015_2019_cleaned[fires_2015_2019_cleaned['Duration'] < pd.Timedelta(0)]\n", + "if not negative_duration.empty:\n", + " print(\"Duration is incorrect:\")\n", + " print(negative_duration)\n", + "else:\n", + " print(\"No errors were found in fire duration count.\")\n", + "\n", "# Save a cleaned data to \"Outputs\" folder\n", "fires_2015_2019_cleaned.to_csv('Outputs/fires_2015_2019_cleaned.csv', index=False)\n", "\n", "# Optionally, print the cleaned data to check\n", - "print(fires_2015_2019_cleaned.head())\n" + "print(fires_2015_2019_cleaned.head())" ] } ],