From 70a4a76d6cc88f2c47d65c04f42d903764e64851 Mon Sep 17 00:00:00 2001 From: Aaron Cruz Date: Wed, 26 Apr 2023 22:31:19 -0400 Subject: [PATCH] Add files via upload --- Python-Basic-Data-Cleaning.ipynb | 451 ++----------------------------- 1 file changed, 18 insertions(+), 433 deletions(-) diff --git a/Python-Basic-Data-Cleaning.ipynb b/Python-Basic-Data-Cleaning.ipynb index 5f422d1..81b7df3 100644 --- a/Python-Basic-Data-Cleaning.ipynb +++ b/Python-Basic-Data-Cleaning.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "1e82ccd2", + "id": "a1dd91f1", "metadata": {}, "source": [ "# Python Basic Data Cleaning\n", @@ -12,7 +12,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "8173d467", + "id": "dd24ccc0", "metadata": {}, "outputs": [], "source": [ @@ -23,7 +23,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "1bf13012", + "id": "e75c7c42", "metadata": {}, "outputs": [ { @@ -237,7 +237,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "873574d0", + "id": "89727911", "metadata": {}, "outputs": [], "source": [ @@ -252,7 +252,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "b6d54339", + "id": "122bfa3b", "metadata": {}, "outputs": [], "source": [ @@ -272,7 +272,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "34d08c1e", + "id": "b962446e", "metadata": {}, "outputs": [ { @@ -490,7 +490,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "581d7f08", + "id": "5f2a4bac", "metadata": {}, "outputs": [], "source": [ @@ -502,7 +502,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "1d84d2aa", + "id": "19f80a57", "metadata": {}, "outputs": [ { @@ -747,7 +747,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "5aa8bedd", + "id": "5dba9a9c", "metadata": {}, "outputs": [ { @@ -800,7 +800,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "5e6aa7c0", + "id": "791a379b", "metadata": {}, "outputs": [], "source": [ @@ -811,7 +811,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "26c83e4f", + "id": "dd76c23f", "metadata": {}, "outputs": [ { @@ -1224,7 +1224,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "98919531", + "id": "d0ab5134", "metadata": {}, "outputs": [ { @@ -1277,7 +1277,7 @@ { "cell_type": "code", "execution_count": 78, - "id": "8080c34a", + "id": "76350710", "metadata": {}, "outputs": [ { @@ -1333,7 +1333,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "6c26e24d", + "id": "52ff9cea", "metadata": {}, "outputs": [ { @@ -1384,7 +1384,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "a341d5b0", + "id": "a7531b07", "metadata": {}, "outputs": [], "source": [ @@ -1395,7 +1395,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "a1582e57", + "id": "23aac6d1", "metadata": {}, "outputs": [ { @@ -1446,7 +1446,7 @@ { "cell_type": "code", "execution_count": 24, - "id": "2f875bc1", + "id": "4443cdeb", "metadata": {}, "outputs": [ { @@ -1860,425 +1860,10 @@ "df.replace('NWAmes', 'Northwest Ames')" ] }, - { - "cell_type": "code", - "execution_count": 25, - "id": "b2e95c2f", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OrderMS SubClassMS ZoningLot FrontageLot AreaUtilitiesLot ConfigLand SlopeNeighborhoodBldg Type...Gr Liv AreaBedroom AbvGrTotRms AbvGrdPaved DriveWood Deck SFFenceMo SoldYr SoldSale TypeSalePrice
PID
5273021102020RL85.013175AllPubInsideGtlNorthwest Ames1Fam...207337Y349MnPrv22010WD210000
5273581402120RL105.011751AllPubInsideGtlNorthwest Ames1Fam...184437Y0MnPrv12010COD190000
5273582002285RL85.010625AllPubInsideGtlNorthwest Ames1Fam...117336Y0MnPrv12010WD170000
53415210011560RL80.010421AllPubInsideGtlNorthwest Ames1Fam...171437Y228MnPrv32010WD196500
53417623011760RL80.09600AllPubInsideGtlNorthwest Ames1Fam...164547Y0GdWo62010WD171000
..................................................................
902204120265350RM50.06000AllPubInsideGtlOld Town1Fam...156657Y24MnPrv82006WD139000
902206090265550RM56.09576AllPubInsideGtlOld Town1Fam...118235Y0GdWo52006WD120000
902400110266775RM90.022950AllPubInsideGtlOld Town1Fam...3608412Y0GdPrv62006WD475000
9032290402675190RM65.07800AllPubInsideGtlOld Town2fmCon...111835Y0MnPrv52006WD119900
903234030268330RM50.06000AllPubInsideGtlOld Town1Fam...110325Y166MnPrv72006WD110500
\n", - "

87 rows × 31 columns

\n", - "
" - ], - "text/plain": [ - " Order MS SubClass MS Zoning Lot Frontage Lot Area Utilities \\\n", - "PID \n", - "527302110 20 20 RL 85.0 13175 AllPub \n", - "527358140 21 20 RL 105.0 11751 AllPub \n", - "527358200 22 85 RL 85.0 10625 AllPub \n", - "534152100 115 60 RL 80.0 10421 AllPub \n", - "534176230 117 60 RL 80.0 9600 AllPub \n", - "... ... ... ... ... ... ... \n", - "902204120 2653 50 RM 50.0 6000 AllPub \n", - "902206090 2655 50 RM 56.0 9576 AllPub \n", - "902400110 2667 75 RM 90.0 22950 AllPub \n", - "903229040 2675 190 RM 65.0 7800 AllPub \n", - "903234030 2683 30 RM 50.0 6000 AllPub \n", - "\n", - " Lot Config Land Slope Neighborhood Bldg Type ... Gr Liv Area \\\n", - "PID ... \n", - "527302110 Inside Gtl Northwest Ames 1Fam ... 2073 \n", - "527358140 Inside Gtl Northwest Ames 1Fam ... 1844 \n", - "527358200 Inside Gtl Northwest Ames 1Fam ... 1173 \n", - "534152100 Inside Gtl Northwest Ames 1Fam ... 1714 \n", - "534176230 Inside Gtl Northwest Ames 1Fam ... 1645 \n", - "... ... ... ... ... ... ... \n", - "902204120 Inside Gtl Old Town 1Fam ... 1566 \n", - "902206090 Inside Gtl Old Town 1Fam ... 1182 \n", - "902400110 Inside Gtl Old Town 1Fam ... 3608 \n", - "903229040 Inside Gtl Old Town 2fmCon ... 1118 \n", - "903234030 Inside Gtl Old Town 1Fam ... 1103 \n", - "\n", - " Bedroom AbvGr TotRms AbvGrd Paved Drive Wood Deck SF Fence \\\n", - "PID \n", - "527302110 3 7 Y 349 MnPrv \n", - "527358140 3 7 Y 0 MnPrv \n", - "527358200 3 6 Y 0 MnPrv \n", - "534152100 3 7 Y 228 MnPrv \n", - "534176230 4 7 Y 0 GdWo \n", - "... ... ... ... ... ... \n", - "902204120 5 7 Y 24 MnPrv \n", - "902206090 3 5 Y 0 GdWo \n", - "902400110 4 12 Y 0 GdPrv \n", - "903229040 3 5 Y 0 MnPrv \n", - "903234030 2 5 Y 166 MnPrv \n", - "\n", - " Mo Sold Yr Sold Sale Type SalePrice \n", - "PID \n", - "527302110 2 2010 WD 210000 \n", - "527358140 1 2010 COD 190000 \n", - "527358200 1 2010 WD 170000 \n", - "534152100 3 2010 WD 196500 \n", - "534176230 6 2010 WD 171000 \n", - "... ... ... ... ... \n", - "902204120 8 2006 WD 139000 \n", - "902206090 5 2006 WD 120000 \n", - "902400110 6 2006 WD 475000 \n", - "903229040 5 2006 WD 119900 \n", - "903234030 7 2006 WD 110500 \n", - "\n", - "[87 rows x 31 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, { "cell_type": "code", "execution_count": 19, - "id": "071db539", + "id": "3977747d", "metadata": {}, "outputs": [], "source": [