From ef2325e445cac28fe7f21fedbb7cc475f653ca19 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 11:22:14 -0400 Subject: [PATCH] [pre-commit.ci] pre-commit autoupdate (#42) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/pre-commit/pre-commit-hooks: v4.5.0 → v4.6.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.5.0...v4.6.0) - [github.com/igorshubovych/markdownlint-cli: v0.39.0 → v0.41.0](https://github.com/igorshubovych/markdownlint-cli/compare/v0.39.0...v0.41.0) - [github.com/codespell-project/codespell: v2.2.6 → v2.3.0](https://github.com/codespell-project/codespell/compare/v2.2.6...v2.3.0) - [github.com/astral-sh/ruff-pre-commit: v0.2.1 → v0.4.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.2.1...v0.4.7) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 8 +- .../01-business-activity-trends.ipynb | 534 ++++++++++++------ notebooks/business-activity-trends/README.md | 16 +- .../base_index_earthquake.ipynb | 64 +-- .../earthquake_intensity.ipynb | 168 ++++-- .../03a-meta-internet-connectivity.ipynb | 225 +++++--- .../03b-ookla-speedtest-analysis.ipynb | 315 +++++++---- notebooks/mobility/activity.ipynb | 12 +- notebooks/mobility/visits.ipynb | 10 +- .../01_clean_gas_flaring_data.R | 2 - notebooks/nighttime-lights/03_aggregate.R | 2 - notebooks/nighttime-lights/04_append.R | 2 +- .../nighttime-lights/05_avg_ntl_by_period.R | 2 +- .../nighttime-lights/05_maps_ntl_changes.R | 23 +- .../nighttime-lights/05_ntl_trends_daily.R | 19 +- .../nighttime-lights/05_ntl_trends_monthly.R | 10 +- notebooks/nighttime-lights/_main.R | 7 +- notebooks/surveys/README.md | 2 +- notebooks/surveys/premise.ipynb | 325 +++++++---- 19 files changed, 1119 insertions(+), 627 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0f77dd2..30f0077 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -14,7 +14,7 @@ repos: args: [--allow-missing-credentials] - id: detect-private-key - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.39.0 + rev: v0.41.0 hooks: - id: markdownlint name: Markdownlint @@ -24,13 +24,13 @@ repos: "--disable=MD033", # no-inline-html ] - repo: https://github.com/codespell-project/codespell - rev: v2.2.6 + rev: v2.3.0 hooks: - id: codespell name: codespell description: Checks for common misspellings in text files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.1 + rev: v0.4.7 hooks: - id: ruff types_or: [python, pyi, jupyter] diff --git a/notebooks/business-activity-trends/01-business-activity-trends.ipynb b/notebooks/business-activity-trends/01-business-activity-trends.ipynb index 1c36757..fa25be6 100644 --- a/notebooks/business-activity-trends/01-business-activity-trends.ipynb +++ b/notebooks/business-activity-trends/01-business-activity-trends.ipynb @@ -13,21 +13,19 @@ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", - "from bokeh.plotting import figure, output_file, show, 
output_notebook\n", + "from bokeh.plotting import figure, show, output_notebook\n", "from bokeh.models import Span\n", "from shapely.geometry import Point\n", "import geopandas as gpd\n", "import glob\n", "from datetime import datetime\n", - "from bokeh.layouts import Row, column, gridplot\n", - "from bokeh.models import Title, Legend, TapTool, Range1d, Tabs, TabPanel\n", - "import matplotlib as mpl\n", - "from bokeh.core.validation import silence\n", + "from bokeh.layouts import column\n", + "from bokeh.models import Legend, Tabs, TabPanel\n", "from bokeh.core.validation.warnings import MISSING_RENDERERS, EMPTY_LAYOUT\n", "\n", "# Set fonts for matplotlib\n", - "plt.rcParams['font.family'] = 'Arial'\n", - "plt.rcParams['font.size'] = 14" + "plt.rcParams[\"font.family\"] = \"Arial\"\n", + "plt.rcParams[\"font.size\"] = 14" ] }, { @@ -52,7 +50,7 @@ ], "source": [ "import bokeh\n", - "from bokeh.core.validation.warnings import EMPTY_LAYOUT\n", + "\n", "bokeh.core.validation.silence(EMPTY_LAYOUT, True)\n", "bokeh.core.validation.silence(EMPTY_LAYOUT, True)" ] @@ -99,8 +97,12 @@ }, "outputs": [], "source": [ - "turkey_adm2 = gpd.read_file('../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp')\n", - "turkey_adm1 = gpd.read_file('../../data/turkey_administrativelevels0_1_2/tur_polbnda_adm1.shp')" + "turkey_adm2 = gpd.read_file(\n", + " \"../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp\"\n", + ")\n", + "turkey_adm1 = gpd.read_file(\n", + " \"../../data/turkey_administrativelevels0_1_2/tur_polbnda_adm1.shp\"\n", + ")" ] }, { @@ -140,11 +142,22 @@ }, "outputs": [], "source": [ - "all_files = glob.glob('../../data/business_activity_trend/raw/2022_February/*.csv')\n", - "\n", - "businessActivity2022 = pd.DataFrame(columns = ['polygon_id', 'polygon_name', 'polygon_level', 'polygon_version',\n", - " 'country', 'business_vertical', 'activity_quantile', 'latitude',\n", - " 'longitude', 'ds'])\n", + "all_files = glob.glob(\"../../data/business_activity_trend/raw/2022_February/*.csv\")\n", + "\n", + "businessActivity2022 = pd.DataFrame(\n", + " columns=[\n", + " \"polygon_id\",\n", + " \"polygon_name\",\n", + " \"polygon_level\",\n", + " \"polygon_version\",\n", + " \"country\",\n", + " \"business_vertical\",\n", + " \"activity_quantile\",\n", + " \"latitude\",\n", + " \"longitude\",\n", + " \"ds\",\n", + " ]\n", + ")\n", "\n", "li = []\n", "\n", @@ -153,7 +166,7 @@ " li.append(df1)\n", "\n", "businessActivity2022 = pd.concat(li, axis=0)\n", - "businessActivity2022 = businessActivity2022[businessActivity2022['country']=='TR']" + "businessActivity2022 = businessActivity2022[businessActivity2022[\"country\"] == \"TR\"]" ] }, { @@ -167,8 +180,10 @@ "outputs": [], "source": [ "# convert columns to datetime\n", - "businessActivity2022['ds'] = businessActivity2022['ds'].apply(lambda x: pd.to_datetime(x))\n", - "businessActivity2022 = businessActivity2022[businessActivity2022['country']=='TR' ]" + "businessActivity2022[\"ds\"] = businessActivity2022[\"ds\"].apply(\n", + " lambda x: pd.to_datetime(x)\n", + ")\n", + "businessActivity2022 = businessActivity2022[businessActivity2022[\"country\"] == \"TR\"]" ] }, { @@ -189,9 +204,11 @@ } ], "source": [ - "business_verticals = list(businessActivity2022['business_vertical'].unique())\n", + "business_verticals = list(businessActivity2022[\"business_vertical\"].unique())\n", "\n", - "print(f'COVID-19 Business Actvity Trends has the following business verticals {business_verticals}')" + "print(\n", + " f\"COVID-19 Business Actvity Trends has the 
following business verticals {business_verticals}\"\n", + ")" ] }, { @@ -205,26 +222,27 @@ "outputs": [], "source": [ "# define color palette\n", - "color_palette = [ '#4E79A7', # Blue\n", - " '#F28E2B', # Orange\n", - " '#E15759', # Red\n", - " '#76B7B2', # Teal\n", - " '#59A14F', # Green\n", - " '#EDC948', # Yellow\n", - " '#B07AA1', # Purple\n", - " '#FF9DA7', # Pink\n", - " '#9C755F', # Brown\n", - " '#BAB0AC', # Gray\n", - " '#7C7C7C', # Dark gray\n", - " '#6B4C9A', # Violet\n", - " '#D55E00', # Orange-red\n", - " '#CC61B0', # Magenta\n", - " '#0072B2', # Bright blue\n", - " '#329262', # Peacock green\n", - " '#9E5B5A', # Brick red\n", - " '#636363', # Medium gray\n", - " '#CD9C00', # Gold\n", - " '#5D69B1', # Medium blue\n", + "color_palette = [\n", + " \"#4E79A7\", # Blue\n", + " \"#F28E2B\", # Orange\n", + " \"#E15759\", # Red\n", + " \"#76B7B2\", # Teal\n", + " \"#59A14F\", # Green\n", + " \"#EDC948\", # Yellow\n", + " \"#B07AA1\", # Purple\n", + " \"#FF9DA7\", # Pink\n", + " \"#9C755F\", # Brown\n", + " \"#BAB0AC\", # Gray\n", + " \"#7C7C7C\", # Dark gray\n", + " \"#6B4C9A\", # Violet\n", + " \"#D55E00\", # Orange-red\n", + " \"#CC61B0\", # Magenta\n", + " \"#0072B2\", # Bright blue\n", + " \"#329262\", # Peacock green\n", + " \"#9E5B5A\", # Brick red\n", + " \"#636363\", # Medium gray\n", + " \"#CD9C00\", # Gold\n", + " \"#5D69B1\", # Medium blue\n", "]" ] }, @@ -240,57 +258,76 @@ "source": [ "bokeh.core.validation.silence(EMPTY_LAYOUT, True)\n", "\n", - "def get_line_plot(businessActivity,title, source, earthquakes=False, subtitle=None):\n", "\n", - " p2 = figure(x_axis_type = 'datetime', width = 800, height = 400, toolbar_location='above')\n", + "def get_line_plot(businessActivity, title, source, earthquakes=False, subtitle=None):\n", + " p2 = figure(x_axis_type=\"datetime\", width=800, height=400, toolbar_location=\"above\")\n", " p2.add_layout(Legend(), \"right\")\n", "\n", - " for id, business_vertical in enumerate(businessActivity['business_vertical'].unique()):\n", - " df = businessActivity[businessActivity['business_vertical']==business_vertical][['ds', 'activity_quantile']].reset_index(drop=True)\n", - " p2.line(df['ds'], df['activity_quantile'], line_width=2, line_color = color_palette[id], legend_label=business_vertical)\n", + " for id, business_vertical in enumerate(\n", + " businessActivity[\"business_vertical\"].unique()\n", + " ):\n", + " df = businessActivity[\n", + " businessActivity[\"business_vertical\"] == business_vertical\n", + " ][[\"ds\", \"activity_quantile\"]].reset_index(drop=True)\n", + " p2.line(\n", + " df[\"ds\"],\n", + " df[\"activity_quantile\"],\n", + " line_width=2,\n", + " line_color=color_palette[id],\n", + " legend_label=business_vertical,\n", + " )\n", "\n", - " p2.legend.click_policy='hide'\n", + " p2.legend.click_policy = \"hide\"\n", " if subtitle is not None:\n", " p2.title = subtitle\n", "\n", - "\n", - " title_fig = figure(title=title, toolbar_location=None,width=800, height=40, )\n", + " title_fig = figure(\n", + " title=title,\n", + " toolbar_location=None,\n", + " width=800,\n", + " height=40,\n", + " )\n", " title_fig.title.align = \"left\"\n", " title_fig.title.text_font_size = \"20pt\"\n", " title_fig.border_fill_alpha = 0\n", - " title_fig.outline_line_width=0\n", - "\n", - " #with silence(MISSING_RENDERERS):\n", - " sub_title = figure(title=source, toolbar_location=None,width=800, height=40, )\n", + " title_fig.outline_line_width = 0\n", + "\n", + " # with silence(MISSING_RENDERERS):\n", + " sub_title = figure(\n", + " 
title=source,\n", + " toolbar_location=None,\n", + " width=800,\n", + " height=40,\n", + " )\n", " sub_title.title.align = \"left\"\n", " sub_title.title.text_font_size = \"10pt\"\n", - " sub_title.title.text_font_style=\"normal\"\n", + " sub_title.title.text_font_style = \"normal\"\n", " sub_title.border_fill_alpha = 0\n", - " sub_title.outline_line_width=0\n", + " sub_title.outline_line_width = 0\n", "\n", " layout = column(title_fig, p2, sub_title)\n", "\n", " if earthquakes:\n", - " p2.renderers.extend([\n", - " Span(\n", - " location=datetime(2023, 2, 6),\n", - " dimension=\"height\",\n", - " line_color='#7C7C7C',\n", - " line_width=2,\n", - " line_dash=(4,4)\n", - " ),\n", - " Span(\n", - " location=datetime(2023, 2, 20),\n", - " dimension=\"height\",\n", - " line_color='#7C7C7C',\n", - " line_width=2,\n", - " line_dash=(4,4)\n", - " ),\n", - " ]\n", - ")\n", - "\n", - " return layout\n", - "\n" + " p2.renderers.extend(\n", + " [\n", + " Span(\n", + " location=datetime(2023, 2, 6),\n", + " dimension=\"height\",\n", + " line_color=\"#7C7C7C\",\n", + " line_width=2,\n", + " line_dash=(4, 4),\n", + " ),\n", + " Span(\n", + " location=datetime(2023, 2, 20),\n", + " dimension=\"height\",\n", + " line_color=\"#7C7C7C\",\n", + " line_width=2,\n", + " line_dash=(4, 4),\n", + " ),\n", + " ]\n", + " )\n", + "\n", + " return layout" ] }, { @@ -304,11 +341,22 @@ "outputs": [], "source": [ "# Get all datasets for earthquake-triggered business activity trends\n", - "all_files = glob.glob('../../data/business_activity_trend/raw/*.csv')\n", - "\n", - "businessActivity2023 = pd.DataFrame(columns = ['polygon_id', 'polygon_name', 'polygon_level', 'polygon_version',\n", - " 'country', 'business_vertical', 'activity_quantile', 'latitude',\n", - " 'longitude', 'ds'])\n", + "all_files = glob.glob(\"../../data/business_activity_trend/raw/*.csv\")\n", + "\n", + "businessActivity2023 = pd.DataFrame(\n", + " columns=[\n", + " \"polygon_id\",\n", + " \"polygon_name\",\n", + " \"polygon_level\",\n", + " \"polygon_version\",\n", + " \"country\",\n", + " \"business_vertical\",\n", + " \"activity_quantile\",\n", + " \"latitude\",\n", + " \"longitude\",\n", + " \"ds\",\n", + " ]\n", + ")\n", "\n", "li = []\n", "\n", @@ -330,8 +378,10 @@ "outputs": [], "source": [ "# convert columns to datetime\n", - "businessActivity2023['ds'] = businessActivity2023['ds'].apply(lambda x: pd.to_datetime(x))\n", - "businessActivity2023 = businessActivity2023[businessActivity2023['country']=='TR' ]" + "businessActivity2023[\"ds\"] = businessActivity2023[\"ds\"].apply(\n", + " lambda x: pd.to_datetime(x)\n", + ")\n", + "businessActivity2023 = businessActivity2023[businessActivity2023[\"country\"] == \"TR\"]" ] }, { @@ -352,7 +402,9 @@ } ], "source": [ - "print(f'Business Activity Trends has {len(businessActivity2023[\"polygon_name\"].unique())} districts and the boundaries map from UNOCHA contains {len(turkey_adm2[\"adm2_en\"].unique())} districts. {len(turkey_adm2[\"adm2_en\"].unique())-len(businessActivity2023[\"polygon_name\"].unique())} districts do not have data')" + "print(\n", + " f'Business Activity Trends has {len(businessActivity2023[\"polygon_name\"].unique())} districts and the boundaries map from UNOCHA contains {len(turkey_adm2[\"adm2_en\"].unique())} districts. 
{len(turkey_adm2[\"adm2_en\"].unique())-len(businessActivity2023[\"polygon_name\"].unique())} districts do not have data'\n", + ")" ] }, { @@ -377,17 +429,44 @@ "# Convert polygon name to upper to match with boundaries file and change column name\n", "import unicodedata\n", "\n", - "businessActivity2023['polygon_name'] = businessActivity2023['polygon_name'].apply(lambda x: x.upper())\n", - "businessActivity2023['polygon_name'] = businessActivity2023['polygon_name'].apply(lambda x: unicodedata.normalize('NFD', x).encode('ascii', 'ignore').decode('utf-8'))\n", + "businessActivity2023[\"polygon_name\"] = businessActivity2023[\"polygon_name\"].apply(\n", + " lambda x: x.upper()\n", + ")\n", + "businessActivity2023[\"polygon_name\"] = businessActivity2023[\"polygon_name\"].apply(\n", + " lambda x: unicodedata.normalize(\"NFD\", x).encode(\"ascii\", \"ignore\").decode(\"utf-8\")\n", + ")\n", "\n", "# Fixing the business activity trends admin names to match UNOCHA\n", - "businessActivity2023['polygon_name'].replace({'EYUP':'EYUPSULTAN', 'ONDOKUZ MAYIS':'19 MAYIS','KAZAN':'KAHRAMANKAZAN',\n", - " 'DOGUBEYAZIT': 'DOGUBAYAZIT', 'MUSTAFA KEMALPASA':'MUSTAFAKEMALPASA', 'SULTAN KOCHISAR':'SEREFLIKOCHISAR',\n", - " 'SINCANLI':'SINANPASA', 'AKKOY':'PAMUKKALE', 'SULTAN KARAHISAR':'SEBINKARAHISAR' }, inplace = True)\n", + "businessActivity2023[\"polygon_name\"].replace(\n", + " {\n", + " \"EYUP\": \"EYUPSULTAN\",\n", + " \"ONDOKUZ MAYIS\": \"19 MAYIS\",\n", + " \"KAZAN\": \"KAHRAMANKAZAN\",\n", + " \"DOGUBEYAZIT\": \"DOGUBAYAZIT\",\n", + " \"MUSTAFA KEMALPASA\": \"MUSTAFAKEMALPASA\",\n", + " \"SULTAN KOCHISAR\": \"SEREFLIKOCHISAR\",\n", + " \"SINCANLI\": \"SINANPASA\",\n", + " \"AKKOY\": \"PAMUKKALE\",\n", + " \"SULTAN KARAHISAR\": \"SEBINKARAHISAR\",\n", + " },\n", + " inplace=True,\n", + ")\n", "\n", - "matched_districts = list(set(businessActivity2023['polygon_name'].unique()).intersection(set(turkey_adm2['adm2_en'].unique())))\n", - "print(list(businessActivity2023[~(businessActivity2023[\"polygon_name\"].isin(matched_districts))]['polygon_name'].unique()))\n", - "print('The above districts remain unmapped. Note: Merkez district unmapped is due to a bug in the GADM data. To account for this, we used the lat long coordinates and joined with the shapefiles being used in this project from UNOCHA')" + "matched_districts = list(\n", + " set(businessActivity2023[\"polygon_name\"].unique()).intersection(\n", + " set(turkey_adm2[\"adm2_en\"].unique())\n", + " )\n", + ")\n", + "print(\n", + " list(\n", + " businessActivity2023[\n", + " ~(businessActivity2023[\"polygon_name\"].isin(matched_districts))\n", + " ][\"polygon_name\"].unique()\n", + " )\n", + ")\n", + "print(\n", + " \"The above districts remain unmapped. Note: Merkez district unmapped is due to a bug in the GADM data. 
To account for this, we used the lat long coordinates and joined with the shapefiles being used in this project from UNOCHA\"\n", + ")" ] }, { @@ -401,9 +480,19 @@ "outputs": [], "source": [ "gdf = convert_to_gdf(businessActivity2023)\n", - "gdf = turkey_adm2.sjoin(gdf)[[ 'adm2_en', 'adm1_en', 'adm1', 'pcode', 'geometry',\n", - " 'business_vertical', 'activity_quantile', 'ds']]\n", - "gdf = gdf.sort_values(by='ds')" + "gdf = turkey_adm2.sjoin(gdf)[\n", + " [\n", + " \"adm2_en\",\n", + " \"adm1_en\",\n", + " \"adm1\",\n", + " \"pcode\",\n", + " \"geometry\",\n", + " \"business_vertical\",\n", + " \"activity_quantile\",\n", + " \"ds\",\n", + " ]\n", + "]\n", + "gdf = gdf.sort_values(by=\"ds\")" ] }, { @@ -427,36 +516,50 @@ } ], "source": [ - "fig, axs = plt.subplots(2,2,figsize = (24,10), sharex=True, sharey=True)\n", + "fig, axs = plt.subplots(2, 2, figsize=(24, 10), sharex=True, sharey=True)\n", "ax = axs.flatten()\n", "\n", "\n", - "dates_of_interest = ['2023-02-05', '2023-02-12', '2023-02-19', '2023-02-26']\n", + "dates_of_interest = [\"2023-02-05\", \"2023-02-12\", \"2023-02-19\", \"2023-02-26\"]\n", "images = []\n", "\n", "for i in range(0, len(dates_of_interest)):\n", - " turkey_adm2.boundary.plot(ax=ax[i], edgecolor = '#D3D3D3', linewidth=0.5)\n", - " im=gdf[gdf['ds']==dates_of_interest[i]][['activity_quantile', 'geometry']].plot(column = 'activity_quantile', ax=ax[i], legend = False, cmap = 'Spectral', vmin=0, vmax =1)\n", + " turkey_adm2.boundary.plot(ax=ax[i], edgecolor=\"#D3D3D3\", linewidth=0.5)\n", + " im = gdf[gdf[\"ds\"] == dates_of_interest[i]][[\"activity_quantile\", \"geometry\"]].plot(\n", + " column=\"activity_quantile\",\n", + " ax=ax[i],\n", + " legend=False,\n", + " cmap=\"Spectral\",\n", + " vmin=0,\n", + " vmax=1,\n", + " )\n", " images.append(im.collections[0])\n", - " ax[i].set_title(f'{dates_of_interest[i]}', fontsize = 14, )\n", - " ax[i].title.set_position([0,0])\n", + " ax[i].set_title(\n", + " f\"{dates_of_interest[i]}\",\n", + " fontsize=14,\n", + " )\n", + " ax[i].title.set_position([0, 0])\n", "\n", " ax[i].set_xticks([])\n", " ax[i].set_yticks([])\n", - " ax[i].spines['top'].set_visible(False)\n", - " ax[i].spines['bottom'].set_visible(False)\n", - " ax[i].spines['right'].set_visible(False)\n", - " ax[i].spines['left'].set_visible(False)\n", + " ax[i].spines[\"top\"].set_visible(False)\n", + " ax[i].spines[\"bottom\"].set_visible(False)\n", + " ax[i].spines[\"right\"].set_visible(False)\n", + " ax[i].spines[\"left\"].set_visible(False)\n", "\n", "cbar = fig.colorbar(images[1], ax=axs)\n", - "suptitle= fig.suptitle('Business Activity Trends (Admin 2 level compared to 90 day prior baseline)', fontsize = 20, fontweight = 'bold')\n", + "suptitle = fig.suptitle(\n", + " \"Business Activity Trends (Admin 2 level compared to 90 day prior baseline)\",\n", + " fontsize=20,\n", + " fontweight=\"bold\",\n", + ")\n", "suptitle.set_y(0.95)\n", "suptitle.set_x(0.3)\n", - "#ax[0].cax = cbar.ax[0]\n", + "# ax[0].cax = cbar.ax[0]\n", "\n", "for im in images:\n", " im.set_clim(vmin=0, vmax=1)\n", - " im.set_cmap('Spectral')\n", + " im.set_cmap(\"Spectral\")\n", " im.set_norm(cbar.norm)" ] }, @@ -529,21 +632,36 @@ "tabs = []\n", "\n", "# Taking the mean activity quantile for the entire country to allow for comparison with last year's data\n", - "df = businessActivity2023.groupby(['country', 'business_vertical', 'ds']).mean('activity_quantile')[['activity_quantile']].reset_index()\n", + "df = (\n", + " businessActivity2023.groupby([\"country\", \"business_vertical\", 
\"ds\"])\n", + " .mean(\"activity_quantile\")[[\"activity_quantile\"]]\n", + " .reset_index()\n", + ")\n", "\n", "tabs.append(\n", " TabPanel(\n", - " child=get_line_plot(df, f\"Business Activity in 2023\", \"Source: Data for Good Meta\", earthquakes=True, subtitle = 'National average post earthquake compared to 90 day prior baseline'),\n", - " title='2023',\n", - " )\n", - " )\n", + " child=get_line_plot(\n", + " df,\n", + " \"Business Activity in 2023\",\n", + " \"Source: Data for Good Meta\",\n", + " earthquakes=True,\n", + " subtitle=\"National average post earthquake compared to 90 day prior baseline\",\n", + " ),\n", + " title=\"2023\",\n", + " )\n", + ")\n", "\n", "tabs.append(\n", " TabPanel(\n", - " child=get_line_plot(businessActivity2022, f\"Business Activity in 2022 (National average post COVID-19)\", \"Source: Data for Good Meta\", subtitle = 'National average post COVID-19 compared to pre-pandemic baseline'),\n", - " title='2022',\n", - " )\n", - " )\n", + " child=get_line_plot(\n", + " businessActivity2022,\n", + " \"Business Activity in 2022 (National average post COVID-19)\",\n", + " \"Source: Data for Good Meta\",\n", + " subtitle=\"National average post COVID-19 compared to pre-pandemic baseline\",\n", + " ),\n", + " title=\"2022\",\n", + " )\n", + ")\n", "\n", "tabs = Tabs(tabs=tabs, sizing_mode=\"scale_both\")\n", "show(tabs, warn_on_missing_glyphs=False)" @@ -573,8 +691,8 @@ }, "outputs": [], "source": [ - "aoi = gpd.read_file('../../data/SYRTUR_tessellation.geojson')\n", - "aoi = gdf[gdf['pcode'].isin(aoi['ADM2_PCODE'].unique())]" + "aoi = gpd.read_file(\"../../data/SYRTUR_tessellation.geojson\")\n", + "aoi = gdf[gdf[\"pcode\"].isin(aoi[\"ADM2_PCODE\"].unique())]" ] }, { @@ -595,10 +713,12 @@ } ], "source": [ - "nr_adm = len(aoi['pcode'].unique())\n", - "#affected_adm2 = list(aoi['pcode'].unique())\n", - "affected_adm2 = list(turkey_adm2[turkey_adm2['pcode'].isin(aoi['pcode'].unique())]['adm2_en'])\n", - "print(f'There are {nr_adm} admin-2 regions which are of interest')" + "nr_adm = len(aoi[\"pcode\"].unique())\n", + "# affected_adm2 = list(aoi['pcode'].unique())\n", + "affected_adm2 = list(\n", + " turkey_adm2[turkey_adm2[\"pcode\"].isin(aoi[\"pcode\"].unique())][\"adm2_en\"]\n", + ")\n", + "print(f\"There are {nr_adm} admin-2 regions which are of interest\")" ] }, { @@ -613,12 +733,24 @@ "source": [ "# Obtain admin areas of interest using multiple ways\n", "# 1. Get the areas with the highest earthquake intensity\n", - "earthquake_intensity = pd.read_csv('../../data/turkiye_earthquke_intensity_adm2.csv')\n", - "earthquake_intensity.drop(columns = 'Unnamed: 0', inplace = True)\n", - "affected_adm2 = list(earthquake_intensity.sort_values(by='mean_of_mean_intensity_feb06', ascending = False).head(7)['adm2_en'])\n", + "earthquake_intensity = pd.read_csv(\"../../data/turkiye_earthquke_intensity_adm2.csv\")\n", + "earthquake_intensity.drop(columns=\"Unnamed: 0\", inplace=True)\n", + "affected_adm2 = list(\n", + " earthquake_intensity.sort_values(\n", + " by=\"mean_of_mean_intensity_feb06\", ascending=False\n", + " ).head(7)[\"adm2_en\"]\n", + ")\n", "\n", "# 2. 
Get limited areas manually from the list of earthquake impacted areas\n", - "affected_adm2 = ['NURDAGI', 'ISLAHIYE', 'DEFNE', 'ANTAKYA', 'KUMLU', 'SEHITKAMIL', 'HASSA']" + "affected_adm2 = [\n", + " \"NURDAGI\",\n", + " \"ISLAHIYE\",\n", + " \"DEFNE\",\n", + " \"ANTAKYA\",\n", + " \"KUMLU\",\n", + " \"SEHITKAMIL\",\n", + " \"HASSA\",\n", + "]" ] }, { @@ -631,7 +763,7 @@ }, "outputs": [], "source": [ - "gdf['business_vertical'].fillna('Unknown', inplace = True)" + "gdf[\"business_vertical\"].fillna(\"Unknown\", inplace=True)" ] }, { @@ -645,9 +777,20 @@ }, "outputs": [], "source": [ - "business_verticals = ['Public Good', 'Retail', 'Grocery & Convenience Stores',\n", - " 'Professional Services', 'Restaurants', 'Manufacturing',\n", - " 'Business & Utility Services', 'Lifestyle Services', 'Home Services', 'Travel', 'Local Events', 'All']" + "business_verticals = [\n", + " \"Public Good\",\n", + " \"Retail\",\n", + " \"Grocery & Convenience Stores\",\n", + " \"Professional Services\",\n", + " \"Restaurants\",\n", + " \"Manufacturing\",\n", + " \"Business & Utility Services\",\n", + " \"Lifestyle Services\",\n", + " \"Home Services\",\n", + " \"Travel\",\n", + " \"Local Events\",\n", + " \"All\",\n", + "]" ] }, { @@ -675,8 +818,21 @@ "# #convert date column to datetime and subtract one week\n", "# gdf['date'] = pd.to_datetime(gdf['ds']) - pd.to_timedelta(7, unit='d')\n", "\n", - "#calculate sum of values, grouped by week\n", - "week = gdf.groupby([pd.Grouper(key='ds', freq='W-MON'), 'adm2_en', 'adm1_en', 'adm1', 'pcode', 'business_vertical']).mean('activity_quantile')[['activity_quantile']].reset_index()" + "# calculate sum of values, grouped by week\n", + "week = (\n", + " gdf.groupby(\n", + " [\n", + " pd.Grouper(key=\"ds\", freq=\"W-MON\"),\n", + " \"adm2_en\",\n", + " \"adm1_en\",\n", + " \"adm1\",\n", + " \"pcode\",\n", + " \"business_vertical\",\n", + " ]\n", + " )\n", + " .mean(\"activity_quantile\")[[\"activity_quantile\"]]\n", + " .reset_index()\n", + ")" ] }, { @@ -748,14 +904,20 @@ "tabs = []\n", "\n", "for adm in affected_adm2:\n", - " df = gdf[gdf['adm2_en']==adm] \n", + " df = gdf[gdf[\"adm2_en\"] == adm]\n", "\n", " tabs.append(\n", " TabPanel(\n", - " child=get_line_plot(df, f\"Business Activity in Affected Areas\", \"Source: Data for Good Meta\", earthquakes=True, subtitle = 'GADM2 level average post earthquake compared to 90 day prior baseline'),\n", - " title=adm.capitalize(),\n", - " )\n", - " )\n", + " child=get_line_plot(\n", + " df,\n", + " \"Business Activity in Affected Areas\",\n", + " \"Source: Data for Good Meta\",\n", + " earthquakes=True,\n", + " subtitle=\"GADM2 level average post earthquake compared to 90 day prior baseline\",\n", + " ),\n", + " title=adm.capitalize(),\n", + " )\n", + " )\n", "\n", "tabs = Tabs(tabs=tabs, sizing_mode=\"scale_both\")\n", "show(tabs, warn_on_missing_glyphs=False)" @@ -830,14 +992,20 @@ "tabs = []\n", "\n", "for adm in affected_adm2:\n", - " df = week[week['adm2_en']==adm] \n", + " df = week[week[\"adm2_en\"] == adm]\n", "\n", " tabs.append(\n", " TabPanel(\n", - " child=get_line_plot(df, f\"Weekly Business Activity in Affected Areas\", \"Source: Data for Good Meta\", earthquakes=True, subtitle = 'GADM2 level average post earthquake compared to 90 day prior baseline'),\n", - " title=adm.capitalize(),\n", - " )\n", - " )\n", + " child=get_line_plot(\n", + " df,\n", + " \"Weekly Business Activity in Affected Areas\",\n", + " \"Source: Data for Good Meta\",\n", + " earthquakes=True,\n", + " subtitle=\"GADM2 level average post 
earthquake compared to 90 day prior baseline\",\n", + " ),\n", + " title=adm.capitalize(),\n", + " )\n", + " )\n", "\n", "tabs = Tabs(tabs=tabs, sizing_mode=\"scale_both\")\n", "show(tabs, warn_on_missing_glyphs=False)" @@ -867,39 +1035,69 @@ }, "outputs": [], "source": [ - "#p = gdf.groupby(['adm2_en', 'ds']).mean().reset_index()\n", + "# p = gdf.groupby(['adm2_en', 'ds']).mean().reset_index()\n", "df = pd.DataFrame()\n", "\n", - "for business_vertical in ['Retail', 'Manufacturing', 'All']:\n", + "for business_vertical in [\"Retail\", \"Manufacturing\", \"All\"]:\n", + " p = gdf.groupby([\"adm2_en\", \"ds\", \"business_vertical\"]).mean().reset_index()\n", + "\n", + " for date in [\"2023-02-05\", \"2023-02-06\"]:\n", + " df = (\n", + " p[p[\"ds\"] == date][\n", + " [\"ds\", \"adm2_en\", \"activity_quantile\", \"business_vertical\"]\n", + " ]\n", + " .merge(turkey_adm2[[\"adm2_en\", \"adm1_en\"]], on=\"adm2_en\", how=\"right\")\n", + " .reset_index(drop=True)\n", + " )\n", + " df[\"ds\"] = date\n", + " df[\"business_vertical\"] = business_vertical\n", + " df.rename(columns={\"ds\": \"date\"}, inplace=True)\n", + " df = pd.concat(\n", + " [df, df.sort_values(by=\"adm2_en\")]\n", + " ) # .to_csv(f'../data/network_coverage/final/business_activity_trends_{date}.csv')\n", + "\n", + " p[\"date\"] = pd.to_datetime(p[\"ds\"]) - pd.to_timedelta(7, unit=\"d\")\n", + "\n", + " # calculate sum of values, grouped by week\n", + " week = (\n", + " p.groupby(\n", + " [pd.Grouper(key=\"date\", freq=\"W-MON\"), \"adm2_en\", \"business_vertical\"]\n", + " )\n", + " .mean()\n", + " .reset_index()\n", + " )\n", + "\n", + " for date in [\"2023-02-13\", \"2023-02-20\", \"2023-02-27\", \"2023-03-06\"]:\n", + " df = (\n", + " week[week[\"date\"] == date][\n", + " [\"date\", \"adm2_en\", \"activity_quantile\", \"business_vertical\"]\n", + " ]\n", + " .merge(turkey_adm2[[\"adm2_en\", \"adm1_en\"]], on=\"adm2_en\", how=\"right\")\n", + " .reset_index(drop=True)\n", + " )\n", + " df[\"date\"] = date\n", + " df[\"business_vertical\"] = business_vertical\n", + " df = pd.concat(\n", + " [df, df1.sort_values(by=\"adm2_en\")]\n", + " ) # .to_csv(f'../data/network_coverage/final/business_activity_trends_{date}.csv')\n", "\n", - " p = gdf.groupby(['adm2_en', 'ds', 'business_vertical']).mean().reset_index()\n", - "\n", - " for date in ['2023-02-05', '2023-02-06']:\n", - " \n", - " df = p[p['ds']==date][['ds','adm2_en', 'activity_quantile', 'business_vertical']].merge(turkey_adm2[['adm2_en', 'adm1_en']],on='adm2_en', how = 'right').reset_index(drop=True)\n", - " df['ds'] = date\n", - " df['business_vertical'] = business_vertical\n", - " df.rename(columns = {'ds':'date'}, inplace = True)\n", - " df = pd.concat([df, df.sort_values(by='adm2_en')])#.to_csv(f'../data/network_coverage/final/business_activity_trends_{date}.csv')\n", - "\n", - " p['date'] = pd.to_datetime(p['ds']) - pd.to_timedelta(7, unit='d')\n", - "\n", - " #calculate sum of values, grouped by week\n", - " week = p.groupby([pd.Grouper(key='date', freq='W-MON'), 'adm2_en', 'business_vertical']).mean().reset_index()\n", - "\n", - " for date in ['2023-02-13', '2023-02-20', '2023-02-27', '2023-03-06']:\n", - " \n", - " df = week[week['date']==date][['date','adm2_en', 'activity_quantile', 'business_vertical']].merge(turkey_adm2[['adm2_en', 'adm1_en']],on='adm2_en', how = 'right').reset_index(drop=True)\n", - " df['date'] = date\n", - " df['business_vertical'] = business_vertical\n", - " df = 
pd.concat([df,df1.sort_values(by='adm2_en')])#.to_csv(f'../data/network_coverage/final/business_activity_trends_{date}.csv')\n", "    \n", "# combining all files into a single file\n", "df = df.fillna(-100)\n", "df = (\n", "    df.pivot_table(\n", "        index=[\"adm1_en\", \"adm2_en\", \"business_vertical\"],\n", "        values=\"activity_quantile\",\n", "        columns=\"date\",\n", "    )\n", "    .reset_index()\n", "    .sort_values(by=[\"business_vertical\", \"adm2_en\"])\n", "    .reset_index(drop=True)\n", ")\n", "df.replace(-100, np.nan, inplace=True)\n", "\n", "df.to_csv(\n", "    \"../data/business_activity_trend/final/business_activity_trends_indicators.csv\"\n", ")" ] } ],
diff --git a/notebooks/business-activity-trends/README.md b/notebooks/business-activity-trends/README.md
index 1b346ef..68149e4 100644
--- a/notebooks/business-activity-trends/README.md
+++ b/notebooks/business-activity-trends/README.md
@@ -1,10 +1,10 @@
 # Business Activity Trends
 
-Business Activity Trends During Crisis uses data about posting activity on Facebook to measure how local businesses are affected by and recover from crisis events. Given the broad presence of small businesses on the Facebook platform, this dataset aims to provide timely estimates of global business activity without the common limitations of traditional data collection methods, such as scale, speed and nonstandardization. This is a crisis-triggered dataset i.e., it has been created by Meta to support humanitarian relief for the earthquake in Turkiye. Details about this dataset can be found on [Meta's Data For Good page](https://dataforgood.facebook.com/dfg/tools/business-activity-trends). 
+Business Activity Trends During Crisis uses data about posting activity on Facebook to measure how local businesses are affected by and recover from crisis events. Given the broad presence of small businesses on the Facebook platform, this dataset aims to provide timely estimates of global business activity without the common limitations of traditional data collection methods, such as scale, speed and nonstandardization. This is a crisis-triggered dataset, i.e., it has been created by Meta to support humanitarian relief for the earthquake in Turkiye. Details about this dataset can be found on [Meta's Data For Good page](https://dataforgood.facebook.com/dfg/tools/business-activity-trends).
 
 ## Data
 
-The Business Activity Trends dataset was provided by [Meta](https://dataforgood.facebook.com/dfg/tools/business-activity-trends) through the proposal [Türkiye Rapid Damage Needs Assessment](https://portal.datapartnership.org/readableproposal/427) of the [Development Data Partnership](https://datapartnership.org). The data consisted of daily business activity quantile information at a GADM 2 level broken down by business vertical from the 5th of February, 2023. Each cell (row) of the dataset contains data on the daily activity within a polygon-vertical combination. 
+The Business Activity Trends dataset was provided by [Meta](https://dataforgood.facebook.com/dfg/tools/business-activity-trends) through the proposal [Türkiye Rapid Damage Needs Assessment](https://portal.datapartnership.org/readableproposal/427) of the [Development Data Partnership](https://datapartnership.org). The data consisted of daily business activity quantile information at a GADM 2 level broken down by business vertical from the 5th of February, 2023. Each cell (row) of the dataset contains data on the daily activity within a polygon-vertical combination.
 
 **Population Sample**
 The Business Activity Trends During Crisis dataset uses a static sample of businesses’ Facebook Pages for each crisis defined at each crisis date. It does not take into account new Pages businesses created during the crisis, nor does it exclude Pages removed during the crisis. The sample for each crisis is defined as Facebook Pages that meet the following criteria:
@@ -18,7 +18,7 @@ The Business Activity Trends During Crisis dataset uses a static sample of busin
 
 **Business Verticals**
 
-The business verticals are categories determined by the admins of the Facebook Business Page. 
+The business verticals are categories determined by the admins of the Facebook Business Page.
 
 * *All*: Refers to all businesses in the polygon. This includes all of the following categories except public good, because the activity of public good Pages tends to differ from other businesses during crises.
 * *Grocery and convenience stores*: Retailers that sell everyday consumable goods including food (typically unprepared foods and ingredients) and a limited range of household goods (like toilet paper). These can include grocery stores, convenience stores, pharmacies and general stores.
@@ -38,21 +38,21 @@ Restaurants: Businesses that sell prepared food and beverages for on-premise or
 
 This method for understanding local economic activity was first described by the University of Bristol team and published in [Nature Communications](https://www.nature.com/articles/s41467-020-15405-7). Business activity is measured by the volume of posts made by business Pages on Facebook on a daily basis, where a post is defined broadly to include posts, stories and reels created by the business Page anywhere on Facebook. In practice, almost all posts are either made on the business Page itself or in Facebook Groups.
 
-For each crisis event, a baseline posting pattern is established using the 90 days prior to the event start date. Meta then measures the daily posting activity relative to the expected posting activity based on the baseline period. Individual business Page activity is then aggregated by business vertical (proxy for economic sector) and by GADM administrative polygons geographically. 
+For each crisis event, a baseline posting pattern is established using the 90 days prior to the event start date. Meta then measures the daily posting activity relative to the expected posting activity based on the baseline period. Individual business Page activity is then aggregated by business vertical (proxy for economic sector) and by GADM administrative polygons geographically.
 
-The business activity is measured through activity quantiles. This is equivalent to the 7-day average of what University of Bristol researchers call the [aggregated probability integral transform metric](https://www.nature.com/articles/s41467-020-15405-7). It is calculated by first computing the approximate quantiles (the midquantiles in the article) of each Page’s daily activity relative to their baseline activity. The quantiles are summed and the sum is then shifted, rescaled and variance-adjusted to follow a standard normal distribution. The adjusted sum is then probability transformed through a standard normal cumulative distribution function to get a value between 0 and 1. Following this, the average of this value over the last 7 days is obtained to smooth out daily fluctuations. This metric is given a quantile interpretation since it compares the daily activity to the distribution of daily activity within the baseline period, where a value around 0.5 is considered normal activity. *This is a one-vote-per-Page metric that gives equal weight to all businesses and is not heavily influenced by businesses that post a lot.* 
+The business activity is measured through activity quantiles. This is equivalent to the 7-day average of what University of Bristol researchers call the [aggregated probability integral transform metric](https://www.nature.com/articles/s41467-020-15405-7). It is calculated by first computing the approximate quantiles (the midquantiles in the article) of each Page’s daily activity relative to their baseline activity. The quantiles are summed and the sum is then shifted, rescaled and variance-adjusted to follow a standard normal distribution. The adjusted sum is then probability transformed through a standard normal cumulative distribution function to get a value between 0 and 1. Following this, the average of this value over the last 7 days is obtained to smooth out daily fluctuations. This metric is given a quantile interpretation since it compares the daily activity to the distribution of daily activity within the baseline period, where a value around 0.5 is considered normal activity. *This is a one-vote-per-Page metric that gives equal weight to all businesses and is not heavily influenced by businesses that post a lot.*
 
-The full technical details of the methodology used for this datset can be found in the [white paper](https://scontent-iad3-2.xx.fbcdn.net/v/t39.8562-6/313431392_1209469252938025_9085357585007907228_n.pdf?_nc_cat=100&ccb=1-7&_nc_sid=ae5e01&_nc_ohc=XYjhPigfKDwAX-PRwOp&_nc_ht=scontent-iad3-2.xx&oh=00_AfAXU8Aylea13vEKHZoffq3qBQw2TVadXDPcKp40Ib5Ziw&oe=6428FDCD) authored by researchers from Meta. 
+The full technical details of the methodology used for this dataset can be found in the [white paper](https://scontent-iad3-2.xx.fbcdn.net/v/t39.8562-6/313431392_1209469252938025_9085357585007907228_n.pdf?_nc_cat=100&ccb=1-7&_nc_sid=ae5e01&_nc_ohc=XYjhPigfKDwAX-PRwOp&_nc_ht=scontent-iad3-2.xx&oh=00_AfAXU8Aylea13vEKHZoffq3qBQw2TVadXDPcKp40Ib5Ziw&oe=6428FDCD) authored by researchers from Meta.
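As a rough illustration of the metric described above, here is a minimal sketch; it is not Meta's implementation, and the function names, the assumed Page-by-day matrices of daily post counts, and the uniform-sum normal approximation are all the editor's assumptions.

```python
import numpy as np
import pandas as pd
from scipy.stats import norm


def mid_quantile(x, baseline):
    # Empirical mid-quantile of today's post count x within a Page's
    # baseline distribution: the average of P(X < x) and P(X <= x).
    return 0.5 * (np.mean(baseline < x) + np.mean(baseline <= x))


def activity_quantile(daily, base):
    # daily, base: DataFrames of post counts (rows = days, columns = Pages);
    # base holds the 90 days before the crisis start date.
    quantiles = daily.apply(
        lambda col: col.map(lambda x: mid_quantile(x, base[col.name].to_numpy()))
    )

    # Sum across Pages: under baseline conditions each mid-quantile is roughly
    # Uniform(0, 1), so the sum over n Pages has mean n/2 and variance n/12.
    n = quantiles.shape[1]
    z = (quantiles.sum(axis=1) - n / 2) / np.sqrt(n / 12)

    # Probability-transform through the standard normal CDF to get a value
    # between 0 and 1, then smooth with a 7-day average.
    return pd.Series(norm.cdf(z), index=daily.index).rolling(7).mean()
```

A value near 0.5 then reads as activity in line with the baseline, matching the quantile interpretation given above.

 ## Implementation
 
-Once the data was obtained from the Meta Data For Good portal, the polygons were transformed to align with the shapefiles provided by UNOCHA. More details can be found in the attached notebook. 
+Once the data was obtained from the Meta Data For Good portal, the polygons were transformed to align with the shapefiles provided by UNOCHA. More details can be found in the attached notebook.
 
 ## Limitations
 
-One of the biggest limitations of using this dataset is that it is based entirely on Facebook users. Therefore, it is important to note that this dataset may not be representative of the entire Turkish population evenly (Palen & Anderson, 2016). 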
The methodology uses posts on Facebook business pages and groups to estimate changes in business activity. This framework is best used to see how quickly business have recovered from a natural disaster, in this case, the earthquake (Eyre et. al., 2020). The methodology relies on the assumption that businesses tend to publish more posts when they are open and fewer when they are closed, hence analysing the aggregated posting activity of a group of businesses over time it is possible to infer when they are open or closed. +One of the biggest limitations of using this dataset is that it is based entirely on Facebook users. Therefore, it is important to note that this dataset may not be representative of the entire Turkish population evenly (Palen & Anderson, 2016). The methodology uses posts on Facebook business pages and groups to estimate changes in business activity. This framework is best used to see how quickly business have recovered from a natural disaster, in this case, the earthquake (Eyre et. al., 2020). The methodology relies on the assumption that businesses tend to publish more posts when they are open and fewer when they are closed, hence analysing the aggregated posting activity of a group of businesses over time it is possible to infer when they are open or closed. ## Citations diff --git a/notebooks/earthquake-intensity/base_index_earthquake.ipynb b/notebooks/earthquake-intensity/base_index_earthquake.ipynb index b072878..1844fce 100644 --- a/notebooks/earthquake-intensity/base_index_earthquake.ipynb +++ b/notebooks/earthquake-intensity/base_index_earthquake.ipynb @@ -20,13 +20,11 @@ }, "outputs": [], "source": [ - "import geopandas as gpd\n", "import pandas as pd\n", "from urllib.request import urlopen\n", "import json\n", "from pandas import json_normalize\n", - "import plotly.express as px\n", - "from datetime import datetime" + "import plotly.express as px" ] }, { @@ -47,17 +45,19 @@ "outputs": [], "source": [ "# Download the data in GeoJSON format\n", - "with urlopen(\"https://earthquake.usgs.gov/fdsnws/event/1/query.geojson?starttime=2010-02-01%2000:00:00&endtime=2023-04-28%2023:59:59&maxlatitude=42.778&minlatitude=33.394&maxlongitude=47.813&minlongitude=24.961&minmagnitude=1&orderby=time\") as response:\n", + "with urlopen(\n", + " \"https://earthquake.usgs.gov/fdsnws/event/1/query.geojson?starttime=2010-02-01%2000:00:00&endtime=2023-04-28%2023:59:59&maxlatitude=42.778&minlatitude=33.394&maxlongitude=47.813&minlongitude=24.961&minmagnitude=1&orderby=time\"\n", + ") as response:\n", " data = json.load(response)\n", "\n", "# Convert the data into a DataFrame\n", - "earthquakes = json_normalize(data['features'])\n", + "earthquakes = json_normalize(data[\"features\"])\n", "\n", "# Extract and process the required columns\n", - "earthquakes['time'] = pd.to_datetime(earthquakes['properties.time'], unit='ms')\n", - "earthquakes['month'] = earthquakes['time'].dt.to_period('M').astype(str)\n", - "earthquakes['quarter'] = earthquakes['time'].dt.to_period('Q').astype(str)\n", - "earthquakes['mag'] = earthquakes['properties.mag']" + "earthquakes[\"time\"] = pd.to_datetime(earthquakes[\"properties.time\"], unit=\"ms\")\n", + "earthquakes[\"month\"] = earthquakes[\"time\"].dt.to_period(\"M\").astype(str)\n", + "earthquakes[\"quarter\"] = earthquakes[\"time\"].dt.to_period(\"Q\").astype(str)\n", + "earthquakes[\"mag\"] = earthquakes[\"properties.mag\"]" ] }, { @@ -1302,7 +1302,7 @@ ], "range": [ 0, - 12578.947368421053 + 12578.947368421052 ], "title": { "text": "Index (base 
Jan-2010=100)" @@ -1345,9 +1345,9 @@ } ], "source": [ - "monthly_counts = earthquakes.groupby('month').size()\n", + "monthly_counts = earthquakes.groupby(\"month\").size()\n", "monthly_counts /= monthly_counts[0] / 100\n", - "fig1 = px.bar(monthly_counts.reset_index(), x='month', y=0)\n", + "fig1 = px.bar(monthly_counts.reset_index(), x=\"month\", y=0)\n", "fig1.update_layout(yaxis_title=\"Index (base Jan-2010=100)\")\n", "fig1.show()" ] @@ -1449,11 +1449,11 @@ "xaxis": "x", "y": [ 100, - 91.65722525491813, + 91.65722525491812, 91.59862338889262, - 95.46498766272441, - 93.97109739575491, - 94.71624266144815, + 95.4649876627244, + 93.97109739575492, + 94.71624266144816, 94.07045009784736, 95.63083662026098, 93.4743338852928, @@ -1463,44 +1463,44 @@ 93.161556860187, 92.624238582178, 91.39502376292982, - 93.30467421641909, + 93.30467421641907, 89.54990215264188, - 94.10120212468547, - 92.69539518351371, + 94.10120212468549, + 92.69539518351372, 91.86165234408236, - 91.59491193737769, + 91.59491193737767, 91.99378381489582, - 93.90900195694715, + 93.90900195694717, 94.97716894977168, 91.28546966731896, 92.9622381677176, 90.34151884350734, 93.4092815208275, - 96.82308364908191, + 96.82308364908192, 95.67806137319398, - 93.41875988526391, + 93.41875988526392, 96.5881051646388, 93.41252997436676, 92.97362780728729, 93.81930854533594, - 93.69464990546949, + 93.69464990546948, 94.9599379684673, 92.76861429503307, - 95.24496171936691, + 95.24496171936693, 96.43835616438356, 95.41784445894034, 94.76874612190348, 93.20618044805327, - 94.74419904948279, + 94.7441990494828, 94.6432717503068, 94.55377912343536, - 93.97187117069335, + 93.97187117069336, 96.65714927336305, 95.69471624266144, 94.57883998834158, - 93.08023483365949, + 93.08023483365947, 95.64579256360078, - 95.47096857054981, + 95.4709685705498, 90.20263872230288 ], "yaxis": "y" @@ -2355,7 +2355,7 @@ ], "range": [ 88.96934116112197, - 100.58056099151989 + 100.58056099151987 ], "title": { "text": "Index (base Jan-2010=100)" @@ -2398,9 +2398,9 @@ } ], "source": [ - "quarterly_avgs = earthquakes.groupby('quarter')['mag'].mean()\n", + "quarterly_avgs = earthquakes.groupby(\"quarter\")[\"mag\"].mean()\n", "quarterly_avgs /= quarterly_avgs[0] / 100\n", - "fig2 = px.line(quarterly_avgs.reset_index(), x='quarter', y='mag')\n", + "fig2 = px.line(quarterly_avgs.reset_index(), x=\"quarter\", y=\"mag\")\n", "fig2.update_layout(yaxis_title=\"Index (base Jan-2010=100)\")\n", "fig2.show()" ] diff --git a/notebooks/earthquake-intensity/earthquake_intensity.ipynb b/notebooks/earthquake-intensity/earthquake_intensity.ipynb index a6fde15..658c40c 100644 --- a/notebooks/earthquake-intensity/earthquake_intensity.ipynb +++ b/notebooks/earthquake-intensity/earthquake_intensity.ipynb @@ -10,7 +10,6 @@ }, "outputs": [], "source": [ - "import pandas as pd\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", "from mpl_toolkits.axes_grid1 import make_axes_locatable" @@ -36,12 +35,24 @@ }, "outputs": [], "source": [ - "mi_feb20 = gpd.read_file('../../data/earthquake-intensity/raw/ShakeMap Feb20 6p3/mi.shp')\n", - "mi_feb06_7p5 = gpd.read_file('../../data/earthquake-intensity/raw/ShakeMap Feb06 7p5/mi.shp')\n", - "mi_feb06_7p8 = gpd.read_file('../../data/earthquake-intensity/raw/ShakeMap Feb06 7p8/mi.shp')\n", - "mi_feb6_6p0_1 = gpd.read_file('../../data/earthquake-intensity/raw/ShakeMap Feb06 6p0_1/mi.shp')\n", - "mi_feb6_6p0_2 = gpd.read_file('../../data/earthquake-intensity/raw/ShakeMap Feb06 6p0_2/mi.shp')\n", - "mi_feb6_6p7 = 
gpd.read_file('../../data/earthquake-intensity/raw/ShakeMap Feb06 6p7/mi.shp')\n" + "mi_feb20 = gpd.read_file(\n", + " \"../../data/earthquake-intensity/raw/ShakeMap Feb20 6p3/mi.shp\"\n", + ")\n", + "mi_feb06_7p5 = gpd.read_file(\n", + " \"../../data/earthquake-intensity/raw/ShakeMap Feb06 7p5/mi.shp\"\n", + ")\n", + "mi_feb06_7p8 = gpd.read_file(\n", + " \"../../data/earthquake-intensity/raw/ShakeMap Feb06 7p8/mi.shp\"\n", + ")\n", + "mi_feb6_6p0_1 = gpd.read_file(\n", + " \"../../data/earthquake-intensity/raw/ShakeMap Feb06 6p0_1/mi.shp\"\n", + ")\n", + "mi_feb6_6p0_2 = gpd.read_file(\n", + " \"../../data/earthquake-intensity/raw/ShakeMap Feb06 6p0_2/mi.shp\"\n", + ")\n", + "mi_feb6_6p7 = gpd.read_file(\n", + " \"../../data/earthquake-intensity/raw/ShakeMap Feb06 6p7/mi.shp\"\n", + ")" ] }, { @@ -54,7 +65,9 @@ }, "outputs": [], "source": [ - "turkey_adm2 = gpd.read_file('../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp')" + "turkey_adm2 = gpd.read_file(\n", + " \"../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp\"\n", + ")" ] }, { @@ -67,12 +80,24 @@ }, "outputs": [], "source": [ - "gdf_adm4_7p8 = mi_feb06_7p8[['PARAMVALUE', 'geometry']].sjoin(turkey_adm2[['adm2_en', 'adm1_en', 'geometry']])\n", - "gdf_adm4_7p5 = mi_feb06_7p5[['PARAMVALUE', 'geometry']].sjoin(turkey_adm2[['adm2_en', 'adm1_en', 'geometry']])\n", - "mi_feb6_6p0_1 = mi_feb6_6p0_1[['PARAMVALUE', 'geometry']].sjoin(turkey_adm2[['adm2_en', 'adm1_en', 'geometry']])\n", - "mi_feb6_6p0_2 = mi_feb6_6p0_2[['PARAMVALUE', 'geometry']].sjoin(turkey_adm2[['adm2_en', 'adm1_en', 'geometry']])\n", - "gdf_adm4_6p7 = mi_feb6_6p7[['PARAMVALUE', 'geometry']].sjoin(turkey_adm2[['adm2_en', 'adm1_en', 'geometry']])\n", - "gdf_adm4_feb20 = mi_feb20[['PARAMVALUE', 'geometry']].sjoin(turkey_adm2[['adm2_en', 'adm1_en', 'geometry']])" + "gdf_adm4_7p8 = mi_feb06_7p8[[\"PARAMVALUE\", \"geometry\"]].sjoin(\n", + " turkey_adm2[[\"adm2_en\", \"adm1_en\", \"geometry\"]]\n", + ")\n", + "gdf_adm4_7p5 = mi_feb06_7p5[[\"PARAMVALUE\", \"geometry\"]].sjoin(\n", + " turkey_adm2[[\"adm2_en\", \"adm1_en\", \"geometry\"]]\n", + ")\n", + "mi_feb6_6p0_1 = mi_feb6_6p0_1[[\"PARAMVALUE\", \"geometry\"]].sjoin(\n", + " turkey_adm2[[\"adm2_en\", \"adm1_en\", \"geometry\"]]\n", + ")\n", + "mi_feb6_6p0_2 = mi_feb6_6p0_2[[\"PARAMVALUE\", \"geometry\"]].sjoin(\n", + " turkey_adm2[[\"adm2_en\", \"adm1_en\", \"geometry\"]]\n", + ")\n", + "gdf_adm4_6p7 = mi_feb6_6p7[[\"PARAMVALUE\", \"geometry\"]].sjoin(\n", + " turkey_adm2[[\"adm2_en\", \"adm1_en\", \"geometry\"]]\n", + ")\n", + "gdf_adm4_feb20 = mi_feb20[[\"PARAMVALUE\", \"geometry\"]].sjoin(\n", + " turkey_adm2[[\"adm2_en\", \"adm1_en\", \"geometry\"]]\n", + ")" ] }, { @@ -85,12 +110,42 @@ }, "outputs": [], "source": [ - "df_7p8 = gdf_adm4_7p8.groupby(['adm2_en', 'adm1_en']).mean('PARAMVALUE').reset_index()[[ 'adm2_en', 'adm1_en','PARAMVALUE']].sort_values(by='PARAMVALUE', ascending = False)\n", - "df_7p5 = gdf_adm4_7p5.groupby(['adm2_en', 'adm1_en']).mean('PARAMVALUE').reset_index()[[ 'adm2_en', 'adm1_en','PARAMVALUE']].sort_values(by='PARAMVALUE', ascending = False)\n", - "df_6p0_1 = mi_feb6_6p0_1.groupby(['adm2_en', 'adm1_en']).mean('PARAMVALUE').reset_index()[[ 'adm2_en', 'adm1_en','PARAMVALUE']].sort_values(by='PARAMVALUE', ascending = False)\n", - "df_6p0_2 = mi_feb6_6p0_2.groupby(['adm2_en', 'adm1_en']).mean('PARAMVALUE').reset_index()[[ 'adm2_en', 'adm1_en','PARAMVALUE']].sort_values(by='PARAMVALUE', ascending = False)\n", - "df_6p7 = gdf_adm4_6p7.groupby(['adm2_en', 
'adm1_en']).mean('PARAMVALUE').reset_index()[[ 'adm2_en', 'adm1_en','PARAMVALUE']].sort_values(by='PARAMVALUE', ascending = False)\n", - "df_feb20 = gdf_adm4_feb20.groupby(['adm2_en', 'adm1_en']).mean('PARAMVALUE').reset_index()[['adm2_en', 'adm1_en','PARAMVALUE']].sort_values(by='PARAMVALUE', ascending = False)\n" + "df_7p8 = (\n", + " gdf_adm4_7p8.groupby([\"adm2_en\", \"adm1_en\"])\n", + " .mean(\"PARAMVALUE\")\n", + " .reset_index()[[\"adm2_en\", \"adm1_en\", \"PARAMVALUE\"]]\n", + " .sort_values(by=\"PARAMVALUE\", ascending=False)\n", + ")\n", + "df_7p5 = (\n", + " gdf_adm4_7p5.groupby([\"adm2_en\", \"adm1_en\"])\n", + " .mean(\"PARAMVALUE\")\n", + " .reset_index()[[\"adm2_en\", \"adm1_en\", \"PARAMVALUE\"]]\n", + " .sort_values(by=\"PARAMVALUE\", ascending=False)\n", + ")\n", + "df_6p0_1 = (\n", + " mi_feb6_6p0_1.groupby([\"adm2_en\", \"adm1_en\"])\n", + " .mean(\"PARAMVALUE\")\n", + " .reset_index()[[\"adm2_en\", \"adm1_en\", \"PARAMVALUE\"]]\n", + " .sort_values(by=\"PARAMVALUE\", ascending=False)\n", + ")\n", + "df_6p0_2 = (\n", + " mi_feb6_6p0_2.groupby([\"adm2_en\", \"adm1_en\"])\n", + " .mean(\"PARAMVALUE\")\n", + " .reset_index()[[\"adm2_en\", \"adm1_en\", \"PARAMVALUE\"]]\n", + " .sort_values(by=\"PARAMVALUE\", ascending=False)\n", + ")\n", + "df_6p7 = (\n", + " gdf_adm4_6p7.groupby([\"adm2_en\", \"adm1_en\"])\n", + " .mean(\"PARAMVALUE\")\n", + " .reset_index()[[\"adm2_en\", \"adm1_en\", \"PARAMVALUE\"]]\n", + " .sort_values(by=\"PARAMVALUE\", ascending=False)\n", + ")\n", + "df_feb20 = (\n", + " gdf_adm4_feb20.groupby([\"adm2_en\", \"adm1_en\"])\n", + " .mean(\"PARAMVALUE\")\n", + " .reset_index()[[\"adm2_en\", \"adm1_en\", \"PARAMVALUE\"]]\n", + " .sort_values(by=\"PARAMVALUE\", ascending=False)\n", + ")" ] }, { @@ -103,12 +158,12 @@ }, "outputs": [], "source": [ - "df_7p8.rename(columns = {'PARAMVALUE':'mean_7p8'}, inplace=True)\n", - "df_7p5.rename(columns = {'PARAMVALUE':'mean_7p5'}, inplace=True)\n", - "df_6p0_1.rename(columns = {'PARAMVALUE':'mean_6p0_1'}, inplace=True)\n", - "df_6p0_2.rename(columns = {'PARAMVALUE':'mean_6p0_2'}, inplace=True)\n", - "df_6p7.rename(columns = {'PARAMVALUE':'mean_6p7'}, inplace=True)\n", - "df_feb20.rename(columns = {'PARAMVALUE':'mean_feb20'}, inplace=True)" + "df_7p8.rename(columns={\"PARAMVALUE\": \"mean_7p8\"}, inplace=True)\n", + "df_7p5.rename(columns={\"PARAMVALUE\": \"mean_7p5\"}, inplace=True)\n", + "df_6p0_1.rename(columns={\"PARAMVALUE\": \"mean_6p0_1\"}, inplace=True)\n", + "df_6p0_2.rename(columns={\"PARAMVALUE\": \"mean_6p0_2\"}, inplace=True)\n", + "df_6p7.rename(columns={\"PARAMVALUE\": \"mean_6p7\"}, inplace=True)\n", + "df_feb20.rename(columns={\"PARAMVALUE\": \"mean_feb20\"}, inplace=True)" ] }, { @@ -121,7 +176,15 @@ }, "outputs": [], "source": [ - "df = turkey_adm2[['adm2_en', 'adm1_en']].merge(df_7p5, on = ['adm2_en', 'adm1_en'], how = 'left').merge(df_7p8, on = ['adm2_en', 'adm1_en'], how='left').merge(df_6p0_2, on = ['adm2_en', 'adm1_en'], how='left').merge(df_6p0_1,on=['adm2_en', 'adm1_en'], how='left').merge(df_6p7, on = ['adm2_en', 'adm1_en'], how='left').merge(df_feb20, on = ['adm2_en', 'adm1_en'], how='left')" + "df = (\n", + " turkey_adm2[[\"adm2_en\", \"adm1_en\"]]\n", + " .merge(df_7p5, on=[\"adm2_en\", \"adm1_en\"], how=\"left\")\n", + " .merge(df_7p8, on=[\"adm2_en\", \"adm1_en\"], how=\"left\")\n", + " .merge(df_6p0_2, on=[\"adm2_en\", \"adm1_en\"], how=\"left\")\n", + " .merge(df_6p0_1, on=[\"adm2_en\", \"adm1_en\"], how=\"left\")\n", + " .merge(df_6p7, on=[\"adm2_en\", \"adm1_en\"], how=\"left\")\n", 
+ " .merge(df_feb20, on=[\"adm2_en\", \"adm1_en\"], how=\"left\")\n", + ")" ] }, { @@ -134,7 +197,7 @@ }, "outputs": [], "source": [ - "df = df.groupby(['adm2_en', 'adm1_en']).mean().reset_index().fillna(0)" + "df = df.groupby([\"adm2_en\", \"adm1_en\"]).mean().reset_index().fillna(0)" ] }, { @@ -147,10 +210,13 @@ }, "outputs": [], "source": [ - "import numpy as np\n", - "from statistics import mean\n", - "#df['mean_intensity_feb6'] = df.apply(lambda x: mean([x['mean_7p5'], x['mean_7p8'], x['mean_6p0_1'], x['mean_6p0_2'], x['mean_6p7']]), axis=1)\n", - "df['max_intensity_feb06'] = df.apply(lambda x: max(x['mean_7p5'], x['mean_7p8'], x['mean_6p0_1'], x['mean_6p0_2'], x['mean_6p7']), axis=1)" + "# df['mean_intensity_feb6'] = df.apply(lambda x: mean([x['mean_7p5'], x['mean_7p8'], x['mean_6p0_1'], x['mean_6p0_2'], x['mean_6p7']]), axis=1)\n", + "df[\"max_intensity_feb06\"] = df.apply(\n", + " lambda x: max(\n", + " x[\"mean_7p5\"], x[\"mean_7p8\"], x[\"mean_6p0_1\"], x[\"mean_6p0_2\"], x[\"mean_6p7\"]\n", + " ),\n", + " axis=1,\n", + ")" ] }, { @@ -163,7 +229,9 @@ }, "outputs": [], "source": [ - "df[['adm2_en', 'adm1_en', 'max_intensity_feb06', 'mean_feb20']].to_csv('../../data/earthquake-intensity/turkiye_adm2_earthquake_intensity.csv')" + "df[[\"adm2_en\", \"adm1_en\", \"max_intensity_feb06\", \"mean_feb20\"]].to_csv(\n", + " \"../../data/earthquake-intensity/turkiye_adm2_earthquake_intensity.csv\"\n", + ")" ] }, { @@ -176,7 +244,7 @@ }, "outputs": [], "source": [ - "gdf = turkey_adm2.merge(df, on = ['adm2_en', 'adm1_en'])" + "gdf = turkey_adm2.merge(df, on=[\"adm2_en\", \"adm1_en\"])" ] }, { @@ -210,34 +278,36 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=(12,6))\n", + "fig, ax = plt.subplots(figsize=(12, 6))\n", "plt.rcParams[\"font.family\"] = \"cursive\"\n", "\n", - "gdf.plot(column='max_intensity_feb06', cmap = 'viridis', ax=ax)\n", - "#turkey_adm2.boundary.plot(ax=ax, edgecolor = '#D3D3D3', linewidth=0.5)\n", + "gdf.plot(column=\"max_intensity_feb06\", cmap=\"viridis\", ax=ax)\n", + "# turkey_adm2.boundary.plot(ax=ax, edgecolor = '#D3D3D3', linewidth=0.5)\n", "\n", - "ax.spines['top'].set_visible(False)\n", - "ax.spines['bottom'].set_visible(False)\n", - "ax.spines['right'].set_visible(False)\n", - "ax.spines['left'].set_visible(False)\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"bottom\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "ax.spines[\"left\"].set_visible(False)\n", "\n", "ax.set_xticks([])\n", "ax.set_yticks([])\n", "\n", - "vmin = gdf['max_intensity_feb06'].min()\n", - "vmax = gdf['max_intensity_feb06'].max()\n", + "vmin = gdf[\"max_intensity_feb06\"].min()\n", + "vmax = gdf[\"max_intensity_feb06\"].max()\n", "\n", - "sm = plt.cm.ScalarMappable(cmap='viridis',norm=plt.Normalize(vmin=vmin, vmax=vmax))\n", + "sm = plt.cm.ScalarMappable(cmap=\"viridis\", norm=plt.Normalize(vmin=vmin, vmax=vmax))\n", "\n", "\n", "divider = make_axes_locatable(ax)\n", "dvider_kwargs = dict(position=\"left\", size=\"1.5%\", pad=0)\n", - "fig.colorbar(sm, cax=divider.append_axes(**dvider_kwargs),\n", - " #format = matplotlib.ticker.FuncFormatter(lambda x, pos: ''),\n", - " #ticks = matplotlib.ticker.FixedLocator([])\n", - " )\n", + "fig.colorbar(\n", + " sm,\n", + " cax=divider.append_axes(**dvider_kwargs),\n", + " # format = matplotlib.ticker.FuncFormatter(lambda x, pos: ''),\n", + " # ticks = matplotlib.ticker.FixedLocator([])\n", + ")\n", "\n", - "#plt.savefig('../images/final_check_in/01_earthquake_intensity_feb6.png')" + "# 
plt.savefig('../images/final_check_in/01_earthquake_intensity_feb6.png')" ] } ], diff --git a/notebooks/internet-connectivity/03a-meta-internet-connectivity.ipynb b/notebooks/internet-connectivity/03a-meta-internet-connectivity.ipynb index f5a82d8..2a4d53a 100644 --- a/notebooks/internet-connectivity/03a-meta-internet-connectivity.ipynb +++ b/notebooks/internet-connectivity/03a-meta-internet-connectivity.ipynb @@ -23,20 +23,17 @@ ], "source": [ "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from bokeh.plotting import figure, output_file, show, output_notebook\n", + "from bokeh.plotting import figure, show, output_notebook\n", "from bokeh.models import Span\n", - "from shapely.geometry import Point\n", "import geopandas as gpd\n", "import glob\n", "from datetime import datetime\n", "import bokeh\n", - "from bokeh.layouts import Row, column, gridplot\n", - "from bokeh.models import Title, Legend, TapTool, Range1d, TabPanel, Tabs\n", - "import matplotlib as mpl\n", + "from bokeh.layouts import column\n", + "from bokeh.models import Legend, TabPanel, Tabs\n", "\n", "from bokeh.core.validation.warnings import EMPTY_LAYOUT, MISSING_RENDERERS\n", + "\n", "bokeh.core.validation.silence(EMPTY_LAYOUT, True)\n", "bokeh.core.validation.silence(MISSING_RENDERERS, True)" ] @@ -51,26 +48,27 @@ }, "outputs": [], "source": [ - "color_palette = [ '#4E79A7', # Blue\n", - " '#F28E2B', # Orange\n", - " '#E15759', # Red\n", - " '#76B7B2', # Teal\n", - " '#59A14F', # Green\n", - " '#EDC948', # Yellow\n", - " '#B07AA1', # Purple\n", - " '#FF9DA7', # Pink\n", - " '#9C755F', # Brown\n", - " '#BAB0AC', # Gray\n", - " '#7C7C7C', # Dark gray\n", - " '#6B4C9A', # Violet\n", - " '#D55E00', # Orange-red\n", - " '#CC61B0', # Magenta\n", - " '#0072B2', # Bright blue\n", - " '#329262', # Peacock green\n", - " '#9E5B5A', # Brick red\n", - " '#636363', # Medium gray\n", - " '#CD9C00', # Gold\n", - " '#5D69B1', # Medium blue\n", + "color_palette = [\n", + " \"#4E79A7\", # Blue\n", + " \"#F28E2B\", # Orange\n", + " \"#E15759\", # Red\n", + " \"#76B7B2\", # Teal\n", + " \"#59A14F\", # Green\n", + " \"#EDC948\", # Yellow\n", + " \"#B07AA1\", # Purple\n", + " \"#FF9DA7\", # Pink\n", + " \"#9C755F\", # Brown\n", + " \"#BAB0AC\", # Gray\n", + " \"#7C7C7C\", # Dark gray\n", + " \"#6B4C9A\", # Violet\n", + " \"#D55E00\", # Orange-red\n", + " \"#CC61B0\", # Magenta\n", + " \"#0072B2\", # Bright blue\n", + " \"#329262\", # Peacock green\n", + " \"#9E5B5A\", # Brick red\n", + " \"#636363\", # Medium gray\n", + " \"#CD9C00\", # Gold\n", + " \"#5D69B1\", # Medium blue\n", "]" ] }, @@ -98,21 +96,21 @@ }, "outputs": [], "source": [ - "all_files = glob.glob('../../data/meta-network-connectivity/network_undetected/*.csv')\n", + "all_files = glob.glob(\"../../data/meta-network-connectivity/network_undetected/*.csv\")\n", "\n", "networkCoverage = pd.DataFrame()\n", "\n", "li = []\n", "\n", "for file in all_files:\n", - " #print(file.split('_')[3])\n", + " # print(file.split('_')[3])\n", " df1 = pd.read_csv(file)\n", - " #print(file.split('_')[2])\n", - " df1['date'] = datetime.strptime(file.split('_')[2], '%Y-%m-%d')\n", + " # print(file.split('_')[2])\n", + " df1[\"date\"] = datetime.strptime(file.split(\"_\")[2], \"%Y-%m-%d\")\n", " li.append(df1)\n", "\n", "networkCoverage = pd.concat(li, axis=0)\n", - "networkCoverage = networkCoverage[networkCoverage['country']=='TR']" + "networkCoverage = networkCoverage[networkCoverage[\"country\"] == \"TR\"]" ] }, { @@ -126,10 +124,20 @@ "outputs": [], 
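A note on the connectivity cell above: it rebuilds its panel by reading one CSV per day and recovering each observation date from the file name itself, via `file.split("_")[2]`. A minimal, self-contained sketch of that load-and-stamp idiom follows; the file names and toy rows below are invented (chosen so the date is the third underscore-separated token, as in the real `network_undetected` exports), not the notebook's actual data.

```python
from datetime import datetime

import pandas as pd

# Toy stand-ins for the daily exports; in the notebook each name comes from
# glob.glob("../../data/meta-network-connectivity/network_undetected/*.csv").
files = [
    "network_undetected_2023-02-06_tile.csv",
    "network_undetected_2023-02-07_tile.csv",
]

frames = []
for file in files:
    # In the notebook this is pd.read_csv(file); toy rows keep the sketch runnable.
    df1 = pd.DataFrame({"country": ["TR", "SY"], "no_coverage": [3, 1]})
    # The export date is the third underscore-separated token of the file name.
    df1["date"] = datetime.strptime(file.split("_")[2], "%Y-%m-%d")
    frames.append(df1)

networkCoverage = pd.concat(frames, axis=0)
networkCoverage = networkCoverage[networkCoverage["country"] == "TR"]
```

Stamping the date onto each frame before `pd.concat` is what preserves per-day provenance once the daily files are stacked into one panel.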
"source": [ "# read turkey shapefile and areas of interest\n", - "turkey_adm2 = gpd.read_file('../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp')\n", - "aoi = gpd.read_file('../../data/SYRTUR_tessellation.geojson')\n", - "affected_adm1 = list(turkey_adm2[turkey_adm2['adm1'].isin(aoi['ADM1_PCODE'].unique())]['adm1_en'].unique())\n", - "affected_adm2 = list(turkey_adm2[turkey_adm2['pcode'].isin(aoi['ADM2_PCODE'].unique())]['adm2_en'].unique())" + "turkey_adm2 = gpd.read_file(\n", + " \"../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp\"\n", + ")\n", + "aoi = gpd.read_file(\"../../data/SYRTUR_tessellation.geojson\")\n", + "affected_adm1 = list(\n", + " turkey_adm2[turkey_adm2[\"adm1\"].isin(aoi[\"ADM1_PCODE\"].unique())][\n", + " \"adm1_en\"\n", + " ].unique()\n", + ")\n", + "affected_adm2 = list(\n", + " turkey_adm2[turkey_adm2[\"pcode\"].isin(aoi[\"ADM2_PCODE\"].unique())][\n", + " \"adm2_en\"\n", + " ].unique()\n", + ")" ] }, { @@ -145,14 +153,22 @@ "# filter and clean the dataset\n", "def clean_df(networkCoverage, date=None):\n", " if date is not None:\n", - " df = networkCoverage[networkCoverage['date']==date]\n", + " df = networkCoverage[networkCoverage[\"date\"] == date]\n", " else:\n", " df = networkCoverage\n", - " df = gpd.GeoDataFrame(df, geometry = gpd.points_from_xy(df.lon, df.lat))\n", + " df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))\n", " df = df.set_crs(4326, allow_override=True)\n", - " df = turkey_adm2.sjoin(df, how = 'left')\n", - " df = df.groupby(['adm2_en', 'date']).sum('no_coverage').reset_index()[['adm2_en', 'date', 'no_coverage']]\n", - " df = df.merge(turkey_adm2[['adm2_en', 'adm1_en']], on = ['adm2_en'], how = 'right').sort_values(by=['adm2_en', 'date']).reset_index(drop=True)\n", + " df = turkey_adm2.sjoin(df, how=\"left\")\n", + " df = (\n", + " df.groupby([\"adm2_en\", \"date\"])\n", + " .sum(\"no_coverage\")\n", + " .reset_index()[[\"adm2_en\", \"date\", \"no_coverage\"]]\n", + " )\n", + " df = (\n", + " df.merge(turkey_adm2[[\"adm2_en\", \"adm1_en\"]], on=[\"adm2_en\"], how=\"right\")\n", + " .sort_values(by=[\"adm2_en\", \"date\"])\n", + " .reset_index(drop=True)\n", + " )\n", "\n", " return df" ] @@ -168,8 +184,14 @@ "outputs": [], "source": [ "networkCoverage = clean_df(networkCoverage)\n", - "#calculate sum of values, grouped by week\n", - "week = networkCoverage.groupby([pd.Grouper(key='date', freq='W-MON'), 'adm2_en', 'adm1_en']).mean().reset_index()" + "# calculate sum of values, grouped by week\n", + "week = (\n", + " networkCoverage.groupby(\n", + " [pd.Grouper(key=\"date\", freq=\"W-MON\"), \"adm2_en\", \"adm1_en\"]\n", + " )\n", + " .mean()\n", + " .reset_index()\n", + ")" ] }, { @@ -183,8 +205,8 @@ }, "outputs": [], "source": [ - "aoi = gpd.read_file('../../data/SYRTUR_tessellation.geojson')['ADM2_PCODE'].unique()\n", - "aoi_adm1 = turkey_adm2[turkey_adm2['pcode'].isin(aoi)]['adm1_en'].unique()" + "aoi = gpd.read_file(\"../../data/SYRTUR_tessellation.geojson\")[\"ADM2_PCODE\"].unique()\n", + "aoi_adm1 = turkey_adm2[turkey_adm2[\"pcode\"].isin(aoi)][\"adm1_en\"].unique()" ] }, { @@ -199,57 +221,74 @@ "source": [ "bokeh.core.validation.silence(EMPTY_LAYOUT, True)\n", "\n", - "def get_line_plot(networkCoverage,title, source, earthquakes=False, subtitle=None):\n", "\n", - " p2 = figure(x_axis_type = 'datetime', width = 800, height = 400, toolbar_location='above')\n", + "def get_line_plot(networkCoverage, title, source, earthquakes=False, subtitle=None):\n", + " p2 = figure(x_axis_type=\"datetime\", 
width=800, height=400, toolbar_location=\"above\")\n", " p2.add_layout(Legend(), \"right\")\n", "\n", - " for id, adm2 in enumerate(networkCoverage['adm2_en'].unique()):\n", - " df = networkCoverage[networkCoverage['adm2_en']==adm2][['date', 'no_coverage']].reset_index(drop=True)\n", - " p2.line(df['date'], df['no_coverage'], line_width=2, line_color = color_palette[id], legend_label=adm2)\n", + " for id, adm2 in enumerate(networkCoverage[\"adm2_en\"].unique()):\n", + " df = networkCoverage[networkCoverage[\"adm2_en\"] == adm2][\n", + " [\"date\", \"no_coverage\"]\n", + " ].reset_index(drop=True)\n", + " p2.line(\n", + " df[\"date\"],\n", + " df[\"no_coverage\"],\n", + " line_width=2,\n", + " line_color=color_palette[id],\n", + " legend_label=adm2,\n", + " )\n", "\n", - " p2.legend.click_policy='hide'\n", + " p2.legend.click_policy = \"hide\"\n", " if subtitle is not None:\n", " p2.title = subtitle\n", "\n", - "\n", - " title_fig = figure(title=title, toolbar_location=None,width=800, height=40, )\n", + " title_fig = figure(\n", + " title=title,\n", + " toolbar_location=None,\n", + " width=800,\n", + " height=40,\n", + " )\n", " title_fig.title.align = \"left\"\n", " title_fig.title.text_font_size = \"20pt\"\n", " title_fig.border_fill_alpha = 0\n", - " title_fig.outline_line_width=0\n", + " title_fig.outline_line_width = 0\n", "\n", - " #with silence(MISSING_RENDERERS):\n", - " sub_title = figure(title=source, toolbar_location=None,width=800, height=40, )\n", + " # with silence(MISSING_RENDERERS):\n", + " sub_title = figure(\n", + " title=source,\n", + " toolbar_location=None,\n", + " width=800,\n", + " height=40,\n", + " )\n", " sub_title.title.align = \"left\"\n", " sub_title.title.text_font_size = \"10pt\"\n", - " sub_title.title.text_font_style=\"normal\"\n", + " sub_title.title.text_font_style = \"normal\"\n", " sub_title.border_fill_alpha = 0\n", - " sub_title.outline_line_width=0\n", + " sub_title.outline_line_width = 0\n", "\n", " layout = column(title_fig, p2, sub_title)\n", "\n", " if earthquakes:\n", - " p2.renderers.extend([\n", - " Span(\n", - " location=datetime(2023, 2, 6),\n", - " dimension=\"height\",\n", - " line_color='#7C7C7C',\n", - " line_width=2,\n", - " line_dash=(4,4)\n", - " ),\n", - " Span(\n", - " location=datetime(2023, 2, 20),\n", - " dimension=\"height\",\n", - " line_color='#7C7C7C',\n", - " line_width=2,\n", - " line_dash=(4,4)\n", - " ),\n", - " ]\n", - ")\n", + " p2.renderers.extend(\n", + " [\n", + " Span(\n", + " location=datetime(2023, 2, 6),\n", + " dimension=\"height\",\n", + " line_color=\"#7C7C7C\",\n", + " line_width=2,\n", + " line_dash=(4, 4),\n", + " ),\n", + " Span(\n", + " location=datetime(2023, 2, 20),\n", + " dimension=\"height\",\n", + " line_color=\"#7C7C7C\",\n", + " line_width=2,\n", + " line_dash=(4, 4),\n", + " ),\n", + " ]\n", + " )\n", "\n", - " return layout\n", - "\n" + " return layout" ] }, { @@ -318,14 +357,20 @@ "tabs = []\n", "\n", "for adm in aoi_adm1:\n", - " df = networkCoverage[networkCoverage['adm1_en']==adm] \n", + " df = networkCoverage[networkCoverage[\"adm1_en\"] == adm]\n", "\n", " tabs.append(\n", " TabPanel(\n", - " child=get_line_plot(df, f\"Areas with Undetected Network post earthquake\", \"Source: Data for Good Meta\", earthquakes=True, subtitle = '~600m areas without network post earthquake compared to 30 day prior baseline'),\n", - " title=adm.capitalize(),\n", - " )\n", - " )\n", + " child=get_line_plot(\n", + " df,\n", + " \"Areas with Undetected Network post earthquake\",\n", + " \"Source: Data for Good 
Meta\",\n", + " earthquakes=True,\n", + " subtitle=\"~600m areas without network post earthquake compared to 30 day prior baseline\",\n", + " ),\n", + " title=adm.capitalize(),\n", + " )\n", + " )\n", "\n", "tabs = Tabs(tabs=tabs, sizing_mode=\"scale_both\")\n", "show(tabs, warn_on_missing_glyphs=False)" @@ -398,14 +443,20 @@ "tabs = []\n", "\n", "for adm in aoi_adm1:\n", - " df = week[week['adm1_en']==adm] \n", + " df = week[week[\"adm1_en\"] == adm]\n", "\n", " tabs.append(\n", " TabPanel(\n", - " child=get_line_plot(df, f\"Weekly trends for Areas with Undetected Network post earthquake\", \"Source: Data for Good Meta\", earthquakes=True, subtitle = '~600m areas without network post earthquake compared to 30 day prior baseline'),\n", - " title=adm.capitalize(),\n", - " )\n", - " )\n", + " child=get_line_plot(\n", + " df,\n", + " \"Weekly trends for Areas with Undetected Network post earthquake\",\n", + " \"Source: Data for Good Meta\",\n", + " earthquakes=True,\n", + " subtitle=\"~600m areas without network post earthquake compared to 30 day prior baseline\",\n", + " ),\n", + " title=adm.capitalize(),\n", + " )\n", + " )\n", "\n", "tabs = Tabs(tabs=tabs, sizing_mode=\"scale_both\")\n", "show(tabs, warn_on_missing_glyphs=False)" diff --git a/notebooks/internet-connectivity/03b-ookla-speedtest-analysis.ipynb b/notebooks/internet-connectivity/03b-ookla-speedtest-analysis.ipynb index 6566d60..f79318b 100644 --- a/notebooks/internet-connectivity/03b-ookla-speedtest-analysis.ipynb +++ b/notebooks/internet-connectivity/03b-ookla-speedtest-analysis.ipynb @@ -11,28 +11,23 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from bokeh.plotting import figure, output_file, show, output_notebook\n", + "from bokeh.plotting import figure, show, output_notebook\n", "from bokeh.models import Span\n", - "from shapely.geometry import Point\n", "import geopandas as gpd\n", "import glob\n", "from datetime import datetime\n", "import bokeh\n", - "from bokeh.layouts import Row, column, gridplot\n", - "from bokeh.models import Title, Legend, TapTool, Range1d, TabPanel, Tabs\n", - "import matplotlib as mpl\n", + "from bokeh.layouts import column\n", + "from bokeh.models import Legend, TabPanel, Tabs\n", "\n", "from bokeh.core.validation.warnings import EMPTY_LAYOUT, MISSING_RENDERERS\n", + "\n", "bokeh.core.validation.silence(EMPTY_LAYOUT, True)\n", "bokeh.core.validation.silence(MISSING_RENDERERS, True)\n", "\n", "import dask.dataframe as dd\n", "from sklearn.preprocessing import StandardScaler\n", - "import dask_geopandas as dg\n", - "\n", - "\n" + "import dask_geopandas as dg" ] }, { @@ -45,26 +40,27 @@ }, "outputs": [], "source": [ - "color_palette = [ '#4E79A7', # Blue\n", - " '#F28E2B', # Orange\n", - " '#E15759', # Red\n", - " '#76B7B2', # Teal\n", - " '#59A14F', # Green\n", - " '#EDC948', # Yellow\n", - " '#B07AA1', # Purple\n", - " '#FF9DA7', # Pink\n", - " '#9C755F', # Brown\n", - " '#BAB0AC', # Gray\n", - " '#7C7C7C', # Dark gray\n", - " '#6B4C9A', # Violet\n", - " '#D55E00', # Orange-red\n", - " '#CC61B0', # Magenta\n", - " '#0072B2', # Bright blue\n", - " '#329262', # Peacock green\n", - " '#9E5B5A', # Brick red\n", - " '#636363', # Medium gray\n", - " '#CD9C00', # Gold\n", - " '#5D69B1', # Medium blue\n", + "color_palette = [\n", + " \"#4E79A7\", # Blue\n", + " \"#F28E2B\", # Orange\n", + " \"#E15759\", # Red\n", + " \"#76B7B2\", # Teal\n", + " \"#59A14F\", # Green\n", + " \"#EDC948\", # Yellow\n", + " \"#B07AA1\", # Purple\n", 
+ " \"#FF9DA7\", # Pink\n", + " \"#9C755F\", # Brown\n", + " \"#BAB0AC\", # Gray\n", + " \"#7C7C7C\", # Dark gray\n", + " \"#6B4C9A\", # Violet\n", + " \"#D55E00\", # Orange-red\n", + " \"#CC61B0\", # Magenta\n", + " \"#0072B2\", # Bright blue\n", + " \"#329262\", # Peacock green\n", + " \"#9E5B5A\", # Brick red\n", + " \"#636363\", # Medium gray\n", + " \"#CD9C00\", # Gold\n", + " \"#5D69B1\", # Medium blue\n", "]" ] }, @@ -95,14 +91,54 @@ "stnet = pd.DataFrame()\n", "\n", "\n", - "android = dd.read_csv(glob.glob(f'../../data/ookla-speedtest/raw/android_*.csv'), low_memory=False, usecols = ['test_id', 'test_date', 'client_latitude', 'client_longitude', 'client_country'])\n", - "android['type'] = 'android'\n", - "ios = dd.read_csv(glob.glob(f'../../data/ookla-speedtest/raw/ios_*.csv'), low_memory=False, usecols = ['test_id', 'test_date', 'client_latitude', 'client_longitude', 'client_country'])\n", - "ios['type'] = 'ios'\n", - "stnet = dd.read_csv(glob.glob(f'../../data/ookla-speedtest/raw/stnet_*.csv'), low_memory=False, usecols = ['test_id', 'test_date', 'client_latitude', 'client_longitude', 'client_country'])\n", - "stnet['type'] = 'stnet'\n", - "stdesktop = dd.read_csv(glob.glob(f'../../data/ookla-speedtest/raw/stdesktop_*.csv'), low_memory=False, usecols = ['test_id', 'test_date', 'client_latitude', 'client_longitude', 'client_country'])\n", - "stdesktop['type'] = 'stdesktop'" + "android = dd.read_csv(\n", + " glob.glob(\"../../data/ookla-speedtest/raw/android_*.csv\"),\n", + " low_memory=False,\n", + " usecols=[\n", + " \"test_id\",\n", + " \"test_date\",\n", + " \"client_latitude\",\n", + " \"client_longitude\",\n", + " \"client_country\",\n", + " ],\n", + ")\n", + "android[\"type\"] = \"android\"\n", + "ios = dd.read_csv(\n", + " glob.glob(\"../../data/ookla-speedtest/raw/ios_*.csv\"),\n", + " low_memory=False,\n", + " usecols=[\n", + " \"test_id\",\n", + " \"test_date\",\n", + " \"client_latitude\",\n", + " \"client_longitude\",\n", + " \"client_country\",\n", + " ],\n", + ")\n", + "ios[\"type\"] = \"ios\"\n", + "stnet = dd.read_csv(\n", + " glob.glob(\"../../data/ookla-speedtest/raw/stnet_*.csv\"),\n", + " low_memory=False,\n", + " usecols=[\n", + " \"test_id\",\n", + " \"test_date\",\n", + " \"client_latitude\",\n", + " \"client_longitude\",\n", + " \"client_country\",\n", + " ],\n", + ")\n", + "stnet[\"type\"] = \"stnet\"\n", + "stdesktop = dd.read_csv(\n", + " glob.glob(\"../../data/ookla-speedtest/raw/stdesktop_*.csv\"),\n", + " low_memory=False,\n", + " usecols=[\n", + " \"test_id\",\n", + " \"test_date\",\n", + " \"client_latitude\",\n", + " \"client_longitude\",\n", + " \"client_country\",\n", + " ],\n", + ")\n", + "stdesktop[\"type\"] = \"stdesktop\"" ] }, { @@ -117,15 +153,15 @@ "source": [ "# choose only the records where the records are from November 2022 till date and are taken in Turkey\n", "ddf = dd.concat([android, ios, stnet, stdesktop])\n", - "ddf = ddf[ddf['client_country']=='Turkey']\n", - "ddf['test_date'] = dd.to_datetime(ddf['test_date'])\n", - "meta = ('test_date', 'datetime64[ns]')\n", - "#df['test_date'] = df['test_date'].apply(lambda x: x + timedelta(hours=3), meta=meta)\n", - "ddf = ddf[(ddf['test_date']>'2022-11-01')&(ddf['test_date']<'2023-03-16')]\n", + "ddf = ddf[ddf[\"client_country\"] == \"Turkey\"]\n", + "ddf[\"test_date\"] = dd.to_datetime(ddf[\"test_date\"])\n", + "meta = (\"test_date\", \"datetime64[ns]\")\n", + "# df['test_date'] = df['test_date'].apply(lambda x: x + timedelta(hours=3), meta=meta)\n", + "ddf = ddf[(ddf[\"test_date\"] > 
\"2022-11-01\") & (ddf[\"test_date\"] < \"2023-03-16\")]\n", "\n", - "# create a column for the number of \n", - "#ddf['user_count'] = 1\n", - "ddf['date'] = ddf['test_date'].dt.date" + "# create a column for the number of\n", + "# ddf['user_count'] = 1\n", + "ddf[\"date\"] = ddf[\"test_date\"].dt.date" ] }, { @@ -139,7 +175,12 @@ "outputs": [], "source": [ "# count the number of test_ids that were taken on a given day\n", - "ddf = ddf[['test_id', 'date', 'client_latitude', 'client_longitude', 'type']].groupby(['client_latitude','client_longitude', 'date', 'type']).count()[['test_id']].reset_index()" + "ddf = (\n", + " ddf[[\"test_id\", \"date\", \"client_latitude\", \"client_longitude\", \"type\"]]\n", + " .groupby([\"client_latitude\", \"client_longitude\", \"date\", \"type\"])\n", + " .count()[[\"test_id\"]]\n", + " .reset_index()\n", + ")" ] }, { @@ -170,10 +211,20 @@ "outputs": [], "source": [ "# read turkey shapefile and areas of interest\n", - "turkey_adm2 = gpd.read_file('../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp')\n", - "aoi = gpd.read_file('../../data/SYRTUR_tessellation.geojson')\n", - "affected_adm1 = list(turkey_adm2[turkey_adm2['adm1'].isin(aoi['ADM1_PCODE'].unique())]['adm1_en'].unique())\n", - "affected_adm2 = list(turkey_adm2[turkey_adm2['pcode'].isin(aoi['ADM2_PCODE'].unique())]['adm2_en'].unique())" + "turkey_adm2 = gpd.read_file(\n", + " \"../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp\"\n", + ")\n", + "aoi = gpd.read_file(\"../../data/SYRTUR_tessellation.geojson\")\n", + "affected_adm1 = list(\n", + " turkey_adm2[turkey_adm2[\"adm1\"].isin(aoi[\"ADM1_PCODE\"].unique())][\n", + " \"adm1_en\"\n", + " ].unique()\n", + ")\n", + "affected_adm2 = list(\n", + " turkey_adm2[turkey_adm2[\"pcode\"].isin(aoi[\"ADM2_PCODE\"].unique())][\n", + " \"adm2_en\"\n", + " ].unique()\n", + ")" ] }, { @@ -196,10 +247,13 @@ ], "source": [ "gddf = gddf.sjoin(turkey_adm2)\n", - "gddf = gddf.drop(['client_latitude', 'client_longitude', 'OBJECTID', 'Shape_Leng', 'Shape_Area'], axis=1)\n", + "gddf = gddf.drop(\n", + " [\"client_latitude\", \"client_longitude\", \"OBJECTID\", \"Shape_Leng\", \"Shape_Area\"],\n", + " axis=1,\n", + ")\n", "\n", "# add up all the test ids that were taken for that admin 2 level\n", - "gddf = gddf.groupby(['adm2_en', 'adm1_en', 'date']).sum().reset_index()" + "gddf = gddf.groupby([\"adm2_en\", \"adm1_en\", \"date\"]).sum().reset_index()" ] }, { @@ -213,7 +267,7 @@ "outputs": [], "source": [ "ooklaUsers = gddf.compute()\n", - "ooklaUsers['date'] = pd.to_datetime(ooklaUsers['date'])" + "ooklaUsers[\"date\"] = pd.to_datetime(ooklaUsers[\"date\"])" ] }, { @@ -226,8 +280,8 @@ }, "outputs": [], "source": [ - "ooklaUsers.rename(columns = {'test_id': 'n_users'}, inplace=True)\n", - "ooklaUsers = ooklaUsers[['adm1_en', 'adm2_en', 'date', 'n_users']]" + "ooklaUsers.rename(columns={\"test_id\": \"n_users\"}, inplace=True)\n", + "ooklaUsers = ooklaUsers[[\"adm1_en\", \"adm2_en\", \"date\", \"n_users\"]]" ] }, { @@ -241,9 +295,11 @@ }, "outputs": [], "source": [ - "ooklaUsers['date'] = pd.to_datetime(ooklaUsers['date'])\n", - "#ooklaUsers = ooklaUsers.groupby(['adm2_en', 'date']).sum('n_users').reset_index()\n", - "ooklaUsers = ooklaUsers[(ooklaUsers['date']>'2023-01-03')&(ooklaUsers['date']<'2023-03-13')]" + "ooklaUsers[\"date\"] = pd.to_datetime(ooklaUsers[\"date\"])\n", + "# ooklaUsers = ooklaUsers.groupby(['adm2_en', 'date']).sum('n_users').reset_index()\n", + "ooklaUsers = ooklaUsers[\n", + " (ooklaUsers[\"date\"] > \"2023-01-03\") & 
(ooklaUsers[\"date\"] < \"2023-03-13\")\n", + "]" ] }, { @@ -302,7 +358,7 @@ }, "outputs": [], "source": [ - "ooklaUsersChange = ooklaUsers.merge(baseline, on = ['adm2_en'], how = 'left')" + "ooklaUsersChange = ooklaUsers.merge(baseline, on=[\"adm2_en\"], how=\"left\")" ] }, { @@ -315,9 +371,13 @@ }, "outputs": [], "source": [ - "ooklaUsersChange['n_baseline'] = ooklaUsersChange['n_users.mean']\n", - "ooklaUsersChange[\"n_difference\"] = ooklaUsersChange[\"n_users\"] - ooklaUsersChange[\"n_baseline\"]\n", - "ooklaUsersChange[\"percent_change\"] = 100 * (ooklaUsersChange[\"n_users\"] / (ooklaUsersChange[\"n_baseline\"]) - 1)" + "ooklaUsersChange[\"n_baseline\"] = ooklaUsersChange[\"n_users.mean\"]\n", + "ooklaUsersChange[\"n_difference\"] = (\n", + " ooklaUsersChange[\"n_users\"] - ooklaUsersChange[\"n_baseline\"]\n", + ")\n", + "ooklaUsersChange[\"percent_change\"] = 100 * (\n", + " ooklaUsersChange[\"n_users\"] / (ooklaUsersChange[\"n_baseline\"]) - 1\n", + ")" ] }, { @@ -350,12 +410,18 @@ }, "outputs": [], "source": [ - "#week['week_date'] = df['date'] - pd.to_timedelta(7, unit='d')\n", + "# week['week_date'] = df['date'] - pd.to_timedelta(7, unit='d')\n", "\n", "# #calculate sum of values, grouped by week\n", - "week = ooklaUsersChange.groupby([pd.Grouper(key='date', freq='W-MON'), 'adm2_en', 'adm1_en']).mean().reset_index()\n", + "week = (\n", + " ooklaUsersChange.groupby(\n", + " [pd.Grouper(key=\"date\", freq=\"W-MON\"), \"adm2_en\", \"adm1_en\"]\n", + " )\n", + " .mean()\n", + " .reset_index()\n", + ")\n", "\n", - "#week['user_count'] = week['user_count'].apply(lambda x: round(x,0))" + "# week['user_count'] = week['user_count'].apply(lambda x: round(x,0))" ] }, { @@ -370,54 +436,79 @@ "source": [ "bokeh.core.validation.silence(EMPTY_LAYOUT, True)\n", "\n", - "def get_line_plot(ooklaUsers,title, source, earthquakes=False, subtitle=None, measure = 'percent_change'):\n", "\n", - " p2 = figure(x_axis_type = 'datetime', width = 800, height = 400, toolbar_location='above')\n", + "def get_line_plot(\n", + " ooklaUsers,\n", + " title,\n", + " source,\n", + " earthquakes=False,\n", + " subtitle=None,\n", + " measure=\"percent_change\",\n", + "):\n", + " p2 = figure(x_axis_type=\"datetime\", width=800, height=400, toolbar_location=\"above\")\n", " p2.add_layout(Legend(), \"right\")\n", "\n", - " for id, adm2 in enumerate(ooklaUsers['adm2_en'].unique()):\n", - " df = ooklaUsers[ooklaUsers['adm2_en']==adm2][['date', measure]].reset_index(drop=True)\n", - " p2.line(df['date'], df[measure], line_width=2, line_color = color_palette[id], legend_label=adm2)\n", + " for id, adm2 in enumerate(ooklaUsers[\"adm2_en\"].unique()):\n", + " df = ooklaUsers[ooklaUsers[\"adm2_en\"] == adm2][[\"date\", measure]].reset_index(\n", + " drop=True\n", + " )\n", + " p2.line(\n", + " df[\"date\"],\n", + " df[measure],\n", + " line_width=2,\n", + " line_color=color_palette[id],\n", + " legend_label=adm2,\n", + " )\n", "\n", - " p2.legend.click_policy='hide'\n", + " p2.legend.click_policy = \"hide\"\n", " if subtitle is not None:\n", " p2.title = subtitle\n", "\n", - "\n", - " title_fig = figure(title=title, toolbar_location=None,width=800, height=40, )\n", + " title_fig = figure(\n", + " title=title,\n", + " toolbar_location=None,\n", + " width=800,\n", + " height=40,\n", + " )\n", " title_fig.title.align = \"left\"\n", " title_fig.title.text_font_size = \"20pt\"\n", " title_fig.border_fill_alpha = 0\n", - " title_fig.outline_line_width=0\n", + " title_fig.outline_line_width = 0\n", "\n", - " #with 
silence(MISSING_RENDERERS):\n", - " sub_title = figure(title=source, toolbar_location=None,width=800, height=40, )\n", + " # with silence(MISSING_RENDERERS):\n", + " sub_title = figure(\n", + " title=source,\n", + " toolbar_location=None,\n", + " width=800,\n", + " height=40,\n", + " )\n", " sub_title.title.align = \"left\"\n", " sub_title.title.text_font_size = \"10pt\"\n", - " sub_title.title.text_font_style=\"normal\"\n", + " sub_title.title.text_font_style = \"normal\"\n", " sub_title.border_fill_alpha = 0\n", - " sub_title.outline_line_width=0\n", + " sub_title.outline_line_width = 0\n", "\n", " layout = column(title_fig, p2, sub_title)\n", "\n", " if earthquakes:\n", - " p2.renderers.extend([\n", - " Span(\n", - " location=datetime(2023, 2, 6),\n", - " dimension=\"height\",\n", - " line_color='#7C7C7C',\n", - " line_width=2,\n", - " line_dash=(4,4)\n", - " ),\n", - " Span(\n", - " location=datetime(2023, 2, 20),\n", - " dimension=\"height\",\n", - " line_color='#7C7C7C',\n", - " line_width=2,\n", - " line_dash=(4,4)\n", - " ),\n", - " ]\n", - ")\n", + " p2.renderers.extend(\n", + " [\n", + " Span(\n", + " location=datetime(2023, 2, 6),\n", + " dimension=\"height\",\n", + " line_color=\"#7C7C7C\",\n", + " line_width=2,\n", + " line_dash=(4, 4),\n", + " ),\n", + " Span(\n", + " location=datetime(2023, 2, 20),\n", + " dimension=\"height\",\n", + " line_color=\"#7C7C7C\",\n", + " line_width=2,\n", + " line_dash=(4, 4),\n", + " ),\n", + " ]\n", + " )\n", "\n", " return layout" ] @@ -488,14 +579,20 @@ "tabs = []\n", "\n", "for adm in affected_adm1:\n", - " df = ooklaUsersChange[ooklaUsersChange['adm1_en']==adm] \n", + " df = ooklaUsersChange[ooklaUsersChange[\"adm1_en\"] == adm]\n", "\n", " tabs.append(\n", " TabPanel(\n", - " child=get_line_plot(df, f\"Number of daily users taking speedtest\", \"Source: Ookla Data for Good\", earthquakes=True, subtitle = '% change compared to a 3 month prior baseline'),\n", - " title=adm.capitalize(),\n", - " )\n", - " )\n", + " child=get_line_plot(\n", + " df,\n", + " \"Number of daily users taking speedtest\",\n", + " \"Source: Ookla Data for Good\",\n", + " earthquakes=True,\n", + " subtitle=\"% change compared to a 3 month prior baseline\",\n", + " ),\n", + " title=adm.capitalize(),\n", + " )\n", + " )\n", "\n", "tabs = Tabs(tabs=tabs, sizing_mode=\"scale_both\")\n", "show(tabs, warn_on_missing_glyphs=False)" @@ -568,14 +665,20 @@ "tabs = []\n", "\n", "for adm in affected_adm1:\n", - " df = week[week['adm1_en']==adm] \n", + " df = week[week[\"adm1_en\"] == adm]\n", "\n", " tabs.append(\n", " TabPanel(\n", - " child=get_line_plot(df, f\"Weekly average users taking speedtest\", \"Source: Ookla Data for Good\", earthquakes=True, subtitle = '% change compared to a 3 month prior baseline'),\n", - " title=adm.capitalize(),\n", - " )\n", - " )\n", + " child=get_line_plot(\n", + " df,\n", + " \"Weekly average users taking speedtest\",\n", + " \"Source: Ookla Data for Good\",\n", + " earthquakes=True,\n", + " subtitle=\"% change compared to a 3 month prior baseline\",\n", + " ),\n", + " title=adm.capitalize(),\n", + " )\n", + " )\n", "\n", "tabs = Tabs(tabs=tabs, sizing_mode=\"scale_both\")\n", "show(tabs, warn_on_missing_glyphs=False)" diff --git a/notebooks/mobility/activity.ipynb b/notebooks/mobility/activity.ipynb index 40ed6fc..15d219d 100644 --- a/notebooks/mobility/activity.ipynb +++ b/notebooks/mobility/activity.ipynb @@ -771,7 +771,7 @@ }, "outputs": [], "source": [ - "#ddf = ddf[ddf[\"datetime\"] >= pd.Timestamp('2022-07-01 00:00:00+03:00')]" + "# 
ddf = ddf[ddf[\"datetime\"] >= pd.Timestamp('2022-07-01 00:00:00+03:00')]" ] }, { @@ -2309,7 +2309,7 @@ " predicate = ACTIVITY[\"hex_id\"] == hex_id\n", " score = scaler.transform(ACTIVITY[predicate][[\"count\"]])\n", " ACTIVITY.loc[predicate, \"z_score\"] = score\n", - " except Exception as e:\n", + " except Exception:\n", " pass" ] }, @@ -2795,7 +2795,7 @@ " )\n", " r.visible = False\n", " renderers.append(r)\n", - " except Exception as e:\n", + " except Exception:\n", " pass\n", "\n", "renderers[0].visible = True\n", @@ -3456,7 +3456,7 @@ " )\n", " r.muted = True\n", " renderers.append(r)\n", - " except Exception as e:\n", + " except Exception:\n", " pass\n", "\n", "renderers[0].muted = False\n", @@ -3712,10 +3712,10 @@ " legend_label=NAMES.get(column),\n", " line_color=color,\n", " line_width=2,\n", - " ) \n", + " )\n", " r.visible = False\n", " renderers.append(r)\n", - " except Exception as e:\n", + " except Exception:\n", " pass\n", "\n", "renderers[0].visible = True\n", diff --git a/notebooks/mobility/visits.ipynb b/notebooks/mobility/visits.ipynb index 3503470..6667f4b 100644 --- a/notebooks/mobility/visits.ipynb +++ b/notebooks/mobility/visits.ipynb @@ -672,7 +672,7 @@ }, "outputs": [], "source": [ - "#result.to_parquet(f\"../../data/final/SYRTUR_visits_by_{TAG}_{PANEL}.parquet\")\n", + "# result.to_parquet(f\"../../data/final/SYRTUR_visits_by_{TAG}_{PANEL}.parquet\")\n", "result = pd.read_parquet(f\"../../data/final/SYRTUR_visits_by_{TAG}_{PANEL}.parquet\")" ] }, @@ -817,7 +817,7 @@ " line_width=2,\n", " )\n", " renderers.append(r)\n", - " except Exception as e:\n", + " except Exception:\n", " pass\n", "\n", " p.add_tools(\n", @@ -1337,7 +1337,8 @@ " tabs.append(\n", " TabPanel(\n", " child=plot_visits(\n", - " data, title=f\"Points-of-Interest Visit Trends in {NAMES.get(pcode)} ({pcode})\"\n", + " data,\n", + " title=f\"Points-of-Interest Visit Trends in {NAMES.get(pcode)} ({pcode})\",\n", " ),\n", " title=NAMES.get(pcode),\n", " )\n", @@ -1430,7 +1431,8 @@ " tabs.append(\n", " TabPanel(\n", " child=plot_visits(\n", - " data, title=f\"Points-of-Interest Visit Trends in {NAMES.get(pcode)} ({pcode})\"\n", + " data,\n", + " title=f\"Points-of-Interest Visit Trends in {NAMES.get(pcode)} ({pcode})\",\n", " ),\n", " title=NAMES.get(pcode),\n", " )\n", diff --git a/notebooks/nighttime-lights/01_clean_gas_flaring_data.R b/notebooks/nighttime-lights/01_clean_gas_flaring_data.R index f31d61b..9e8d975 100644 --- a/notebooks/nighttime-lights/01_clean_gas_flaring_data.R +++ b/notebooks/nighttime-lights/01_clean_gas_flaring_data.R @@ -43,5 +43,3 @@ gs_df <- gs_df %>% saveRDS(gs_df, file.path(gas_flare_dir, "FinalData", "gas_flare_locations.Rds")) write_csv(gs_df, file.path(gas_flare_dir, "FinalData", "gas_flare_locations.csv")) - - diff --git a/notebooks/nighttime-lights/03_aggregate.R b/notebooks/nighttime-lights/03_aggregate.R index 1a44187..29fa31f 100644 --- a/notebooks/nighttime-lights/03_aggregate.R +++ b/notebooks/nighttime-lights/03_aggregate.R @@ -150,5 +150,3 @@ for(roi in c("adm0", "adm1", "adm2")){ } } } - - diff --git a/notebooks/nighttime-lights/04_append.R b/notebooks/nighttime-lights/04_append.R index a37bc20..a7eca47 100644 --- a/notebooks/nighttime-lights/04_append.R +++ b/notebooks/nighttime-lights/04_append.R @@ -15,4 +15,4 @@ for(roi in c("adm0", "adm1", "adm2")){ paste0(roi, "_", product, ".csv"))) } -} \ No newline at end of file +} diff --git a/notebooks/nighttime-lights/05_avg_ntl_by_period.R b/notebooks/nighttime-lights/05_avg_ntl_by_period.R index da37957..22e4049 
100644 --- a/notebooks/nighttime-lights/05_avg_ntl_by_period.R +++ b/notebooks/nighttime-lights/05_avg_ntl_by_period.R @@ -54,4 +54,4 @@ write_csv(ntl_sum_df, file.path(ntl_bm_dir, "FinalData", "aggregated", "ntl_aggregated_by_time_period.csv")) saveRDS(ntl_sum_df, file.path(ntl_bm_dir, "FinalData", "aggregated", - "ntl_aggregated_by_time_period.Rds")) \ No newline at end of file + "ntl_aggregated_by_time_period.Rds")) diff --git a/notebooks/nighttime-lights/05_maps_ntl_changes.R b/notebooks/nighttime-lights/05_maps_ntl_changes.R index 786e500..4dc76d6 100644 --- a/notebooks/nighttime-lights/05_maps_ntl_changes.R +++ b/notebooks/nighttime-lights/05_maps_ntl_changes.R @@ -33,25 +33,25 @@ ntl_sum_df <- ntl_df %>% mutate(ntl_3day_pc = (ntl_3day - ntl_base)/ntl_base * 100, ntl_2week_pc = (ntl_2week - ntl_base)/ntl_base * 100, ntl_march_pc = (ntl_march - ntl_base)/ntl_base * 100) %>% - + dplyr::mutate(ntl_3day_chng_bin = case_when( ntl_3day_pc >= 10 ~ "> 10% Increase", ntl_3day_pc <= -10 ~ "> 10% Decrease", TRUE ~ "Small change" )) %>% - + dplyr::mutate(ntl_2week_chng_bin = case_when( ntl_2week_pc >= 10 ~ "> 10% Increase", ntl_2week_pc <= -10 ~ "> 10% Decrease", TRUE ~ "Small change" )) %>% - + dplyr::mutate(ntl_march_chng_bin = case_when( ntl_march_pc >= 10 ~ "> 10% Increase", ntl_march_pc <= -10 ~ "> 10% Decrease", TRUE ~ "Small change" )) %>% - + ## Merge in earthquake data and determine max intensity left_join(eq_df, by = "pcode") %>% @@ -72,14 +72,14 @@ adm2_sf$ntl_3day_pc[adm2_sf$ntl_3day_pc > 50] <- 50 # [Map] % Change --------------------------------------------------------------- for(var in c("ntl_2week", "ntl_march", "ntl_3day")){ - + adm2_sf$var_pc <- adm2_sf[[paste0(var, "_pc")]] adm2_sf$var_chng_bin <- adm2_sf[[paste0(var, "_chng_bin")]] - + if(var == "ntl_3day") subtitle <- "Change from 2 Weeks Before Earthquake to 3 Days After Earthquake" if(var == "ntl_2week") subtitle <- "Change from 2 Weeks Before Earthquake to 2 Weeks After Earthquake" if(var == "ntl_march") subtitle <- "Change from 2 Weeks Before Earthquake to March" - + #### % Change p <- ggplot() + geom_sf(data = adm0_sf, @@ -101,10 +101,10 @@ for(var in c("ntl_2week", "ntl_march", "ntl_3day")){ high = "forestgreen", midpoint = 0, limits = c(-50, 50)) - + ggsave(p, filename = file.path(fig_dir, paste0("ntl_adm2_map_",var,"_pc.png")), height = 6, width = 10) - + ## Change Bin p <- ggplot() + geom_sf(data = adm0_sf, @@ -122,10 +122,10 @@ for(var in c("ntl_2week", "ntl_march", "ntl_3day")){ title = "Change in Nighttime Lights", subtitle = subtitle) + scale_fill_manual(values = c("dodgerblue", "darkorange", "gray")) - + ggsave(p, filename = file.path(fig_dir, paste0("ntl_adm2_map_",var,"_chng_bin.png")), height = 6, width = 10) - + } @@ -161,4 +161,3 @@ p <- ntl_df %>% ggsave(p, filename = file.path(fig_dir, "ntl_adm2_inc_ex.png"), height = 2, width = 3.5) - diff --git a/notebooks/nighttime-lights/05_ntl_trends_daily.R b/notebooks/nighttime-lights/05_ntl_trends_daily.R index 7d740da..a1c5828 100644 --- a/notebooks/nighttime-lights/05_ntl_trends_daily.R +++ b/notebooks/nighttime-lights/05_ntl_trends_daily.R @@ -12,14 +12,14 @@ df <- ntl_df %>% df <- df %>% arrange(date) %>% group_by(adm2_en) %>% - mutate(ntl_bm_mean_ma7 = rollmean(ntl_bm_mean, + mutate(ntl_bm_mean_ma7 = rollmean(ntl_bm_mean, k = 7, align = "right", fill = NA)) %>% ungroup() # Individual ADMs -------------------------------------------------------------- mi_u <- df$mmi_feb06_7p8 %>% floor() %>% unique() %>% sort() for(mi in mi_u){ - + p <- df %>% 
dplyr::filter(date >= ymd("2022-11-01"), floor(mmi_feb06_7p8) == mi) %>% @@ -36,9 +36,9 @@ for(mi in mi_u){ strip.background = element_blank(), plot.title = element_text(face = "bold", hjust = 0.5)) + facet_wrap(~adm2_en, - scales = "free_y") - - ggsave(p, filename = file.path(fig_dir, + scales = "free_y") + + ggsave(p, filename = file.path(fig_dir, paste0("ntl_trends_daily_adm2_mi", mi, ".png")), height = 13, width = 13) } @@ -66,9 +66,9 @@ mi_df %>% strip.background = element_blank(), plot.title = element_text(face = "bold", hjust = 0.5)) + facet_wrap(~mi, - scales = "free_y") + scales = "free_y") -ggsave(filename = file.path(fig_dir, +ggsave(filename = file.path(fig_dir, paste0("ntl_trends_daily_adm2_mi", "all_column", ".png")), height = 4, width = 6) @@ -84,9 +84,8 @@ mi_df %>% strip.background = element_blank(), plot.title = element_text(face = "bold", hjust = 0.5)) + facet_wrap(~mi, - scales = "free_y") + scales = "free_y") -ggsave(filename = file.path(fig_dir, +ggsave(filename = file.path(fig_dir, paste0("ntl_trends_daily_adm2_mi", "all_line", ".png")), height = 4, width = 6) - diff --git a/notebooks/nighttime-lights/05_ntl_trends_monthly.R b/notebooks/nighttime-lights/05_ntl_trends_monthly.R index 43efdeb..f468324 100644 --- a/notebooks/nighttime-lights/05_ntl_trends_monthly.R +++ b/notebooks/nighttime-lights/05_ntl_trends_monthly.R @@ -9,7 +9,7 @@ df <- ntl_df %>% mi_u <- df$mmi_feb06_7p8 %>% floor() %>% unique() %>% sort() for(mi in mi_u){ - + p <- df %>% dplyr::filter(date >= ymd("2022-01-01"), floor(mmi_feb06_7p8) == mi) %>% @@ -21,9 +21,9 @@ for(mi in mi_u){ title = "Trends in Nighttime Lights Across ADM2", subtitle = paste0("MI: ", mi)) + facet_wrap(~adm2_en, - scales = "free_y") - - ggsave(p, filename = file.path(fig_dir, + scales = "free_y") + + ggsave(p, filename = file.path(fig_dir, paste0("ntl_trends_monthly_adm2_mi", mi, ".png")), height = 13, width = 13) -} \ No newline at end of file +} diff --git a/notebooks/nighttime-lights/_main.R b/notebooks/nighttime-lights/_main.R index 9433aca..6ac4acb 100644 --- a/notebooks/nighttime-lights/_main.R +++ b/notebooks/nighttime-lights/_main.R @@ -5,8 +5,8 @@ #### Root paths if(Sys.info()[["user"]] == "robmarty"){ git_dir <- "~/Documents/Github/turkiye-earthquake-impact" - - data_dir <- file.path("~", "Dropbox", "World Bank", "Side Work", + + data_dir <- file.path("~", "Dropbox", "World Bank", "Side Work", "Turkiye Earthquake Impact", "Data") } @@ -86,6 +86,3 @@ if(F){ source(file.path(ntl_gir_dir, "05_ntl_trends_monthly.R")) } - - - diff --git a/notebooks/surveys/README.md b/notebooks/surveys/README.md index 0e2cad6..1f2d961 100644 --- a/notebooks/surveys/README.md +++ b/notebooks/surveys/README.md @@ -2,4 +2,4 @@ Surveys are one of the most traditional and direct methods to extract and assess information from a group of people and to verify the ground truth, especially in regions impacted by a crisis. 
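One detail of the nighttime-lights scripts above is worth spelling out: `05_ntl_trends_daily.R` smooths each district's radiance with a right-aligned 7-day rolling mean (`rollmean(k = 7, align = "right", fill = NA)` inside a grouped `mutate`). Should the same smoothing be needed on the Python side of this repo, a rough pandas equivalent is sketched below; the frame is toy data invented for illustration, standing in for the appended ADM2 series.

```python
import pandas as pd

# Toy stand-in for the appended ADM2 nighttime-lights series.
ntl = pd.DataFrame(
    {
        "adm2_en": ["ADIYAMAN"] * 10 + ["NURDAGI"] * 10,
        "date": list(pd.date_range("2023-01-28", periods=10)) * 2,
        "ntl_bm_mean": [float(i) for i in range(20)],
    }
)

# Right-aligned 7-day rolling mean per district; pandas rolling windows are
# trailing by default, and min_periods=7 reproduces the fill = NA behaviour
# of rollmean for the first six days of each group.
ntl = ntl.sort_values(["adm2_en", "date"])
ntl["ntl_bm_mean_ma7"] = ntl.groupby("adm2_en")["ntl_bm_mean"].transform(
    lambda s: s.rolling(window=7, min_periods=7).mean()
)
```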
-## \ No newline at end of file +## diff --git a/notebooks/surveys/premise.ipynb b/notebooks/surveys/premise.ipynb index dad4712..9a2a8c9 100644 --- a/notebooks/surveys/premise.ipynb +++ b/notebooks/surveys/premise.ipynb @@ -27,9 +27,10 @@ "from shapely.geometry import Point\n", "import geopandas as gpd\n", "\n", + "\n", "def convert_to_gdf(df):\n", - " if 'geometry' in df.columns:\n", - " gdf = gpd.GeoDataFrame(df, crs=\"EPSG:4326\", geometry='geometry')\n", + " if \"geometry\" in df.columns:\n", + " gdf = gpd.GeoDataFrame(df, crs=\"EPSG:4326\", geometry=\"geometry\")\n", " return gdf\n", " else:\n", " geometry = [Point(xy) for xy in zip(df.longitude, df.latitude)]\n", @@ -54,11 +55,11 @@ "metadata": {}, "outputs": [], "source": [ - "community_needs = pd.read_csv('../../data/premise/Community_Needs.csv')\n", - "displacement = pd.read_csv('../../data/premise/Displacement.csv')\n", - "recent_earthquake = pd.read_csv('../../data/premise/Recent_Earthquake_in_Turkey.csv')\n", - "fuel_prices = pd.read_csv('../../data/premise/Premise Turkey Fuel Prices Update.csv')\n", - "household_needs = pd.read_csv('../../data/premise/Premise Household Needs TR.csv')" + "community_needs = pd.read_csv(\"../../data/premise/Community_Needs.csv\")\n", + "displacement = pd.read_csv(\"../../data/premise/Displacement.csv\")\n", + "recent_earthquake = pd.read_csv(\"../../data/premise/Recent_Earthquake_in_Turkey.csv\")\n", + "fuel_prices = pd.read_csv(\"../../data/premise/Premise Turkey Fuel Prices Update.csv\")\n", + "household_needs = pd.read_csv(\"../../data/premise/Premise Household Needs TR.csv\")" ] }, { @@ -72,9 +73,9 @@ "outputs": [], "source": [ "def correct_prices(x):\n", - " if x>2000:\n", - " return x/100\n", - " elif x<5:\n", + " if x > 2000:\n", + " return x / 100\n", + " elif x < 5:\n", " return x\n", " else:\n", " return x" @@ -107,11 +108,15 @@ } ], "source": [ - "fuel_prices[ ' what_is_the_price_of_a_liter_of_petrol'] = fuel_prices[ ' what_is_the_price_of_a_liter_of_petrol'].apply(lambda x: correct_prices(x))\n", - "fuel_prices[' what_is_the_price_of_a_liter_of_diesel']= fuel_prices[ ' what_is_the_price_of_a_liter_of_diesel'].apply(lambda x: correct_prices(x))\n", + "fuel_prices[\" what_is_the_price_of_a_liter_of_petrol\"] = fuel_prices[\n", + " \" what_is_the_price_of_a_liter_of_petrol\"\n", + "].apply(lambda x: correct_prices(x))\n", + "fuel_prices[\" what_is_the_price_of_a_liter_of_diesel\"] = fuel_prices[\n", + " \" what_is_the_price_of_a_liter_of_diesel\"\n", + "].apply(lambda x: correct_prices(x))\n", "\n", - "fuel_prices[' what_is_the_price_of_a_liter_of_petrol'].iloc[13]=21.43\n", - "fuel_prices[' what_is_the_price_of_a_liter_of_diesel'].iloc[10]=20.51" + "fuel_prices[\" what_is_the_price_of_a_liter_of_petrol\"].iloc[13] = 21.43\n", + "fuel_prices[\" what_is_the_price_of_a_liter_of_diesel\"].iloc[10] = 20.51" ] }, { @@ -146,7 +151,7 @@ ], "source": [ "## Verifying the data\n", - "fuel_prices[[' what_is_the_price_of_a_liter_of_petrol']].boxplot()\n" + "fuel_prices[[\" what_is_the_price_of_a_liter_of_petrol\"]].boxplot()" ] }, { @@ -159,7 +164,9 @@ }, "outputs": [], "source": [ - "turkey_adm2 = gpd.read_file('../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp')" + "turkey_adm2 = gpd.read_file(\n", + " \"../../data/turkey_administrativelevels0_1_2/tur_polbna_adm2.shp\"\n", + ")" ] }, { @@ -175,7 +182,7 @@ "df = fuel_prices\n", "geometry = [Point(xy) for xy in zip(df.observation_lon, df.observation_lat)]\n", "gdf = gpd.GeoDataFrame(df, crs=\"EPSG:4326\", geometry=geometry)\n", - 
"#gdf = gdf.sjoin(turkey_adm2)" + "# gdf = gdf.sjoin(turkey_adm2)" ] }, { @@ -215,11 +222,11 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=(10,6), sharex=True)\n", + "fig, ax = plt.subplots(figsize=(10, 6), sharex=True)\n", "\n", - "turkey_adm2.boundary.plot(ax=ax, edgecolor = '#D3D3D3', linewidth=0.5)\n", - "gdf.plot(column=' what_is_the_price_of_a_liter_of_diesel', ax=ax, legend = True)\n", - "#gdf.sjoin(turkey_adm2).plot(column=' what_is_the_price_of_a_liter_of_petrol', ax=ax, legend = True)" + "turkey_adm2.boundary.plot(ax=ax, edgecolor=\"#D3D3D3\", linewidth=0.5)\n", + "gdf.plot(column=\" what_is_the_price_of_a_liter_of_diesel\", ax=ax, legend=True)\n", + "# gdf.sjoin(turkey_adm2).plot(column=' what_is_the_price_of_a_liter_of_petrol', ax=ax, legend = True)" ] }, { @@ -232,7 +239,13 @@ }, "outputs": [], "source": [ - "gdf.rename(columns = {' what_is_the_price_of_a_liter_of_petrol':'Price of Petrol',' what_is_the_price_of_a_liter_of_diesel':'Price of Diesel' }, inplace=True)" + "gdf.rename(\n", + " columns={\n", + " \" what_is_the_price_of_a_liter_of_petrol\": \"Price of Petrol\",\n", + " \" what_is_the_price_of_a_liter_of_diesel\": \"Price of Diesel\",\n", + " },\n", + " inplace=True,\n", + ")" ] }, { @@ -245,8 +258,13 @@ }, "outputs": [], "source": [ - "fuel = gdf[['submission_id', 'Price of Petrol', 'Price of Diesel']].set_index('submission_id').stack().reset_index()\n", - "fuel.rename(columns = {'level_1':'Type', 0:'Price'}, inplace=True)" + "fuel = (\n", + " gdf[[\"submission_id\", \"Price of Petrol\", \"Price of Diesel\"]]\n", + " .set_index(\"submission_id\")\n", + " .stack()\n", + " .reset_index()\n", + ")\n", + "fuel.rename(columns={\"level_1\": \"Type\", 0: \"Price\"}, inplace=True)" ] }, { @@ -278,20 +296,20 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=(10,6))\n", - "turkey_adm2.boundary.plot(ax=ax, color = '#d3d3d3')\n", - "turkey_adm2.sjoin(gdf).plot(column='Price of Diesel', ax=ax, legend = True)\n", + "fig, ax = plt.subplots(figsize=(10, 6))\n", + "turkey_adm2.boundary.plot(ax=ax, color=\"#d3d3d3\")\n", + "turkey_adm2.sjoin(gdf).plot(column=\"Price of Diesel\", ax=ax, legend=True)\n", "\n", - "ax.spines['top'].set_visible(False)\n", - "ax.spines['bottom'].set_visible(False)\n", - "ax.spines['right'].set_visible(False)\n", - "ax.spines['left'].set_visible(False)\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"bottom\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "ax.spines[\"left\"].set_visible(False)\n", "\n", "ax.set_xticks([])\n", "ax.set_yticks([])\n", "\n", "\n", - "plt.grid(False)\n" + "plt.grid(False)" ] }, { @@ -324,16 +342,17 @@ ], "source": [ "import seaborn as sns\n", - "fig, ax = plt.subplots(figsize=(6,6))\n", - "#gdf[['Price of Diesel', 'Price of Petrol']].boxplot()\n", + "\n", + "fig, ax = plt.subplots(figsize=(6, 6))\n", + "# gdf[['Price of Diesel', 'Price of Petrol']].boxplot()\n", "\n", "sns.set(style=\"whitegrid\")\n", - "ax = sns.boxplot(x=\"Type\", y=\"Price\", data=fuel, showfliers = False)\n", + "ax = sns.boxplot(x=\"Type\", y=\"Price\", data=fuel, showfliers=False)\n", "ax = sns.swarmplot(x=\"Type\", y=\"Price\", data=fuel, color=\".25\")\n", - "ax.spines['top'].set_visible(False)\n", - "ax.spines['right'].set_visible(False)\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", "\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -351,8 +370,15 @@ "outputs": [], "source": [ "## Picking the relevant columns\n", - "household_columns = [' 
is_the_primary_health_care_facility_closest_to_you_currently_functional',' is_the_closest_hospital_to_you_currently_functional','observation_lat', 'observation_lon',\n", - " ' is_the_university_reference_hospital_near_you_functional',' do_you_have_access_to_functioning_food_markets',' in_the_past_six_months_has_the_amount_of_food_in_your_household_decreased_due_to_a_shortage']" + "household_columns = [\n", + " \" is_the_primary_health_care_facility_closest_to_you_currently_functional\",\n", + " \" is_the_closest_hospital_to_you_currently_functional\",\n", + " \"observation_lat\",\n", + " \"observation_lon\",\n", + " \" is_the_university_reference_hospital_near_you_functional\",\n", + " \" do_you_have_access_to_functioning_food_markets\",\n", + " \" in_the_past_six_months_has_the_amount_of_food_in_your_household_decreased_due_to_a_shortage\",\n", + "]" ] }, { @@ -399,17 +425,21 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=(10,6))\n", + "fig, ax = plt.subplots(figsize=(10, 6))\n", "\n", - "turkey_adm2[turkey_adm2['adm2_en']=='SAHINBEY'].boundary.plot(ax=ax, edgecolor = '#D3D3D3', linewidth=0.5)\n", - "#gdf.sjoin(turkey_adm2[turkey_adm2['adm2_en']=='SAHINBEY']).plot(ax=ax, column=' is_the_primary_health_care_facility_closest_to_you_currently_functional', legend=True)\n", - "gdf.sjoin(turkey_adm2[turkey_adm2['adm2_en']=='SAHINBEY']).plot(ax=ax, column=' is_the_closest_hospital_to_you_currently_functional', legend=True)\n", + "turkey_adm2[turkey_adm2[\"adm2_en\"] == \"SAHINBEY\"].boundary.plot(\n", + " ax=ax, edgecolor=\"#D3D3D3\", linewidth=0.5\n", + ")\n", + "# gdf.sjoin(turkey_adm2[turkey_adm2['adm2_en']=='SAHINBEY']).plot(ax=ax, column=' is_the_primary_health_care_facility_closest_to_you_currently_functional', legend=True)\n", + "gdf.sjoin(turkey_adm2[turkey_adm2[\"adm2_en\"] == \"SAHINBEY\"]).plot(\n", + " ax=ax, column=\" is_the_closest_hospital_to_you_currently_functional\", legend=True\n", + ")\n", "\n", "\n", - "ax.spines['top'].set_visible(False)\n", - "ax.spines['bottom'].set_visible(False)\n", - "ax.spines['right'].set_visible(False)\n", - "ax.spines['left'].set_visible(False)\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"bottom\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "ax.spines[\"left\"].set_visible(False)\n", "\n", "ax.set_xticks([])\n", "ax.set_yticks([])\n", @@ -428,7 +458,22 @@ }, "outputs": [], "source": [ - "manadatory_columns = ['observation_id','latitude', 'longitude', 'user_id', 'hasc', 'age', 'gender', 'geography', 'religion', 'ethnicity', 'living_situation', 'financial_situation', 'employment', 'education']" + "manadatory_columns = [\n", + " \"observation_id\",\n", + " \"latitude\",\n", + " \"longitude\",\n", + " \"user_id\",\n", + " \"hasc\",\n", + " \"age\",\n", + " \"gender\",\n", + " \"geography\",\n", + " \"religion\",\n", + " \"ethnicity\",\n", + " \"living_situation\",\n", + " \"financial_situation\",\n", + " \"employment\",\n", + " \"education\",\n", + "]" ] }, { @@ -445,25 +490,36 @@ "metadata": {}, "outputs": [], "source": [ - "columns_of_interest_community = [ 'in_your_neighborhood_is_lack_of_income_money_or_resources_to_survive_a_severe_problem',\n", - " 'in_your_neighborhood_is_lack_of_access_to_safe_drinking_or_cooking_water_a_severe_problem',\n", - " 'in_your_neighborhood_is_lack_of_access_to_functional_hygienic_bathrooms_a_severe_problem',\n", - " 'in_your_neighborhood_is_lack_of_access_to_personal_hygiene_products_and_services_a_severe_problem',\n", - " 
'in_your_neighborhood_is_there_a_severe_problem_related_to_nutrition', 'what_are_the_principal_concerns_related_to_nutrition_in_your_neighborhood',\n", - " 'have_there_been_significant_changes_in_the_total_quantity_of_food_available_to_people_in_your_neighborhood',\n", - " 'in_your_neighborhood_is_lack_of_access_to_medical_attention_a_severe_problem',\n", - " 'which_of_the_following_health_services_are_easily_accessible_in_your_local_area',\n", - " 'what_are_the_primary_issues_faced_by_children_in_your_neighborhood_in_relation_to_access_to_education']\n", - "\n", - "columns_displacement = ['are_you_originally_from_the_area_you_are_located_in_now',\n", - " 'how_likely_are_you_to_return_to_your_place_of_origin_in_the_next_1_month',\n", - " 'do_you_live_in_a_refugee_or_idp_camp?', 'how_long_have_you_been_displaced_for?', \n", - " 'to_your_knowledge,_does_your_community_have_a_positive_view_of_the_idps_and_refugees_residing_in_your_area?','in_the_past_week_has_there_been_an_increase_or_decease_in_the_number_of_refugee_or_idps_present_in_your_local_area']\n", - "\n", - "columns_recent_earthquake = ['did_you_observe_any_damage_to_your_home_during_the_recent_earthquake', 'did_you_notice_any_damage_to_key_infrastructure_or_buildings_in_your_community_because_of_the_recent_earthquake',\n", - " 'was_your_local_school_house_damaged_or_destroyed_in_the_earthquake',\n", - " 'did_you_observe_any_damage_to_roads_as_a_result_of_the_earthquake', 'are_there_currently_supplies_or_utilities_you_need_that_you_cannot_currently_access_as_a_result_of_the_earthquake',\n", - " 'what_supplies_or_utilities_do_you_need_that_you_cannot_currently_access_as_a_result_of_the_earthquake']" + "columns_of_interest_community = [\n", + " \"in_your_neighborhood_is_lack_of_income_money_or_resources_to_survive_a_severe_problem\",\n", + " \"in_your_neighborhood_is_lack_of_access_to_safe_drinking_or_cooking_water_a_severe_problem\",\n", + " \"in_your_neighborhood_is_lack_of_access_to_functional_hygienic_bathrooms_a_severe_problem\",\n", + " \"in_your_neighborhood_is_lack_of_access_to_personal_hygiene_products_and_services_a_severe_problem\",\n", + " \"in_your_neighborhood_is_there_a_severe_problem_related_to_nutrition\",\n", + " \"what_are_the_principal_concerns_related_to_nutrition_in_your_neighborhood\",\n", + " \"have_there_been_significant_changes_in_the_total_quantity_of_food_available_to_people_in_your_neighborhood\",\n", + " \"in_your_neighborhood_is_lack_of_access_to_medical_attention_a_severe_problem\",\n", + " \"which_of_the_following_health_services_are_easily_accessible_in_your_local_area\",\n", + " \"what_are_the_primary_issues_faced_by_children_in_your_neighborhood_in_relation_to_access_to_education\",\n", + "]\n", + "\n", + "columns_displacement = [\n", + " \"are_you_originally_from_the_area_you_are_located_in_now\",\n", + " \"how_likely_are_you_to_return_to_your_place_of_origin_in_the_next_1_month\",\n", + " \"do_you_live_in_a_refugee_or_idp_camp?\",\n", + " \"how_long_have_you_been_displaced_for?\",\n", + " \"to_your_knowledge,_does_your_community_have_a_positive_view_of_the_idps_and_refugees_residing_in_your_area?\",\n", + " \"in_the_past_week_has_there_been_an_increase_or_decease_in_the_number_of_refugee_or_idps_present_in_your_local_area\",\n", + "]\n", + "\n", + "columns_recent_earthquake = [\n", + " \"did_you_observe_any_damage_to_your_home_during_the_recent_earthquake\",\n", + " \"did_you_notice_any_damage_to_key_infrastructure_or_buildings_in_your_community_because_of_the_recent_earthquake\",\n", + " 
\"was_your_local_school_house_damaged_or_destroyed_in_the_earthquake\",\n", + " \"did_you_observe_any_damage_to_roads_as_a_result_of_the_earthquake\",\n", + " \"are_there_currently_supplies_or_utilities_you_need_that_you_cannot_currently_access_as_a_result_of_the_earthquake\",\n", + " \"what_supplies_or_utilities_do_you_need_that_you_cannot_currently_access_as_a_result_of_the_earthquake\",\n", + "]" ] }, { @@ -476,9 +532,9 @@ }, "outputs": [], "source": [ - "community_needs = community_needs[manadatory_columns+columns_of_interest_community]\n", - "displacement = displacement[manadatory_columns+columns_displacement]\n", - "recent_earthquake = recent_earthquake[manadatory_columns+columns_recent_earthquake]" + "community_needs = community_needs[manadatory_columns + columns_of_interest_community]\n", + "displacement = displacement[manadatory_columns + columns_displacement]\n", + "recent_earthquake = recent_earthquake[manadatory_columns + columns_recent_earthquake]" ] }, { @@ -491,7 +547,15 @@ }, "outputs": [], "source": [ - "community_needs['food_color']=community_needs['have_there_been_significant_changes_in_the_total_quantity_of_food_available_to_people_in_your_neighborhood'].replace({\"the_quantity_has_decreased\":'Decreased','the_quantity_has_stayed_the_same':'Remained the same','the_quantity_has_increased':'Increased' })" + "community_needs[\"food_color\"] = community_needs[\n", + " \"have_there_been_significant_changes_in_the_total_quantity_of_food_available_to_people_in_your_neighborhood\"\n", + "].replace(\n", + " {\n", + " \"the_quantity_has_decreased\": \"Decreased\",\n", + " \"the_quantity_has_stayed_the_same\": \"Remained the same\",\n", + " \"the_quantity_has_increased\": \"Increased\",\n", + " }\n", + ")" ] }, { @@ -504,9 +568,9 @@ }, "outputs": [], "source": [ - "recent_earthquake = turkey_adm2.sjoin(convert_to_gdf(recent_earthquake), how='left')\n", - "displacement = turkey_adm2.sjoin(convert_to_gdf(displacement), how='left')\n", - "community_needs = turkey_adm2.sjoin(convert_to_gdf(community_needs), how='left')" + "recent_earthquake = turkey_adm2.sjoin(convert_to_gdf(recent_earthquake), how=\"left\")\n", + "displacement = turkey_adm2.sjoin(convert_to_gdf(displacement), how=\"left\")\n", + "community_needs = turkey_adm2.sjoin(convert_to_gdf(community_needs), how=\"left\")" ] }, { @@ -519,7 +583,9 @@ }, "outputs": [], "source": [ - "displacement['idp_color'] = displacement['in_the_past_week_has_there_been_an_increase_or_decease_in_the_number_of_refugee_or_idps_present_in_your_local_area']#.replace({\"i_don't_know\":0,'no_change':100,'increase':125, 'large_increase':150, 'decrease':75, 'large_decrease':50 })" + "displacement[\"idp_color\"] = displacement[\n", + " \"in_the_past_week_has_there_been_an_increase_or_decease_in_the_number_of_refugee_or_idps_present_in_your_local_area\"\n", + "] # .replace({\"i_don't_know\":0,'no_change':100,'increase':125, 'large_increase':150, 'decrease':75, 'large_decrease':50 })" ] }, { @@ -532,7 +598,7 @@ }, "outputs": [], "source": [ - "df = recent_earthquake.groupby('adm2_en').count()[['observation_id']].reset_index()" + "df = recent_earthquake.groupby(\"adm2_en\").count()[[\"observation_id\"]].reset_index()" ] }, { @@ -545,8 +611,8 @@ }, "outputs": [], "source": [ - "df = community_needs.groupby('adm2_en').count()[['observation_id']].reset_index()\n", - "df = df[['adm2_en', 'observation_id']].merge(turkey_adm2)" + "df = community_needs.groupby(\"adm2_en\").count()[[\"observation_id\"]].reset_index()\n", + "df = df[[\"adm2_en\", 
\"observation_id\"]].merge(turkey_adm2)" ] }, { @@ -603,30 +669,37 @@ "source": [ "from mpl_toolkits.axes_grid1 import make_axes_locatable\n", "\n", - "fig, ax = plt.subplots(figsize =(12,10))\n", - "#gdf.plot(column = 'mean_of_me', ax=ax, alpha =0.4)\n", - "gdf.plot(column = 'observation_id', ax=ax, cmap = 'Oranges', alpha = 0.7)\n", - "ax.spines['top'].set_visible(False)\n", - "ax.spines['bottom'].set_visible(False)\n", - "ax.spines['right'].set_visible(False)\n", - "ax.spines['left'].set_visible(False)\n", + "fig, ax = plt.subplots(figsize=(12, 10))\n", + "# gdf.plot(column = 'mean_of_me', ax=ax, alpha =0.4)\n", + "gdf.plot(column=\"observation_id\", ax=ax, cmap=\"Oranges\", alpha=0.7)\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"bottom\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "ax.spines[\"left\"].set_visible(False)\n", "\n", "ax.set_xticks([])\n", "ax.set_yticks([])\n", "\n", - "sm = plt.cm.ScalarMappable(cmap='Oranges',norm=plt.Normalize(vmin=gdf['observation_id'].min(), vmax=gdf['observation_id'].max()))\n", + "sm = plt.cm.ScalarMappable(\n", + " cmap=\"Oranges\",\n", + " norm=plt.Normalize(\n", + " vmin=gdf[\"observation_id\"].min(), vmax=gdf[\"observation_id\"].max()\n", + " ),\n", + ")\n", "\n", "\n", "divider = make_axes_locatable(ax)\n", "dvider_kwargs = dict(position=\"left\", size=\"1.5%\", pad=0)\n", - "fig.colorbar(sm, cax=divider.append_axes(**dvider_kwargs),\n", - " #format = matplotlib.ticker.FuncFormatter(lambda x, pos: ''),\n", - " #ticks = matplotlib.ticker.FixedLocator([])\n", - " )\n", + "fig.colorbar(\n", + " sm,\n", + " cax=divider.append_axes(**dvider_kwargs),\n", + " # format = matplotlib.ticker.FuncFormatter(lambda x, pos: ''),\n", + " # ticks = matplotlib.ticker.FixedLocator([])\n", + ")\n", "\n", - "ax.set_title('Recent Earthquake Observation Counts')\n", + "ax.set_title(\"Recent Earthquake Observation Counts\")\n", "\n", - "#plt.savefig('../images/recent_earthquake_observations.png')" + "# plt.savefig('../images/recent_earthquake_observations.png')" ] }, { @@ -635,7 +708,7 @@ "metadata": {}, "outputs": [], "source": [ - "displacement['idp_color'] = displacement['idp_color'].str.replace('_',' ')" + "displacement[\"idp_color\"] = displacement[\"idp_color\"].str.replace(\"_\", \" \")" ] }, { @@ -667,22 +740,22 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=(10,6))\n", + "fig, ax = plt.subplots(figsize=(10, 6))\n", "\n", - "turkey_adm2.boundary.plot(ax=ax, edgecolor = '#D3D3D3', linewidth=0.5)\n", - "displacement.plot(column = 'idp_color', ax=ax, legend = True, cmap='Spectral')\n", + "turkey_adm2.boundary.plot(ax=ax, edgecolor=\"#D3D3D3\", linewidth=0.5)\n", + "displacement.plot(column=\"idp_color\", ax=ax, legend=True, cmap=\"Spectral\")\n", "\n", "ax.set_xticks([])\n", "ax.set_yticks([])\n", - "ax.spines['top'].set_visible(False)\n", - "ax.spines['bottom'].set_visible(False)\n", - "ax.spines['right'].set_visible(False)\n", - "ax.spines['left'].set_visible(False)\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"bottom\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "ax.spines[\"left\"].set_visible(False)\n", "\n", - "#ax.legend(loc='upper right')\n", - "ax.set_title('Change in Displacement of IDP/Refugees')\n", + "# ax.legend(loc='upper right')\n", + "ax.set_title(\"Change in Displacement of IDP/Refugees\")\n", "plt.show()\n", - "#plt.legend()" + "# plt.legend()" ] }, { @@ -703,9 +776,9 @@ }, "outputs": [], "source": [ - "food_availability = 
community_needs[['adm2_en', 'food_color', 'geometry']]\n", - "food_availability = food_availability[food_availability['food_color']!='i_don_t_know']\n", - "adm2 = food_availability[~(food_availability['food_color'].isna())]['adm2_en'].unique()" + "food_availability = community_needs[[\"adm2_en\", \"food_color\", \"geometry\"]]\n", + "food_availability = food_availability[food_availability[\"food_color\"] != \"i_don_t_know\"]\n", + "adm2 = food_availability[~(food_availability[\"food_color\"].isna())][\"adm2_en\"].unique()" ] }, { @@ -739,22 +812,22 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=(10,6))\n", + "fig, ax = plt.subplots(figsize=(10, 6))\n", "\n", - "turkey_adm2.boundary.plot(ax=ax, edgecolor = '#D3D3D3', linewidth=0.5)\n", - "food_availability.plot(column = 'food_color', ax=ax, legend = True, cmap='viridis')\n", + "turkey_adm2.boundary.plot(ax=ax, edgecolor=\"#D3D3D3\", linewidth=0.5)\n", + "food_availability.plot(column=\"food_color\", ax=ax, legend=True, cmap=\"viridis\")\n", "\n", "ax.set_xticks([])\n", "ax.set_yticks([])\n", - "ax.spines['top'].set_visible(False)\n", - "ax.spines['bottom'].set_visible(False)\n", - "ax.spines['right'].set_visible(False)\n", - "ax.spines['left'].set_visible(False)\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"bottom\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "ax.spines[\"left\"].set_visible(False)\n", "\n", - "ax.set_title('Change in Food Availability')\n", + "ax.set_title(\"Change in Food Availability\")\n", "\n", "# leg = ax.legend()\n", - "# leg.get_frame().set_alpha(0)\n" + "# leg.get_frame().set_alpha(0)" ] }, { @@ -794,19 +867,23 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=(10,6))\n", + "fig, ax = plt.subplots(figsize=(10, 6))\n", "\n", - "turkey_adm2.boundary.plot(ax=ax, edgecolor = '#D3D3D3', linewidth=0.5)\n", - "convert_to_gdf(recent_earthquake).plot(column = 'did_you_notice_any_damage_to_key_infrastructure_or_buildings_in_your_community_because_of_the_recent_earthquake', ax=ax, legend = True)\n", + "turkey_adm2.boundary.plot(ax=ax, edgecolor=\"#D3D3D3\", linewidth=0.5)\n", + "convert_to_gdf(recent_earthquake).plot(\n", + " column=\"did_you_notice_any_damage_to_key_infrastructure_or_buildings_in_your_community_because_of_the_recent_earthquake\",\n", + " ax=ax,\n", + " legend=True,\n", + ")\n", "\n", "ax.set_xticks([])\n", "ax.set_yticks([])\n", - "ax.spines['top'].set_visible(False)\n", - "ax.spines['bottom'].set_visible(False)\n", - "ax.spines['right'].set_visible(False)\n", - "ax.spines['left'].set_visible(False)\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"bottom\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "ax.spines[\"left\"].set_visible(False)\n", "\n", - "ax.set_title('Damage to key infrastructure or buildings')" + "ax.set_title(\"Damage to key infrastructure or buildings\")" ] } ],
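A closing observation on the premise notebook: most of its maps repeat one pipeline, i.e. spatially join point observations to the ADM2 polygons, aggregate per district, then merge the aggregate back onto the polygons before plotting. A condensed, self-contained sketch of that round trip follows, with toy geometries and toy district names standing in for `tur_polbna_adm2.shp` and the survey submissions.

```python
import geopandas as gpd
from shapely.geometry import Point, box

# Two toy districts in place of the Turkey ADM2 shapefile.
adm2 = gpd.GeoDataFrame(
    {"adm2_en": ["EAST", "WEST"]},
    geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1)],
    crs="EPSG:4326",
)

# Toy survey submissions in place of the premise observation points.
obs = gpd.GeoDataFrame(
    {"observation_id": [1, 2, 3]},
    geometry=[Point(0.5, 0.5), Point(1.5, 0.5), Point(1.7, 0.2)],
    crs="EPSG:4326",
)

# Join points to districts, count responses per district, merge back for plotting.
counts = (
    adm2.sjoin(obs, how="left")
    .groupby("adm2_en")["observation_id"]
    .count()
    .reset_index()
)
gdf = adm2.merge(counts, on="adm2_en")
# gdf.plot(column="observation_id", cmap="Oranges"), styled as in the cells above.
```

Running `sjoin` once and reusing the joined frame for each question column keeps the most expensive step out of the per-map loop.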