diff --git a/air-quality-backend/system_tests/utils/cams_utilities.py b/air-quality-backend/system_tests/utils/cams_utilities.py index cfc48d83..49b34260 100644 --- a/air-quality-backend/system_tests/utils/cams_utilities.py +++ b/air-quality-backend/system_tests/utils/cams_utilities.py @@ -1,3 +1,4 @@ +import pprint from datetime import datetime from pandas import read_csv @@ -127,6 +128,17 @@ def get_forecast_percentage_divergence( pollutant, "database_forecast", database_record_for_city_and_valid_time ) + database_pollutant_value_3dp = round(database_pollutant_value, 3) + + pprint.pprint( + "City: {}, Pollutant: {}, ECMWF value: {}, database value: {}".format( + test_city, + pollutant, + ecmwf_forecast_pollutant_value, + database_pollutant_value_3dp, + ) + ) + return calculate_database_divergence_from_ecmwf_forecast_values( - database_pollutant_value, ecmwf_forecast_pollutant_value + database_pollutant_value_3dp, ecmwf_forecast_pollutant_value ) diff --git a/notebooks/trends/trends-notebook.ipynb b/notebooks/trends/trends-notebook.ipynb new file mode 100644 index 00000000..1ff5d346 --- /dev/null +++ b/notebooks/trends/trends-notebook.ipynb @@ -0,0 +1,518 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pollutant data trends\n", + "\n", + "#### Notebook Purpose\n", + "\n", + "This will allow you to get multiple months’ worth of data using the vAirify API and display it on a series of 5 graphs for each of the pollutants, NO2, O3, PM2.5, PM10 and SO2.\n", + "\n", + "**Please make sure you run all code cells in order they appear. Further explanations of what a code cell is and what each one does is provided throughout this notebook.**\n", + "\n", + "**Further important pieces of information will also be highlighted in bold.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Brief explanation of Jupyter Notebook:\n", + "\n", + "Just in case you are new to Jupyter notebooks, here is a very quick explanation of the basic structure of a notebook.\n", + "\n", + "#### Code Cells:\n", + "\n", + "The notebook is made up of cells. A common cell type is a code cell, where you write and execute code. \n", + "\n", + "When you run the cell, the output (such as text, numbers, or plots) is displayed directly below the cell.\n", + "\n", + "#### Markdown Cells:\n", + "\n", + "Markdown cells allow you to write formatted text using Markdown syntax. You can include headers, bullet points, links, and even equations (using LaTeX). These are useful for adding explanations, notes, or documentation.\n", + "\n", + "Example: You might see sections labelled \"Install Packages\", \"Global Variables\", or \"Get Cities\" in this notebook to organize information and code.\n", + "\n", + "#### Interactive Output:\n", + "\n", + "In addition to displaying text, code cells can show rich output such as plots (using libraries like matplotlib), tables, or even interactive widgets. This allows you to explore data visually within the same environment.\n", + "\n", + "Example: A notebook could display a line chart just like this notebook!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Install packages\n", + "\n", + "Before you run any other code cells ensure all the Python modules we need are installed. \n", + "\n", + "Do this by running the code cell below. Do so by clicking the play symbol next to the code cell on the left had side in the margin. \n", + "\n", + "Depending on what you are using to view this notebook you may have to hover over the left side of the code cell in the margin to get it to appear. This is the case in visual studio. \n", + "\n", + "If you are running this in Google Collab, you will see a round button with a play simple in it on the left hand side of the code cell, it may only appear when you hover over the code cell. \n", + "\n", + "**USEFUL TIP: If you don't want to change any settings and just want to see some graphs! On Google Collab, at the top of the screen, underneath the notebook's file name, click the “Runtime” tab on the menu bar. Then click “Run all” this will run all code cells in order.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install ipywidgets\n", + "%pip install requests\n", + "%pip install matplotlib\n", + "%pip install PyGithub" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# vAirfy API URL\n", + "\n", + "To connect to the vAirfy API we need to tell our notebook where it needs to send it's requests to. We can do this by setting the `AIR_QUALITY_API_URL` variable below.\n", + "\n", + "**By default** it is set to use the production API. However should you have a local database and API set up and you wish to use that, you are able to set that below. \n", + "\n", + "**IF YOU ARE HAPPY WITH THE URL BELOW RUN THE CODE CELL BEFORE MOVING ON TO THE NEXT STEP**" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "AIR_QUALITY_API_URL = \"http://64.225.143.231/api\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get Cities\n", + "\n", + "Run code below to get the cities from the CAMS_locations_V1 file in the GitHub repository. \n", + "\n", + "Once you have run the code cell a selection box should appear. Simply click on the city you want to use. The default is Lima." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ipywidgets import interact\n", + "import ipywidgets as widgets\n", + "import csv\n", + "from github import Github\n", + "cities = []\n", + "city: str\n", + "\n", + "def update_chosen_city(city_input: str):\n", + " global city\n", + " city = city_input\n", + "\n", + "repo = Github().get_repo('ECMWFCode4Earth/vAirify')\n", + "locations = repo.get_contents('/deployment/database/CAMS_locations_V1.csv')\n", + "decoded_locations_file = locations.decoded_content.decode()\n", + "csv_reader = csv.reader(decoded_locations_file.split('\\n'), delimiter=',')\n", + "next(csv_reader)\n", + "for row in csv_reader:\n", + " if len(row) > 0:\n", + " cities.append(row[1])\n", + "cities.sort()\n", + "cities_widget = widgets.Dropdown(\n", + " options=cities,\n", + " value=\"Lima\",\n", + " description='Cities',\n", + ")\n", + "interact(update_chosen_city, city_input=cities_widget)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Define Time Range\n", + "\n", + "Run the code cell below and adjust how many months’ worth of data you want to retrieve. The default is one month's worth of data.\n", + "\n", + "If you wish, you can increase the max number of months change the variable `max_number_of_months` below at the top of the code cell. Otherwise please leave the variable as the default of 3 months.\n", + "\n", + "**WARNING: Making this greater than 3 months risks issues when creating the graphs, such as overlapping tick labels etc..**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "max_number_of_months = 3\n", + "\n", + "months_to_search = 1\n", + "def update_time_period(months_input: int):\n", + " global months_to_search\n", + " months_to_search = months_input\n", + "\n", + "time_period_widget = widgets.Dropdown(\n", + " options= np.arange(1, max_number_of_months + 1),\n", + " value=1,\n", + " description='Months',\n", + ")\n", + "interact(update_time_period, months_input=time_period_widget)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Make API Request\n", + "\n", + "Run the below code cell to get the data from the API. The top code cell will get the in_situ data and the one below that will get the forecast data.\n", + "\n", + "**USEFUL TIP: The variables of code cells save and do not need to be rerun unless you change the `max_number_of_months` in the above code cell or you select a different city for example. This goes for all of the code cells. You won't need to re-run them unless you change something they depend on.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import os\n", + "from datetime import datetime, timedelta\n", + "from dateutil.relativedelta import relativedelta\n", + "\n", + "in_situ_endpoint = AIR_QUALITY_API_URL + \"/air-pollutant/measurements\"\n", + "\n", + "start_date = datetime.now() - relativedelta(months=months_to_search)\n", + "end_date = datetime.now()\n", + "\n", + "in_situ_params = {\n", + " \"date_from\": start_date,\n", + " \"date_to\": end_date,\n", + " \"location_type\": \"city\",\n", + " \"location_names\": [city],\n", + " \"api_source\": \"OpenAQ\",\n", + "}\n", + "\n", + "in_situ_response = requests.get(in_situ_endpoint, params=in_situ_params)\n", + "\n", + "print(in_situ_response.status_code)\n", + "\n", + "in_situ_api_request_result = in_situ_response.json()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "forecast_endpoint = AIR_QUALITY_API_URL + \"/air-pollutant/forecast\"\n", + "\n", + "def getLatestBaseForecastTime(date: datetime): \n", + " hour = date.hour\n", + " modelHour = 12\n", + " if hour >= 10 and hour < 22:\n", + " modelHour = 0\n", + " \n", + " modelDate: datetime = datetime(\n", + " date.year,\n", + " date.month,\n", + " date.day,\n", + " modelHour,\n", + " 0,\n", + " 0,\n", + " )\n", + "\n", + " if hour >= 0 and hour < 10:\n", + " modelDate = modelDate - timedelta(days= 1 )\n", + " return modelDate\n", + "forecast_api_request_result = []\n", + "\n", + "current_time = start_date\n", + "\n", + "while(current_time < end_date):\n", + " forecast_base_time = getLatestBaseForecastTime(current_time)\n", + " \n", + " forecast_params = {\n", + " \"valid_time_from\": current_time,\n", + " \"valid_time_to\": current_time + timedelta(days= 1),\n", + " \"base_time\": forecast_base_time,\n", + " \"location_type\": \"city\",\n", + " \"location_name\": city,\n", + " }\n", + " forecast_response = requests.get(forecast_endpoint, params=forecast_params)\n", + " forecast_api_request_result.extend(forecast_response.json())\n", + " current_time = current_time + timedelta(days= 1)\n", + "\n", + "print(forecast_response.status_code)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prepare Data to display\n", + "\n", + "The below code will prepare data to display on the graph. It groups the data by pollutant and within that by measuring station. The top code cell will process the in situ data and the one below that will process the forecast data." + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "in_situ_processed_data = {\"no2\":{},\n", + " \"o3\":{},\n", + " \"pm2_5\":{},\n", + " \"pm10\":{},\n", + " \"so2\":{}}\n", + "\n", + "def generate_data_scafold():\n", + " return {\n", + " \"values\":[],\n", + " \"times\":[]\n", + " }\n", + "\n", + "def update_processed_data(measurement, pollutant, site_name, measurement_date):\n", + " if pollutant in measurement:\n", + " if site_name not in in_situ_processed_data[pollutant] : in_situ_processed_data[pollutant][site_name] = generate_data_scafold()\n", + " in_situ_processed_data[pollutant][site_name][\"values\"].append(measurement[pollutant])\n", + " in_situ_processed_data[pollutant][site_name][\"times\"].append(measurement_date)\n", + "\n", + "for measurement in in_situ_api_request_result:\n", + " site_name = measurement[\"site_name\"]\n", + " measurement_date = datetime.strptime(measurement[\"measurement_date\"], '%Y-%m-%dT%H:%M:%SZ')\n", + " if \"no2\" in measurement: update_processed_data(measurement, \"no2\", site_name, measurement_date)\n", + " if \"o3\" in measurement: update_processed_data(measurement, \"o3\", site_name, measurement_date)\n", + " if \"pm2_5\" in measurement: update_processed_data(measurement, \"pm2_5\", site_name, measurement_date)\n", + " if \"pm10\" in measurement: update_processed_data(measurement, \"pm10\", site_name, measurement_date)\n", + " if \"so2\" in measurement: update_processed_data(measurement, \"so2\", site_name, measurement_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "forecast_processed_data = {\"no2\":{\n", + " \"values\":[],\n", + " \"times\":[]\n", + " },\n", + " \"o3\":{\n", + " \"values\":[],\n", + " \"times\":[]\n", + " },\n", + " \"pm2_5\":{\n", + " \"values\":[],\n", + " \"times\":[]\n", + " },\n", + " \"pm10\":{\n", + " \"values\":[],\n", + " \"times\":[]\n", + " },\n", + " \"so2\":{\n", + " \"values\":[],\n", + " \"times\":[]\n", + " }} \n", + "\n", + "for forecast in forecast_api_request_result:\n", + " forecast_date = datetime.strptime(forecast[\"valid_time\"], '%Y-%m-%dT%H:%M:%SZ')\n", + " if \"no2\" in forecast: \n", + " forecast_processed_data[\"no2\"][\"values\"].append(forecast[\"no2\"][\"value\"])\n", + " forecast_processed_data[\"no2\"][\"times\"].append(forecast_date)\n", + " if \"o3\" in forecast: \n", + " forecast_processed_data[\"o3\"][\"values\"].append(forecast[\"o3\"][\"value\"])\n", + " forecast_processed_data[\"o3\"][\"times\"].append(forecast_date)\n", + " if \"pm2_5\" in forecast: \n", + " forecast_processed_data[\"pm2_5\"][\"values\"].append(forecast[\"pm2_5\"][\"value\"])\n", + " forecast_processed_data[\"pm2_5\"][\"times\"].append(forecast_date)\n", + " if \"pm10\" in forecast: \n", + " forecast_processed_data[\"pm10\"][\"values\"].append(forecast[\"pm10\"][\"value\"])\n", + " forecast_processed_data[\"pm10\"][\"times\"].append(forecast_date)\n", + " if \"so2\" in forecast: \n", + " forecast_processed_data[\"so2\"][\"values\"].append(forecast[\"so2\"][\"value\"])\n", + " forecast_processed_data[\"so2\"][\"times\"].append(forecast_date)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Select Measuring stations to display\n", + "\n", + "The list of stations is derived from the data obtained from the API call. \n", + "\n", + "**ATTENTION: Some of the stations may not have data for a given pollutant. Therefore if you deselect all stations that have data for a given pollutant that pollutants graph will no longer render.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stations_to_display = []\n", + "\n", + "stations_set = set()\n", + "\n", + "def update_stations_to_display(stations_input):\n", + " global stations_to_display\n", + " stations_to_display = stations_input\n", + "\n", + "for pollutant in in_situ_processed_data:\n", + " for measuring_station, station_data in in_situ_processed_data[pollutant].items():\n", + " stations_set.add(measuring_station)\n", + "\n", + "stations = list(stations_set)\n", + "\n", + "stations_widget = widgets.SelectMultiple(\n", + " options=stations,\n", + " value=stations,\n", + " description='Stations:',\n", + ")\n", + "\n", + "interact(update_stations_to_display, stations_input=stations_widget)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generating the Graphs\n", + "\n", + "The below code cell will generate five graphs. One for each pollutant. Each line represents a measuring station. \n", + "\n", + "You are also able to change the transparency of the AQI background of the graphs by adjusting the `aqi_block_color_alpha` below. 0.3 is the default.\n", + "Furthermore, by default if there are more than 20 stations the legend is hidden, this can be changed by modifying the variable `hide_legend_if_exceed_stations`.\n", + "\n", + "**WARNING: Below, changing the line `ax.xaxis.set_major_locator(mdates.DayLocator())` to `ax.xaxis.set_major_locator(mdates.HourLocator())` will make the tick labels display every hour instead of every day, this can cause issues though so this is not recommended**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "import numpy as np\n", + "\n", + "aqi_block_color_alpha = 0.3\n", + "hide_legend_if_exceed_stations = 20\n", + "\n", + "aqi_ranges = {\n", + " \"o3\": [50, 100, 130, 240, 380, 800],\n", + " \"no2\": [40, 90, 120, 230, 340, 1000],\n", + " \"so2\": [100, 200, 350, 500, 750, 1250],\n", + " \"pm10\": [20, 40, 50, 100, 150, 1200],\n", + " \"pm2_5\": [10, 20, 25, 50, 75, 800]\n", + "}\n", + "\n", + "formatted_polutants = {\"no2\": \"Nitrogen Dioxide\", \"o3\":\"Ozone\", \"pm2_5\":\"PM2.5\", \"pm10\":\"PM10\", \"so2\": \"Sulphur Dioxide\"}\n", + "\n", + "def calculate_graph_width(processed_data, pollutant, base_width=10, max_width=30):\n", + " max_data_points = max(\n", + " len(station_data[\"times\"]) \n", + " for station_data in processed_data[pollutant].values()\n", + " )\n", + " return min(max(base_width, max_data_points / 20), max_width)\n", + "\n", + "def has_common_element(list1, list2):\n", + " return bool(set(list1) & set(list2))\n", + "\n", + "def create_colour_bar(from_value, to_value, max_value, colour):\n", + " if max_value < to_value:\n", + " ax.axhspan(from_value, max_value, color=colour, alpha=aqi_block_color_alpha, lw=0)\n", + " return True\n", + " else:\n", + " ax.axhspan(from_value, to_value, color=colour, alpha=aqi_block_color_alpha, lw=0)\n", + " return False\n", + "\n", + "def display_colour_bars(max_value):\n", + " has_hit_max_value = False\n", + " if not has_hit_max_value : has_hit_max_value = create_colour_bar(0, aqi_ranges[pollutant][0],max_value, '#50f0e5')\n", + " if not has_hit_max_value : has_hit_max_value = create_colour_bar(aqi_ranges[pollutant][0], aqi_ranges[pollutant][1], max_value, '#50ccaa')\n", + " if not has_hit_max_value : has_hit_max_value = create_colour_bar(aqi_ranges[pollutant][1], aqi_ranges[pollutant][2], max_value, '#f0e641')\n", + " if not has_hit_max_value : has_hit_max_value = create_colour_bar(aqi_ranges[pollutant][2], aqi_ranges[pollutant][3], max_value, '#ff505080')\n", + " if not has_hit_max_value : has_hit_max_value = create_colour_bar(aqi_ranges[pollutant][3], aqi_ranges[pollutant][4], max_value, '#960032')\n", + " if not has_hit_max_value : has_hit_max_value = create_colour_bar(aqi_ranges[pollutant][4], max_value, max_value, '#7d2181')\n", + "\n", + "for pollutant in in_situ_processed_data:\n", + " if not has_common_element(list(in_situ_processed_data[pollutant].keys()), list(stations_to_display)):\n", + " continue\n", + "\n", + " graph_width = calculate_graph_width(in_situ_processed_data, pollutant)\n", + "\n", + " fig, ax = plt.subplots(figsize=(graph_width, 10))\n", + " \n", + " all_times = []\n", + " max_value = 0\n", + " if len(forecast_processed_data[pollutant][\"values\"]) > 0:\n", + " max_value = max(max_value, max(forecast_processed_data[pollutant][\"values\"]))\n", + " ax.plot(forecast_processed_data[pollutant][\"times\"], forecast_processed_data[pollutant][\"values\"], color='#000000', linewidth=5, linestyle='dashed', zorder=100000000)\n", + " \n", + " station_count = 0 \n", + " \n", + " for measuring_station, station_data in in_situ_processed_data[pollutant].items():\n", + " if measuring_station in list(stations_to_display):\n", + " station_count = station_count + 1\n", + " times = station_data[\"times\"]\n", + " values = station_data[\"values\"]\n", + " max_value = max(max_value, max(values))\n", + " ax.plot(times, values, label=str(measuring_station))\n", + " all_times.extend(times)\n", + "\n", + " ax.set_xlim(min(all_times), max(all_times))\n", + " ax.xaxis.set_major_locator(mdates.DayLocator())\n", + " ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))\n", + " display_colour_bars(max_value)\n", + " ax.set_ylabel('Concentration (µg/m³)')\n", + " plt.title(f\"{city} - {formatted_polutants[pollutant]} \\n Date Range: {start_date.strftime('%d/%m/%Y')} - {end_date.strftime('%d/%m/%Y')}\")\n", + " plt.setp(ax.get_xticklabels(), rotation=45, ha=\"right\")\n", + " if station_count <= hide_legend_if_exceed_stations: plt.legend(bbox_to_anchor=(1, 1))\n", + " \n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}