From 0bff6b08aa120373125dba7f8cb584c69ef4873a Mon Sep 17 00:00:00 2001 From: Niu Zhixiao Date: Wed, 25 Oct 2023 20:03:27 +0800 Subject: [PATCH] update tutorial --- _toc.yml | 4 +- .../data/2023_FinalProject_Student_City.json | 1 + chapters/data-analytics/scipy-solution.ipynb | 213 ++++++++++++++++++ 3 files changed, 216 insertions(+), 2 deletions(-) create mode 100644 assets/data/2023_FinalProject_Student_City.json create mode 100644 chapters/data-analytics/scipy-solution.ipynb diff --git a/_toc.yml b/_toc.yml index 9430f5e..e32bc0e 100644 --- a/_toc.yml +++ b/_toc.yml @@ -40,8 +40,8 @@ parts: title: Tutorial - file: chapters/data-analytics/scipy-exercise title: Exercise - #- file: chapters/data-analytics/scipy-solution - #title: Solution + - file: chapters/data-analytics/scipy-solution + title: Solution - file: chapters/data-analytics/xarray title: Xarray sections: diff --git a/assets/data/2023_FinalProject_Student_City.json b/assets/data/2023_FinalProject_Student_City.json new file mode 100644 index 0000000..b756129 --- /dev/null +++ b/assets/data/2023_FinalProject_Student_City.json @@ -0,0 +1 @@ +{"A0252295B": "Palembang", "A0254731H": "Ipoh", "A0248871R": "Balikpapan", "A0252214U": "KotaKinabalu", "A0252451M": "Pontianak", "A0257104M": "Yangon", "A0251962B": "Medan", "A0251889L": "DaNang", "A0251816E": "Manila", "A0251709A": "Banjarmasin", "A0251887N": "Takeo", "A0251734H": "Singapore", "A0252567X": "PhnomPenh", "A0261515L": "Manado", "A0251942E": "CagayandeOro", "A0234145M": "Padang", "A0252142U": "KotaBharu", "A0252363J": "Yogyakarta", "A0254920H": "Makassar", "A0257695J": "Mansilingan", "A0255288R": "Bekasi", "A0251742J": "Zamboanga", "A0252314R": "HoChiMinhCity"} \ No newline at end of file diff --git a/chapters/data-analytics/scipy-solution.ipynb b/chapters/data-analytics/scipy-solution.ipynb new file mode 100644 index 0000000..f3350ad --- /dev/null +++ b/chapters/data-analytics/scipy-solution.ipynb @@ -0,0 +1,213 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "id": "5202ee2c", + "metadata": {}, + "source": [ + "# SciPy Solution" + ] + }, + { + "cell_type": "markdown", + "id": "fe1b9fa4", + "metadata": {}, + "source": [ + "Choosing a probability distribution to fit daily precipitation depths is important for precipitation frequency analysis, stochastic precipitation modeling, and climate assessments. Read the file ../../assets/data/Changi_daily_rainfall.csv and complete the following tasks." + ] + }, + { + "cell_type": "markdown", + "id": "fe90719f", + "metadata": {}, + "source": [ + "## Task 1\n", + "Extract the wet-day series for 2020 from the raw daily rainfall dataset and calculate a few descriptive statistics: mean, variance, skewness, kurtosis, L-CV, and L-skewness. Is the distribution left-skewed or right-skewed? Is the distribution more or less peaked than the normal distribution? \n", + "\n", + "Note: The wet-day series should be constructed by excluding events whose magnitude is less than 0.25 mm/day (0.25 mm/day is the minimum precipitation that can be recorded by the in situ rain gauge). 
\n", + "\n", + "The L-CV is the L-coefficient of variation and can be calculated as:\n", + "\n", + "$$\\text{L-CV}=\\tau_2=\\lambda_2/\\lambda_1,$$\n", + "\n", + "and the L-skewness is the L-coefficient of skewness, which is calculated as:\n", + "\n", + "$$\\text{L-skewness}=\\tau_3=\\lambda_3/\\lambda_2,$$\n", + "\n", + "where $\\lambda_1$, $\\lambda_2$ and $\\lambda_3$ are the first three L-moments (details can be found in the `SciPy tutorial`)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3ae47cdd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mean: 11.42\n", + "variance 165.47\n", + "Skewness 1.72\n", + "Kurtosis: 3.04\n", + "L-CV: 0.56\n", + "L-skewness 0.39\n" + ] + } + ], + "source": [ + "# Your solution goes here.\n", + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats\n", + "\n", + "df = pd.read_csv('../../assets/data/Changi_daily_rainfall.csv', index_col=0, parse_dates=True)\n", + "df_sb = df.loc['2020']\n", + "wet_series = df_sb[df_sb>0.25].dropna().values.flatten()\n", + "\n", + "\n", + "def samlmom3(sample):\n", + " \"\"\"\n", + " samlmom3 returns the first three L-moments of samples\n", + " sample is the 1-d array\n", + " n is the total number of the samples, j is the j_th sample\n", + " \"\"\"\n", + " n = len(sample)\n", + " sample = np.sort(sample.reshape(n))[::-1]\n", + " b0 = np.mean(sample)\n", + " b1 = np.array([(n - j - 1) * sample[j] / n / (n - 1)\n", + " for j in range(n)]).sum()\n", + " b2 = np.array([(n - j - 1) * (n - j - 2) * sample[j] / n / (n - 1) / (n - 2)\n", + " for j in range(n - 1)]).sum()\n", + " lmom1 = b0\n", + " lmom2 = 2 * b1 - b0\n", + " lmom3 = 6 * (b2 - b1) + b0\n", + "\n", + " return lmom1, lmom2, lmom3\n", + "\n", + "\n", + "lmon1, lmon2, lmon3 = samlmom3(wet_series)\n", + "L_CV = lmon2 / lmon1\n", + "L_skew = lmon3/lmon2\n", + "\n", + "print('mean: %.2f' % wet_series.mean())\n", + "print('variance %.2f' % wet_series.var())\n", + 
"print('Skewness %.2f' % stats.skew(wet_series, axis=0))\n", + "print('Kurtosis: %.2f' % stats.kurtosis(wet_series, axis=0, bias=False))\n", + "print('L-CV: %.2f' % L_CV)\n", + "print('L-skewness %.2f' % L_skew)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c48637f2", + "metadata": {}, + "outputs": [], + "source": [ + "# The distribution is right-skewed (skewness > 0) and more peaked than the normal distribution (excess kurtosis > 0)." + ] + }, + { + "cell_type": "markdown", + "id": "69ba12e4", + "metadata": {}, + "source": [ + "## Task 2\n", + "Early studies identified the gamma (G2) distribution as a suitable distribution for wet-day precipitation based on traditional goodness-of-fit tests. Does the wet-day series of Changi follow a gamma distribution?\n", + "Check the `scipy.stats` documentation and do the following: \n", + "* fit a gamma distribution to the Changi wet-day series;\n", + "* print out the estimated parameters;\n", + "* print out the goodness-of-fit test results using the KS test." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2b40dc9f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.6597146798552613 0.39999999999999997 15.511976229013442\n", + "KstestResult(statistic=0.05634367405742169, pvalue=0.6503157533710182)\n" + ] + } + ], + "source": [ + "# Your solution goes here.\n", + "from scipy.stats import gamma, kstest\n", + "\n", + "a, loc, scale = gamma.fit(wet_series)\n", + "\n", + "print(a, loc, scale)\n", + "gamma_dist = gamma(a, loc=loc, scale=scale)\n", + "print(stats.kstest(wet_series, gamma_dist.cdf))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "61fb4468", + "metadata": {}, + "outputs": [], + "source": [ + "# The p-value of the KS test (about 0.65) is larger than 0.05, \n", + "# so we cannot reject the hypothesis that the 2020 Changi wet-day series follows a gamma distribution." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}