From 0bff6b08aa120373125dba7f8cb584c69ef4873a Mon Sep 17 00:00:00 2001
From: Niu Zhixiao <niuzhixiao@hotmail.com>
Date: Wed, 25 Oct 2023 20:03:27 +0800
Subject: [PATCH] update tutorial

---
 _toc.yml                                      |   4 +-
 .../data/2023_FinalProject_Student_City.json  |   1 +
 chapters/data-analytics/scipy-solution.ipynb  | 213 ++++++++++++++++++
 3 files changed, 216 insertions(+), 2 deletions(-)
 create mode 100644 assets/data/2023_FinalProject_Student_City.json
 create mode 100644 chapters/data-analytics/scipy-solution.ipynb

diff --git a/_toc.yml b/_toc.yml
index 9430f5e..e32bc0e 100644
--- a/_toc.yml
+++ b/_toc.yml
@@ -40,8 +40,8 @@ parts:
       title: Tutorial
     - file: chapters/data-analytics/scipy-exercise
       title: Exercise
-    #- file: chapters/data-analytics/scipy-solution
-      #title: Solution
+    - file: chapters/data-analytics/scipy-solution
+      title: Solution
   - file: chapters/data-analytics/xarray
     title: Xarray
     sections:
diff --git a/assets/data/2023_FinalProject_Student_City.json b/assets/data/2023_FinalProject_Student_City.json
new file mode 100644
index 0000000..b756129
--- /dev/null
+++ b/assets/data/2023_FinalProject_Student_City.json
@@ -0,0 +1 @@
+{"A0252295B": "Palembang", "A0254731H": "Ipoh", "A0248871R": "Balikpapan", "A0252214U": "KotaKinabalu", "A0252451M": "Pontianak", "A0257104M": "Yangon", "A0251962B": "Medan", "A0251889L": "DaNang", "A0251816E": "Manila", "A0251709A": "Banjarmasin", "A0251887N": "Takeo", "A0251734H": "Singapore", "A0252567X": "PhnomPenh", "A0261515L": "Manado", "A0251942E": "CagayandeOro", "A0234145M": "Padang", "A0252142U": "KotaBharu", "A0252363J": "Yogyakarta", "A0254920H": "Makassar", "A0257695J": "Mansilingan", "A0255288R": "Bekasi", "A0251742J": "Zamboanga", "A0252314R": "HoChiMinhCity"}
\ No newline at end of file
diff --git a/chapters/data-analytics/scipy-solution.ipynb b/chapters/data-analytics/scipy-solution.ipynb
new file mode 100644
index 0000000..f3350ad
--- /dev/null
+++ b/chapters/data-analytics/scipy-solution.ipynb
@@ -0,0 +1,213 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "5202ee2c",
+   "metadata": {},
+   "source": [
+    "# SciPy Solution"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fe1b9fa4",
+   "metadata": {},
+   "source": [
+    "Choosing a probability distribution to fit daily precipitation depths is important for precipitation frequency analysis, stochastic precipitation modeling, and climate assessments.  Read the file ../../assets/data/Changi_daily_rainfall.csv and complete the following tasks."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fe90719f",
+   "metadata": {},
+   "source": [
+    "## Task 1\n",
+    "Extract the wet-day series for 2020 from the raw daily rainfall dataset and calculate a few descriptive statistics: mean, variance, skewness, kurtosis, L-CV, and L-skewness. Is the distribution left-skewed or right-skewed? Is the distribution more or less peaked than the normal distribution? \n",
+    "\n",
+    "Note: The wet-day series should be constructed by excluding events whose magnitude is less than 0.25 mm/day (0.25 mm/day is the minimum precipitation that can be recorded by the in situ rain gauge). \n",
+    "\n",
+    "The L-CV is L-coefficient of variation and can be calculated as:\n",
+    "\n",
+    "$$\\text{L-CV}=\\tau_2=\\lambda_2/\\lambda_1,$$\n",
+    "\n",
+    "and the L-skewness is L-coefficient of skewness, which is calculated as:\n",
+    "\n",
+    "$$\\text{L-skewness}=\\tau_3=\\lambda_3/\\lambda_2,$$\n",
+    "\n",
+    "where $\\lambda_1$, $\\lambda_2$ and $\\lambda_3$ are the first three L-moments (details can be found in `SciPy tutorial`)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "3ae47cdd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "mean: 11.42\n",
+      "variance 165.47\n",
+      "Skewness 1.72\n",
+      "Kurtosis: 3.04\n",
+      "L-CV: 0.56\n",
+      "L-skewness 0.39\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your solution goes here.\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from scipy import stats\n",
+    "\n",
+    "df = pd.read_csv('../../assets/data/Changi_daily_rainfall.csv', index_col=0, parse_dates=True)\n",
+    "df_sb = df.loc['2020']\n",
+    "wet_series = df_sb[df_sb>0.25].dropna().values.flatten()\n",
+    "\n",
+    "\n",
+    "def samlmom3(sample):\n",
+    "    \"\"\"\n",
+    "    samlmom3 returns the first three L-moments of samples\n",
+    "    sample is the 1-d array\n",
+    "    n is the total number of the samples, j is the j_th sample\n",
+    "    \"\"\"\n",
+    "    n = len(sample)\n",
+    "    sample = np.sort(sample.reshape(n))[::-1]\n",
+    "    b0 = np.mean(sample)\n",
+    "    b1 = np.array([(n - j - 1) * sample[j] / n / (n - 1)\n",
+    "                   for j in range(n)]).sum()\n",
+    "    b2 = np.array([(n - j - 1) * (n - j - 2) * sample[j] / n / (n - 1) / (n - 2)\n",
+    "                   for j in range(n - 1)]).sum()\n",
+    "    lmom1 = b0\n",
+    "    lmom2 = 2 * b1 - b0\n",
+    "    lmom3 = 6 * (b2 - b1) + b0\n",
+    "\n",
+    "    return lmom1, lmom2, lmom3\n",
+    "\n",
+    "\n",
+    "lmon1, lmon2, lmon3 = samlmom3(wet_series)\n",
+    "L_CV = lmon2 / lmon1\n",
+    "L_skew = lmon3/lmon2\n",
+    "\n",
+    "print('mean: %.2f' % wet_series.mean())\n",
+    "print('variance %.2f' % wet_series.var())\n",
+    "print('Skewness %.2f' % stats.skew(wet_series, axis=0))\n",
+    "print('Kurtosis: %.2f' % stats.kurtosis(wet_series, axis=0, bias=False))\n",
+    "print('L-CV: %.2f' % L_CV)\n",
+    "print('L-skewness %.2f' % L_skew)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c48637f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# the distribution is right-skewed and more peaked than the normal distribution."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69ba12e4",
+   "metadata": {},
+   "source": [
+    "## Task 2\n",
+    "Early studies identified the gamma (G2) distribution as a suitable distribution for wet-day precipitation based on the traditional goodness-of-fit tests. Does wet-day series of Changi follow a gamma distribution?\n",
+    "Check the `scipy.stats` documentation and do the following: \n",
+    "* fit a gamma distribution to the Changi wet-day series;\n",
+    "* print out the estimated parameters;\n",
+    "* print out the goodness-of-fit test results using KS test."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2b40dc9f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6597146798552613 0.39999999999999997 15.511976229013442\n",
+      "KstestResult(statistic=0.05634367405742169, pvalue=0.6503157533710182)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your solution goes here.\n",
+    "from scipy.stats import gamma, kstest\n",
+    "\n",
+    "a, loc, scale = gamma.fit(wet_series)\n",
+    "\n",
+    "print(a, loc, scale)\n",
+    "gamma_dist = gamma(a, loc=loc, scale=scale)\n",
+    "print(stats.kstest(wet_series, gamma_dist.cdf))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "61fb4468",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The p-value of ks-stastistic is larger than 0.05. \n",
+    "# The wet-day series for 2020 of Changi follows a gamma distribution."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}