From d641f7ff92455bfb9f79796d5618499c1c4318f0 Mon Sep 17 00:00:00 2001 From: Longye Tian Date: Mon, 22 Jul 2024 09:43:05 +1000 Subject: [PATCH 1/5] update prob_dist --- lectures/prob_dist.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/lectures/prob_dist.md b/lectures/prob_dist.md index d8d04f7e..ec1539fe 100644 --- a/lectures/prob_dist.md +++ b/lectures/prob_dist.md @@ -289,21 +289,13 @@ The mean and variance are: ```{code-cell} ipython3 λ = 2 u = scipy.stats.poisson(λ) -``` - -```{code-cell} ipython3 u.mean(), u.var() ``` - -The the expectation of Poisson distribution is $\lambda$ and the variance is also $\lambda$. + +The expectation of Poisson distribution is $\lambda$ and the variance is also $\lambda$. Here's the PMF: -```{code-cell} ipython3 -λ = 2 -u = scipy.stats.poisson(λ) -``` - ```{code-cell} ipython3 u.pmf(1) ``` From 707c5dd8af4342826cb83257d5c6188a429cf084 Mon Sep 17 00:00:00 2001 From: Longye Tian Date: Mon, 22 Jul 2024 10:23:04 +1000 Subject: [PATCH 2/5] add x y label --- lectures/prob_dist.md | 50 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/lectures/prob_dist.md b/lectures/prob_dist.md index ec1539fe..0532ad36 100644 --- a/lectures/prob_dist.md +++ b/lectures/prob_dist.md @@ -124,6 +124,8 @@ S = np.arange(1, n+1) ax.plot(S, u.pmf(S), linestyle='', marker='o', alpha=0.8, ms=4) ax.vlines(S, 0, u.pmf(S), lw=0.2) ax.set_xticks(S) +ax.set_xlabel('S') +ax.set_ylabel('PMF') plt.show() ``` @@ -136,6 +138,8 @@ S = np.arange(1, n+1) ax.step(S, u.cdf(S)) ax.vlines(S, 0, u.cdf(S), lw=0.2) ax.set_xticks(S) +ax.set_xlabel('S') +ax.set_ylabel('CDF') plt.show() ``` @@ -232,6 +236,8 @@ S = np.arange(1, n+1) ax.plot(S, u.pmf(S), linestyle='', marker='o', alpha=0.8, ms=4) ax.vlines(S, 0, u.pmf(S), lw=0.2) ax.set_xticks(S) +ax.set_xlabel('S') +ax.set_ylabel('PMF') plt.show() ``` @@ -244,6 +250,8 @@ S = np.arange(1, n+1) ax.step(S, u.cdf(S)) ax.vlines(S, 0, u.cdf(S), lw=0.2) ax.set_xticks(S) +ax.set_xlabel('S') +ax.set_ylabel('CDF') plt.show() ``` @@ -267,6 +275,8 @@ u_sum = np.cumsum(u.pmf(S)) ax.step(S, u_sum) ax.vlines(S, 0, u_sum, lw=0.2) ax.set_xticks(S) +ax.set_xlabel('S') +ax.set_ylabel('CDF') plt.show() ``` @@ -306,6 +316,8 @@ S = np.arange(1, n+1) ax.plot(S, u.pmf(S), linestyle='', marker='o', alpha=0.8, ms=4) ax.vlines(S, 0, u.pmf(S), lw=0.2) ax.set_xticks(S) +ax.set_xlabel('S') +ax.set_ylabel('PMF') plt.show() ``` @@ -378,7 +390,8 @@ for μ, σ in zip(μ_vals, σ_vals): ax.plot(x_grid, u.pdf(x_grid), alpha=0.5, lw=2, label=f'$\mu={μ}, \sigma={σ}$') - +ax.set_xlabel('x') +ax.set_ylabel('PDF') plt.legend() plt.show() ``` @@ -394,6 +407,8 @@ for μ, σ in zip(μ_vals, σ_vals): alpha=0.5, lw=2, label=f'$\mu={μ}, \sigma={σ}$') ax.set_ylim(0, 1) +ax.set_xlabel('x') +ax.set_ylabel('CDF') plt.legend() plt.show() ``` @@ -438,7 +453,8 @@ for μ, σ in zip(μ_vals, σ_vals): ax.plot(x_grid, u.pdf(x_grid), alpha=0.5, lw=2, label=f'$\mu={μ}, \sigma={σ}$') - +ax.set_xlabel('x') +ax.set_ylabel('PDF') plt.legend() plt.show() ``` @@ -453,6 +469,8 @@ for σ in σ_vals: label=f'$\mu={μ}, \sigma={σ}$') ax.set_ylim(0, 1) ax.set_xlim(0, 3) +ax.set_xlabel('x') +ax.set_ylabel('CDF') plt.legend() plt.show() ``` @@ -492,6 +510,8 @@ for λ in λ_vals: ax.plot(x_grid, u.pdf(x_grid), alpha=0.5, lw=2, label=f'$\lambda={λ}$') +ax.set_xlabel('x') +ax.set_ylabel('PDF') plt.legend() plt.show() ``` @@ -504,6 +524,8 @@ for λ in λ_vals: alpha=0.5, lw=2, label=f'$\lambda={λ}$') ax.set_ylim(0, 1) +ax.set_xlabel('x') +ax.set_ylabel('CDF') plt.legend() plt.show() ``` @@ -549,6 +571,8 @@ for α, β in zip(α_vals, β_vals): ax.plot(x_grid, u.pdf(x_grid), alpha=0.5, lw=2, label=fr'$\alpha={α}, \beta={β}$') +ax.set_xlabel('x') +ax.set_ylabel('PDF') plt.legend() plt.show() ``` @@ -561,6 +585,8 @@ for α, β in zip(α_vals, β_vals): alpha=0.5, lw=2, label=fr'$\alpha={α}, \beta={β}$') ax.set_ylim(0, 1) +ax.set_xlabel('x') +ax.set_ylabel('CDF') plt.legend() plt.show() ``` @@ -606,6 +632,8 @@ for α, β in zip(α_vals, β_vals): ax.plot(x_grid, u.pdf(x_grid), alpha=0.5, lw=2, label=fr'$\alpha={α}, \beta={β}$') +ax.set_xlabel('x') +ax.set_ylabel('PDF') plt.legend() plt.show() ``` @@ -618,6 +646,8 @@ for α, β in zip(α_vals, β_vals): alpha=0.5, lw=2, label=fr'$\alpha={α}, \beta={β}$') ax.set_ylim(0, 1) +ax.set_xlabel('x') +ax.set_ylabel('CDF') plt.legend() plt.show() ``` @@ -712,6 +742,8 @@ We can histogram the income distribution we just constructed as follows x = df['income'] fig, ax = plt.subplots() ax.hist(x, bins=5, density=True, histtype='bar') +ax.set_xlabel('Income') +ax.set_ylabel('Density') plt.show() ``` @@ -752,6 +784,8 @@ x_amazon = np.asarray(data) ```{code-cell} ipython3 fig, ax = plt.subplots() ax.hist(x_amazon, bins=20) +ax.set_xlabel('Monthly Return (Percent Change)') +ax.set_ylabel('Density') plt.show() ``` @@ -766,6 +800,8 @@ KDE will generate a smooth curve that approximates the PDF. ```{code-cell} ipython3 fig, ax = plt.subplots() sns.kdeplot(x_amazon, ax=ax) +ax.set_xlabel('Monthly Return (Percent Change)') +ax.set_ylabel('KDE') plt.show() ``` @@ -776,6 +812,8 @@ fig, ax = plt.subplots() sns.kdeplot(x_amazon, ax=ax, bw_adjust=0.1, alpha=0.5, label="bw=0.1") sns.kdeplot(x_amazon, ax=ax, bw_adjust=0.5, alpha=0.5, label="bw=0.5") sns.kdeplot(x_amazon, ax=ax, bw_adjust=1, alpha=0.5, label="bw=1") +ax.set_xlabel('Monthly Return (Percent Change)') +ax.set_ylabel('KDE') plt.legend() plt.show() ``` @@ -794,6 +832,8 @@ Yet another way to display an observed distribution is via a violin plot. ```{code-cell} ipython3 fig, ax = plt.subplots() ax.violinplot(x_amazon) +ax.set_ylabel('Monthly Return (Percent Change)') +ax.set_xlabel('KDE') plt.show() ``` @@ -814,6 +854,8 @@ x_apple = np.asarray(data) ```{code-cell} ipython3 fig, ax = plt.subplots() ax.violinplot([x_amazon, x_apple]) +ax.set_ylabel('Monthly Return (Percent Change)') +ax.set_xlabel('KDE') plt.show() ``` @@ -847,6 +889,8 @@ x_grid = np.linspace(-50, 65, 200) fig, ax = plt.subplots() ax.plot(x_grid, u.pdf(x_grid)) ax.hist(x_amazon, density=True, bins=40) +ax.set_xlabel('Monthly Return (Percent Change)') +ax.set_ylabel('Density') plt.show() ``` @@ -874,6 +918,8 @@ x_grid = np.linspace(-4, 4, 200) fig, ax = plt.subplots() ax.plot(x_grid, u.pdf(x_grid)) ax.hist(x_draws, density=True, bins=40) +ax.set_xlabel('x') +ax.set_ylabel('Density') plt.show() ``` From f707a1195dc308131bf47b111db22b388101e68a Mon Sep 17 00:00:00 2001 From: Longye Tian <133612246+longye-tian@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:07:18 +1000 Subject: [PATCH 3/5] Update lectures/prob_dist.md Co-authored-by: Matt McKay --- lectures/prob_dist.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lectures/prob_dist.md b/lectures/prob_dist.md index 0532ad36..da57a721 100644 --- a/lectures/prob_dist.md +++ b/lectures/prob_dist.md @@ -784,7 +784,7 @@ x_amazon = np.asarray(data) ```{code-cell} ipython3 fig, ax = plt.subplots() ax.hist(x_amazon, bins=20) -ax.set_xlabel('Monthly Return (Percent Change)') +ax.set_xlabel('monthly return (percent change)') ax.set_ylabel('Density') plt.show() ``` From b388345a794d59b983b04012b4a3d5bd4ed7bcf9 Mon Sep 17 00:00:00 2001 From: Longye Tian Date: Mon, 22 Jul 2024 11:27:32 +1000 Subject: [PATCH 4/5] update xy label using lower case --- lectures/prob_dist.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lectures/prob_dist.md b/lectures/prob_dist.md index da57a721..f711bb26 100644 --- a/lectures/prob_dist.md +++ b/lectures/prob_dist.md @@ -742,8 +742,8 @@ We can histogram the income distribution we just constructed as follows x = df['income'] fig, ax = plt.subplots() ax.hist(x, bins=5, density=True, histtype='bar') -ax.set_xlabel('Income') -ax.set_ylabel('Density') +ax.set_xlabel('income') +ax.set_ylabel('density') plt.show() ``` @@ -785,7 +785,7 @@ x_amazon = np.asarray(data) fig, ax = plt.subplots() ax.hist(x_amazon, bins=20) ax.set_xlabel('monthly return (percent change)') -ax.set_ylabel('Density') +ax.set_ylabel('density') plt.show() ``` @@ -800,7 +800,7 @@ KDE will generate a smooth curve that approximates the PDF. ```{code-cell} ipython3 fig, ax = plt.subplots() sns.kdeplot(x_amazon, ax=ax) -ax.set_xlabel('Monthly Return (Percent Change)') +ax.set_xlabel('monthly return (percent change)') ax.set_ylabel('KDE') plt.show() ``` @@ -812,7 +812,7 @@ fig, ax = plt.subplots() sns.kdeplot(x_amazon, ax=ax, bw_adjust=0.1, alpha=0.5, label="bw=0.1") sns.kdeplot(x_amazon, ax=ax, bw_adjust=0.5, alpha=0.5, label="bw=0.5") sns.kdeplot(x_amazon, ax=ax, bw_adjust=1, alpha=0.5, label="bw=1") -ax.set_xlabel('Monthly Return (Percent Change)') +ax.set_xlabel('monthly return (percent change)') ax.set_ylabel('KDE') plt.legend() plt.show() @@ -832,7 +832,7 @@ Yet another way to display an observed distribution is via a violin plot. ```{code-cell} ipython3 fig, ax = plt.subplots() ax.violinplot(x_amazon) -ax.set_ylabel('Monthly Return (Percent Change)') +ax.set_ylabel('monthly return (percent change)') ax.set_xlabel('KDE') plt.show() ``` @@ -854,7 +854,7 @@ x_apple = np.asarray(data) ```{code-cell} ipython3 fig, ax = plt.subplots() ax.violinplot([x_amazon, x_apple]) -ax.set_ylabel('Monthly Return (Percent Change)') +ax.set_ylabel('monthly return (percent change)') ax.set_xlabel('KDE') plt.show() ``` @@ -889,8 +889,8 @@ x_grid = np.linspace(-50, 65, 200) fig, ax = plt.subplots() ax.plot(x_grid, u.pdf(x_grid)) ax.hist(x_amazon, density=True, bins=40) -ax.set_xlabel('Monthly Return (Percent Change)') -ax.set_ylabel('Density') +ax.set_xlabel('monthly return (percent change)') +ax.set_ylabel('density') plt.show() ``` @@ -919,7 +919,7 @@ fig, ax = plt.subplots() ax.plot(x_grid, u.pdf(x_grid)) ax.hist(x_draws, density=True, bins=40) ax.set_xlabel('x') -ax.set_ylabel('Density') +ax.set_ylabel('density') plt.show() ``` From 65749428849211959a68e4d0b03ecf343b18fde3 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Tue, 23 Jul 2024 11:24:49 +1000 Subject: [PATCH 5/5] Update lectures/prob_dist.md --- lectures/prob_dist.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lectures/prob_dist.md b/lectures/prob_dist.md index f711bb26..09174da3 100644 --- a/lectures/prob_dist.md +++ b/lectures/prob_dist.md @@ -302,7 +302,7 @@ u = scipy.stats.poisson(λ) u.mean(), u.var() ``` -The expectation of Poisson distribution is $\lambda$ and the variance is also $\lambda$. +The expectation of the Poisson distribution is $\lambda$ and the variance is also $\lambda$. Here's the PMF: