From 93febed32979d73ffe3ba1c7af90ccde1ffb01e8 Mon Sep 17 00:00:00 2001
From: John Stachurski
Date: Tue, 12 Mar 2024 07:01:03 +1100
Subject: [PATCH 1/3] misc

---
 lectures/markov_chains_II.md | 245 ++++++++++++++++++-----------------
 1 file changed, 129 insertions(+), 116 deletions(-)

diff --git a/lectures/markov_chains_II.md b/lectures/markov_chains_II.md
index f2940ac9..30aac518 100644
--- a/lectures/markov_chains_II.md
+++ b/lectures/markov_chains_II.md
@@ -143,11 +143,11 @@ We'll come back to this a bit later.
 
 ### Irreducibility and stationarity
 
-We discussed the uniqueness of the stationary in the {ref}`previous lecture ` requires the transition matrix to be everywhere positive.
+We discussed the uniqueness of stationary distributions in our {doc}`earlier lecture on Markov chains `.
 
-In fact irreducibility is enough for the uniqueness of the stationary distribution to hold if the distribution exists.
+There we {ref}`stated ` that uniqueness holds when the transition matrix is everywhere positive.
 
-We can revise the {ref}`theorem` into the following fundamental theorem:
+In fact, irreducibility is sufficient:
 
 ```{prf:theorem}
 :label: mc_conv_thm
@@ -163,7 +163,6 @@ Theorem 5.2 of {cite}`haggstrom2002finite`.
 
 (ergodicity)=
 ## Ergodicity
-Please note that we use $\mathbb{1}$ for a vector of ones in this lecture.
 
 Under irreducibility, yet another important result obtains:
 
@@ -205,7 +204,7 @@ sequence of random variables is [not IID](iid_violation).
 
 
 (mc_eg1-2)=
-### Example 1
+### Example: Ergodicity and unemployment
 
 Recall our cross-sectional interpretation of the employment/unemployment model {ref}`discussed above `.
 
@@ -227,113 +226,63 @@ This is one aspect of the concept of ergodicity.
 
 
 (ergo)=
-### Example 2
+### Example: Hamilton dynamics
 
 Another example is the Hamilton dynamics we {ref}`discussed before `.
 
-The {ref}`graph ` of the Markov chain shows it is irreducible
+Let $\{X_t\}$ be a sample path generated by these dynamics.
 
-Therefore, we can see the sample path averages for each state (the fraction of
-time spent in each state) converges to the stationary distribution regardless of
-the starting state
-
-Let's denote the fraction of time spent in state $x$ at time $t$ in our sample path as $\hat p_t(x)$ where
+Let's denote the fraction of time spent in state $x$ over the period $t=1,
+\ldots, n$ by $\hat p_n(x)$, so that
 
 $$
-\hat p_t(x) := \frac{1}{t} \sum_{t = 1}^t \mathbf{1}\{X_t = x\}
+    \hat p_n(x) := \frac{1}{n} \sum_{t = 1}^n \mathbf{1}\{X_t = x\}
+    \qquad (x \in \{0, 1, 2\})
 $$
 
-Here we compare $\hat p_t(x)$ with the stationary distribution $\psi^* (x)$ for different starting points $x_0$.
+The {ref}`graph ` of the Markov chain shows it is irreducible, so
+ergodicity holds.
 
-```{code-cell} ipython3
-P = np.array([[0.971, 0.029, 0.000],
-              [0.145, 0.778, 0.077],
-              [0.000, 0.508, 0.492]])
-ts_length = 10_000
-mc = qe.MarkovChain(P)
-n = len(P)
-fig, axes = plt.subplots(nrows=1, ncols=n, figsize=(15, 6))
-ψ_star = mc.stationary_distributions[0]
-plt.subplots_adjust(wspace=0.35)
+Hence we expect that $\hat p_n(x) \approx \psi^*(x)$ when $n$ is large.
 
-for i in range(n):
-    axes[i].axhline(ψ_star[i], linestyle='dashed', lw=2, color='black',
-                    label = fr'$\psi^*({i})$')
-    axes[i].set_xlabel('t')
-    axes[i].set_ylabel(fr'$\hat p_t({i})$')
+The next figure shows convergence of $\hat p_n(x)$ to $\psi^*(x)$ when $x=1$ and
+$X_0$ is either $0, 1$ or $2$.
-    # Compute the fraction of time spent, starting from different x_0s
-    for x0, col in ((0, 'blue'), (1, 'green'), (2, 'red')):
-        # Generate time series that starts at different x0
-        X = mc.simulate(ts_length, init=x0)
-        p_hat = (X == i).cumsum() / (1 + np.arange(ts_length, dtype=float))
-        axes[i].plot(p_hat, color=col, label=f'$x_0 = \, {x0} $')
-    axes[i].legend()
-plt.show()
-```
-
-Note the convergence to the stationary distribution regardless of the starting point $x_0$.
-
-### Example 3
+The figure shows convergence to the stationary distribution regardless of the
+initial condition $X_0$.
 
-Let's look at one more example with six states {ref}`discussed before `.
-
-
-$$
-P :=
-\begin{bmatrix}
-0.86 & 0.11 & 0.03 & 0.00 & 0.00 & 0.00 \\
-0.52 & 0.33 & 0.13 & 0.02 & 0.00 & 0.00 \\
-0.12 & 0.03 & 0.70 & 0.11 & 0.03 & 0.01 \\
-0.13 & 0.02 & 0.35 & 0.36 & 0.10 & 0.04 \\
-0.00 & 0.00 & 0.09 & 0.11 & 0.55 & 0.25 \\
-0.00 & 0.00 & 0.09 & 0.15 & 0.26 & 0.50
-\end{bmatrix}
-$$
-
-
-The {ref}`graph ` for the chain shows all states are reachable,
-indicating that this chain is irreducible.
-
-Here we visualize the difference between $\hat p_t(x)$ and the stationary distribution $\psi^* (x)$ for each state $x$
 
 ```{code-cell} ipython3
-P = [[0.86, 0.11, 0.03, 0.00, 0.00, 0.00],
-     [0.52, 0.33, 0.13, 0.02, 0.00, 0.00],
-     [0.12, 0.03, 0.70, 0.11, 0.03, 0.01],
-     [0.13, 0.02, 0.35, 0.36, 0.10, 0.04],
-     [0.00, 0.00, 0.09, 0.11, 0.55, 0.25],
-     [0.00, 0.00, 0.09, 0.15, 0.26, 0.50]]
-
+P = np.array([[0.971, 0.029, 0.000],
+              [0.145, 0.778, 0.077],
+              [0.000, 0.508, 0.492]])
 ts_length = 10_000
 mc = qe.MarkovChain(P)
 ψ_star = mc.stationary_distributions[0]
-fig, ax = plt.subplots(figsize=(9, 6))
-X = mc.simulate(ts_length)
-# Center the plot at 0
-ax.set_ylim(-0.25, 0.25)
-ax.axhline(0, linestyle='dashed', lw=2, color='black', alpha=0.4)
-
-
-for x0 in range(6):
-    # Calculate the fraction of time for each state
-    p_hat = (X == x0).cumsum() / (1 + np.arange(ts_length, dtype=float))
-    ax.plot(p_hat - ψ_star[x0], label=f'$x = {x0+1} $')
-    ax.set_xlabel('t')
-    ax.set_ylabel(r'$\hat p_t(x) - \psi^* (x)$')
+i = 1  # We study convergence to psi^*(x) when x = i
+fig, ax = plt.subplots()
+ax.axhline(ψ_star[i], linestyle='dashed', color='black',
+           label = fr'$\psi^*({i})$')
+# Compute the fraction of time spent in the target state, starting from different x_0s
+for x0 in range(3):
+    X = mc.simulate(ts_length, init=x0)
+    p_hat = (X == i).cumsum() / (1 + np.arange(ts_length))
+    ax.plot(p_hat, label=fr'$\psi^*({i})$ when $x_0 = \, {x0}$')
+ax.set_xlabel('t')
+ax.set_ylabel(fr'$\hat p_n({i})$')
 ax.legend()
 plt.show()
 ```
 
-Similar to previous examples, the sample path averages for each state converge
-to the stationary distribution.
+You might like to try changing $i=1$ to either $i=0$ or $i=2$.
 
-### Example 4
+In any of these cases, ergodicity will hold.
 
-Let's look at another example with two states: 0 and 1.
+### Example: a periodic chain
 
+Let's look at the following example with states 0 and 1:
 
 $$
 P :=
@@ -344,18 +293,21 @@ P :=
 $$
 
 
-The diagram of the Markov chain shows that it is **irreducible**
+The transition graph shows that this model is irreducible.
 
 ```{image} /_static/lecture_specific/markov_chains_II/example4.png
 :name: mc_example4
 :align: center
 ```
 
-In fact it has a periodic cycle --- the state cycles between the two states in a regular way.
+Notice that there is a periodic cycle --- the state cycles between the two states in a regular way.
-This is called [periodicity](https://stats.libretexts.org/Bookshelves/Probability_Theory/Probability_Mathematical_Statistics_and_Stochastic_Processes_(Siegrist)/16%3A_Markov_Processes/16.05%3A_Periodicity_of_Discrete-Time_Chains).
+Not surprisingly, this property
+is called [periodicity](https://stats.libretexts.org/Bookshelves/Probability_Theory/Probability_Mathematical_Statistics_and_Stochastic_Processes_(Siegrist)/16%3A_Markov_Processes/16.05%3A_Periodicity_of_Discrete-Time_Chains).
 
-It is still irreducible so ergodicity holds.
+Nonetheless, the model is irreducible, so ergodicity holds.
+
+The following figure illustrates this.
 
 ```{code-cell} ipython3
 P = np.array([[0, 1],
               [1, 0]])
 ts_length = 10_000
 mc = qe.MarkovChain(P)
 n = len(P)
 fig, axes = plt.subplots(nrows=1, ncols=n, figsize=(15, 6))
 ψ_star = mc.stationary_distributions[0]
 
 for i in range(n):
     axes[i].axhline(ψ_star[i], linestyle='dashed', lw=2, color='black',
                     label = fr'$\psi^*({i})$')
     axes[i].set_xlabel('t')
-    axes[i].set_ylabel(fr'$\hat p_t({i})$')
+    axes[i].set_ylabel(fr'$\hat p_n({i})$')
 
     # Compute the fraction of time spent, for each x
     for x0 in range(n):
         # Generate time series starting at different x_0
         X = mc.simulate(ts_length, init=x0)
-        p_hat = (X == i).cumsum() / (1 + np.arange(ts_length, dtype=float))
+        p_hat = (X == i).cumsum() / (1 + np.arange(ts_length))
         axes[i].plot(p_hat, label=f'$x_0 = \, {x0} $')
     axes[i].legend()
 
@@ -390,6 +342,64 @@ The proportion of time spent in a state can converge to the stationary distribut
 
 However, the distribution at each state does not.
 
+### Example: political institutions
+
+Let's go back to the political institutions model with six states discussed {ref}`in a previous lecture ` and study ergodicity.
+
+
+Here's the transition matrix.
+
+$$
+    P :=
+    \begin{bmatrix}
+    0.86 & 0.11 & 0.03 & 0.00 & 0.00 & 0.00 \\
+    0.52 & 0.33 & 0.13 & 0.02 & 0.00 & 0.00 \\
+    0.12 & 0.03 & 0.70 & 0.11 & 0.03 & 0.01 \\
+    0.13 & 0.02 & 0.35 & 0.36 & 0.10 & 0.04 \\
+    0.00 & 0.00 & 0.09 & 0.11 & 0.55 & 0.25 \\
+    0.00 & 0.00 & 0.09 & 0.15 & 0.26 & 0.50
+    \end{bmatrix}
+$$
+
+
+The {ref}`graph ` for the chain shows all states are reachable,
+indicating that this chain is irreducible.
+
+In the next figure, we visualize the difference $\hat p_n(x) - \psi^* (x)$ for each state $x$.
+
+Unlike the previous figure, $X_0$ is held fixed.
+
+```{code-cell} ipython3
+P = [[0.86, 0.11, 0.03, 0.00, 0.00, 0.00],
+     [0.52, 0.33, 0.13, 0.02, 0.00, 0.00],
+     [0.12, 0.03, 0.70, 0.11, 0.03, 0.01],
+     [0.13, 0.02, 0.35, 0.36, 0.10, 0.04],
+     [0.00, 0.00, 0.09, 0.11, 0.55, 0.25],
+     [0.00, 0.00, 0.09, 0.15, 0.26, 0.50]]
+
+ts_length = 10_000
+mc = qe.MarkovChain(P)
+ψ_star = mc.stationary_distributions[0]
+fig, ax = plt.subplots(figsize=(9, 6))
+X = mc.simulate(ts_length)
+# Center the plot at 0
+ax.set_ylim(-0.25, 0.25)
+ax.axhline(0, linestyle='dashed', lw=2, color='black', alpha=0.4)
+
+
+for x0 in range(len(P)):
+    # Calculate the fraction of time for each state
+    p_hat = (X == x0).cumsum() / (1 + np.arange(ts_length))
+    ax.plot(p_hat - ψ_star[x0], label=f'$x = {x0+1} $')
+    ax.set_xlabel('t')
+    ax.set_ylabel(r'$\hat p_n(x) - \psi^* (x)$')
+
+ax.legend()
+plt.show()
+```
+
+
+
 ### Expectations of geometric sums
 
 Sometimes we want to compute the mathematical expectation of a geometric sum, such as
 
@@ -422,15 +432,15 @@ Benhabib et al.
{cite}`benhabib_wealth_2019` estimated that the transition matri $$ P:= -\begin{bmatrix} -0.222 & 0.222 & 0.215 & 0.187 & 0.081 & 0.038 & 0.029 & 0.006 \\ -0.221 & 0.22 & 0.215 & 0.188 & 0.082 & 0.039 & 0.029 & 0.006 \\ -0.207 & 0.209 & 0.21 & 0.194 & 0.09 & 0.046 & 0.036 & 0.008 \\ -0.198 & 0.201 & 0.207 & 0.198 & 0.095 & 0.052 & 0.04 & 0.009 \\ -0.175 & 0.178 & 0.197 & 0.207 & 0.11 & 0.067 & 0.054 & 0.012 \\ -0.182 & 0.184 & 0.2 & 0.205 & 0.106 & 0.062 & 0.05 & 0.011 \\ -0.123 & 0.125 & 0.166 & 0.216 & 0.141 & 0.114 & 0.094 & 0.021 \\ -0.084 & 0.084 & 0.142 & 0.228 & 0.17 & 0.143 & 0.121 & 0.028 + \begin{bmatrix} + 0.222 & 0.222 & 0.215 & 0.187 & 0.081 & 0.038 & 0.029 & 0.006 \\ + 0.221 & 0.22 & 0.215 & 0.188 & 0.082 & 0.039 & 0.029 & 0.006 \\ + 0.207 & 0.209 & 0.21 & 0.194 & 0.09 & 0.046 & 0.036 & 0.008 \\ + 0.198 & 0.201 & 0.207 & 0.198 & 0.095 & 0.052 & 0.04 & 0.009 \\ + 0.175 & 0.178 & 0.197 & 0.207 & 0.11 & 0.067 & 0.054 & 0.012 \\ + 0.182 & 0.184 & 0.2 & 0.205 & 0.106 & 0.062 & 0.05 & 0.011 \\ + 0.123 & 0.125 & 0.166 & 0.216 & 0.141 & 0.114 & 0.094 & 0.021 \\ + 0.084 & 0.084 & 0.142 & 0.228 & 0.17 & 0.143 & 0.121 & 0.028 \end{bmatrix} $$ @@ -458,20 +468,18 @@ P = np.array(P) codes_B = ('1','2','3','4','5','6','7','8') ``` -In this exercise, - -1. show this process is asymptotically stationary and calculate the stationary distribution using simulations. +1. Show this process is asymptotically stationary and calculate an approximation to the stationary distribution. -1. use simulations to demonstrate ergodicity of this process. +1. Use simulations to illustrate ergodicity. ```` ```{solution-start} mc_ex1 :class: dropdown ``` -Solution 1: +Part 1: -Use the technique we learnt before, we can take the power of the transition matrix +One option is to take the power of the transition matrix. ```{code-cell} ipython3 P = [[0.222, 0.222, 0.215, 0.187, 0.081, 0.038, 0.029, 0.006], @@ -489,7 +497,8 @@ codes_B = ('1','2','3','4','5','6','7','8') np.linalg.matrix_power(P, 10) ``` -We find again that rows of the transition matrix converge to the stationary distribution +For this model, rows of $P^n$ converge to the stationary distribution as $n \to +\infty$: ```{code-cell} ipython3 mc = qe.MarkovChain(P) @@ -497,7 +506,7 @@ mc = qe.MarkovChain(P) ψ_star ``` -Solution 2: +Part 2: ```{code-cell} ipython3 ts_length = 1000 @@ -509,16 +518,17 @@ ax.axhline(0, linestyle='dashed', lw=2, color='black', alpha=0.4) for x0 in range(8): # Calculate the fraction of time for each worker - p_hat = (X == x0).cumsum() / (1 + np.arange(ts_length, dtype=float)) + p_hat = (X == x0).cumsum() / (1 + np.arange(ts_length)) ax.plot(p_hat - ψ_star[x0], label=f'$x = {x0+1} $') ax.set_xlabel('t') - ax.set_ylabel(r'$\hat p_t(x) - \psi^* (x)$') + ax.set_ylabel(r'$\hat p_n(x) - \psi^* (x)$') ax.legend() plt.show() ``` -Note that the fraction of time spent at each state quickly converges to the probability assigned to that state by the stationary distribution. +Note that the fraction of time spent at each state converges to the probability +assigned to that state by the stationary distribution. 
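+
+As a rough numerical check (a minimal sketch only, reusing `X`, `ψ_star` and
+the other names defined in the cell above), we can compare the full-sample
+fraction of time spent in each state with the stationary distribution:
+
+```{code-cell} ipython3
+# Fraction of time spent in each of the eight states over the whole sample
+p_hat_final = np.array([np.mean(X == x0) for x0 in range(8)])
+# Largest absolute deviation from the stationary distribution
+np.max(np.abs(p_hat_final - ψ_star))
+```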
 ```{solution-end}
 ```
 
@@ -590,7 +600,7 @@ for x0, col in ((0, 'blue'), (1, 'green')):
     # Generate time series for worker that starts at x0
     X = mc.simulate(ts_length, init=x0)
     # Compute fraction of time spent unemployed, for each n
-    X_bar = (X == 0).cumsum() / (1 + np.arange(ts_length, dtype=float))
+    X_bar = (X == 0).cumsum() / (1 + np.arange(ts_length))
     # Plot
     ax.fill_between(range(ts_length), np.zeros(ts_length), X_bar - p, color=col, alpha=0.1)
     ax.plot(X_bar - p, color=col, label=f'$x_0 = \, {x0} $')
@@ -611,11 +621,12 @@
 
 In the `quantecon` library, irreducibility is tested by checking whether the chain forms a [strongly connected component](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.components.is_strongly_connected.html).
 
-However, another way to verify irreducibility is by checking whether $A$ satisfies the following statement:
+Another way to test irreducibility is via the following statement:
 
-Assume A is an $n \times n$ $A$ is irreducible if and only if $\sum_{k=0}^{n-1}A^k$ is a positive matrix.
+The $n \times n$ matrix $A$ is irreducible if and only if $\sum_{k=0}^{n-1}A^k$
+is a strictly positive matrix.
 
-(see more: {cite}`zhao_power_2012` and [here](https://math.stackexchange.com/questions/3336616/how-to-prove-this-matrix-is-a-irreducible-matrix))
+(see, e.g., {cite}`zhao_power_2012` and [this StackExchange post](https://math.stackexchange.com/questions/3336616/how-to-prove-this-matrix-is-a-irreducible-matrix))
 
 Based on this claim, write a function to test irreducibility.
 
@@ -634,6 +645,8 @@ def is_irreducible(P):
     return np.all(result > 0)
 ```
 
+Let's try it.
+
 ```{code-cell} ipython3
 P1 = np.array([[0, 1],
                [1, 0]])

From 55bcd9055fbb6bcfc54e9aebc0a77dea83e8f33b Mon Sep 17 00:00:00 2001
From: mmcky
Date: Tue, 12 Mar 2024 09:30:07 +1100
Subject: [PATCH 2/3] fix: prf labels need prf:ref for sphinx domain

---
 lectures/markov_chains_II.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lectures/markov_chains_II.md b/lectures/markov_chains_II.md
index 30aac518..b8bc51a1 100644
--- a/lectures/markov_chains_II.md
+++ b/lectures/markov_chains_II.md
@@ -145,7 +145,7 @@
 
 We discussed the uniqueness of stationary distributions in our {doc}`earlier lecture on Markov chains `.
 
-There we {ref}`stated ` that uniqueness holds when the transition matrix is everywhere positive.
+There we {prf:ref}`stated ` that uniqueness holds when the transition matrix is everywhere positive.
 
 In fact, irreducibility is sufficient:

From e0f038103b4f20862dca596bae4285acc0be1a48 Mon Sep 17 00:00:00 2001
From: John Stachurski
Date: Tue, 12 Mar 2024 10:18:41 +1100
Subject: [PATCH 3/3] misc

---
 lectures/markov_chains_II.md | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/lectures/markov_chains_II.md b/lectures/markov_chains_II.md
index b8bc51a1..67def2d7 100644
--- a/lectures/markov_chains_II.md
+++ b/lectures/markov_chains_II.md
@@ -48,7 +48,6 @@ Let's start with some standard imports:
 
 ```{code-cell} ipython3
 import matplotlib.pyplot as plt
-plt.rcParams["figure.figsize"] = (11, 5)  # set default figure size
 import quantecon as qe
 import numpy as np
 ```
@@ -249,8 +248,6 @@ Hence we expect that $\hat p_n(x) \approx \psi^*(x)$ when $n$ is large.
 
 The next figure shows convergence of $\hat p_n(x)$ to $\psi^*(x)$ when $x=1$ and
 $X_0$ is either $0, 1$ or $2$.
 
-The figure shows convergence to the stationary distribution regardless of the
-initial condition $X_0$.
```{code-cell} ipython3
P = np.array([[0.971, 0.029, 0.000],
              [0.145, 0.778, 0.077],
              [0.000, 0.508, 0.492]])
ts_length = 10_000
mc = qe.MarkovChain(P)
ψ_star = mc.stationary_distributions[0]
x = 1  # We study convergence to psi^*(x)
fig, ax = plt.subplots()
ax.axhline(ψ_star[x], linestyle='dashed', color='black',
           label = fr'$\psi^*({x})$')
# Compute the fraction of time spent in the target state, starting from different x_0s
for x0 in range(3):
    X = mc.simulate(ts_length, init=x0)
    p_hat = (X == x).cumsum() / (1 + np.arange(ts_length))
    ax.plot(p_hat, label=fr'$\hat p_n({x})$ when $X_0 = \, {x0}$')
ax.set_xlabel('t')
ax.set_ylabel(fr'$\hat p_n({x})$')
ax.legend()
plt.show()
```

You might like to try changing $x=1$ to either $x=0$ or $x=2$.

In any of these cases, ergodicity will hold.
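
As a quick numerical check of this claim (a minimal sketch, reusing `mc`,
`ψ_star` and `ts_length` from the cell above), we can compare the long-run
fraction of time spent in each state with $\psi^*$:

```{code-cell} ipython3
X = mc.simulate(ts_length, init=0)
for x in range(3):
    p_hat = np.mean(X == x)  # long-run fraction of time spent in state x
    print(f"x = {x}: p_hat = {p_hat:.3f}, psi*(x) = {ψ_star[x]:.3f}")
```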