From 93febed32979d73ffe3ba1c7af90ccde1ffb01e8 Mon Sep 17 00:00:00 2001
From: John Stachurski
Date: Tue, 12 Mar 2024 07:01:03 +1100
Subject: [PATCH 1/3] misc

---
 lectures/markov_chains_II.md | 245 ++++++++++++++++++-----------------
 1 file changed, 129 insertions(+), 116 deletions(-)

diff --git a/lectures/markov_chains_II.md b/lectures/markov_chains_II.md
index f2940ac9..30aac518 100644
--- a/lectures/markov_chains_II.md
+++ b/lectures/markov_chains_II.md
@@ -143,11 +143,11 @@ We'll come back to this a bit later.
 
 ### Irreducibility and stationarity
 
-We discussed the uniqueness of the stationary in the {ref}`previous lecture ` requires the transition matrix to be everywhere positive.
+We discussed the uniqueness of stationary distributions in our {doc}`earlier lecture on Markov chains `.
 
-In fact irreducibility is enough for the uniqueness of the stationary distribution to hold if the distribution exists.
+There we {ref}`stated ` that uniqueness holds when the transition matrix is everywhere positive.
 
-We can revise the {ref}`theorem` into the following fundamental theorem:
+In fact, irreducibility is sufficient:
 
 ```{prf:theorem}
 :label: mc_conv_thm
@@ -163,7 +163,6 @@ Theorem 5.2 of {cite}`haggstrom2002finite`.
 
 (ergodicity)=
 ## Ergodicity
-Please note that we use $\mathbb{1}$ for a vector of ones in this lecture.
 
 Under irreducibility, yet another important result obtains:
 
@@ -205,7 +204,7 @@ sequence of random variables is [not IID](iid_violation).
 
 
 (mc_eg1-2)=
-### Example 1
+### Example: Ergodicity and unemployment
 
 Recall our cross-sectional interpretation of the employment/unemployment model {ref}`discussed above `.
 
@@ -227,113 +226,63 @@ This is one aspect of the concept of ergodicity.
 
 
 (ergo)=
-### Example 2
+### Example: Hamilton dynamics
 
 Another example is the Hamilton dynamics we {ref}`discussed before `.
 
-The {ref}`graph ` of the Markov chain shows it is irreducible
+Let $\{X_t\}$ be a sample path generated by these dynamics.
 
-Therefore, we can see the sample path averages for each state (the fraction of
-time spent in each state) converges to the stationary distribution regardless of
-the starting state
-
-Let's denote the fraction of time spent in state $x$ at time $t$ in our sample path as $\hat p_t(x)$ where
+Let's denote the fraction of time spent in state $x$ over the period $t=1,
+\ldots, n$ by $\hat p_n(x)$, so that
 
 $$
-\hat p_t(x) := \frac{1}{t} \sum_{t = 1}^t \mathbf{1}\{X_t = x\}
+    \hat p_n(x) := \frac{1}{n} \sum_{t = 1}^n \mathbf{1}\{X_t = x\}
+    \qquad (x \in \{0, 1, 2\})
 $$
 
-Here we compare $\hat p_t(x)$ with the stationary distribution $\psi^* (x)$ for different starting points $x_0$.
+The {ref}`graph ` of the Markov chain shows it is irreducible, so
+ergodicity holds.
 
-```{code-cell} ipython3
-P = np.array([[0.971, 0.029, 0.000],
-              [0.145, 0.778, 0.077],
-              [0.000, 0.508, 0.492]])
-ts_length = 10_000
-mc = qe.MarkovChain(P)
-n = len(P)
-fig, axes = plt.subplots(nrows=1, ncols=n, figsize=(15, 6))
-ψ_star = mc.stationary_distributions[0]
-plt.subplots_adjust(wspace=0.35)
+Hence we expect that $\hat p_n(x) \approx \psi^*(x)$ when $n$ is large.
 
-for i in range(n):
-    axes[i].axhline(ψ_star[i], linestyle='dashed', lw=2, color='black',
-                    label = fr'$\psi^*({i})$')
-    axes[i].set_xlabel('t')
-    axes[i].set_ylabel(fr'$\hat p_t({i})$')
+The next figure shows convergence of $\hat p_n(x)$ to $\psi^*(x)$ when $x=1$ and
+$X_0$ is either $0, 1$ or $2$.
-    # Compute the fraction of time spent, starting from different x_0s
-    for x0, col in ((0, 'blue'), (1, 'green'), (2, 'red')):
-        # Generate time series that starts at different x0
-        X = mc.simulate(ts_length, init=x0)
-        p_hat = (X == i).cumsum() / (1 + np.arange(ts_length, dtype=float))
-        axes[i].plot(p_hat, color=col, label=f'$x_0 = \, {x0} $')
-    axes[i].legend()
-plt.show()
-```
-
-Note the convergence to the stationary distribution regardless of the starting point $x_0$.
-
-### Example 3
+The figure shows convergence to the stationary distribution regardless of the
+initial condition $X_0$.
 
-Let's look at one more example with six states {ref}`discussed before `.
-
-
-$$
-P :=
-\begin{bmatrix}
-0.86 & 0.11 & 0.03 & 0.00 & 0.00 & 0.00 \\
-0.52 & 0.33 & 0.13 & 0.02 & 0.00 & 0.00 \\
-0.12 & 0.03 & 0.70 & 0.11 & 0.03 & 0.01 \\
-0.13 & 0.02 & 0.35 & 0.36 & 0.10 & 0.04 \\
-0.00 & 0.00 & 0.09 & 0.11 & 0.55 & 0.25 \\
-0.00 & 0.00 & 0.09 & 0.15 & 0.26 & 0.50
-\end{bmatrix}
-$$
-
-
-The {ref}`graph ` for the chain shows all states are reachable,
-indicating that this chain is irreducible.
-
-Here we visualize the difference between $\hat p_t(x)$ and the stationary distribution $\psi^* (x)$ for each state $x$
 
 ```{code-cell} ipython3
-P = [[0.86, 0.11, 0.03, 0.00, 0.00, 0.00],
-     [0.52, 0.33, 0.13, 0.02, 0.00, 0.00],
-     [0.12, 0.03, 0.70, 0.11, 0.03, 0.01],
-     [0.13, 0.02, 0.35, 0.36, 0.10, 0.04],
-     [0.00, 0.00, 0.09, 0.11, 0.55, 0.25],
-     [0.00, 0.00, 0.09, 0.15, 0.26, 0.50]]
-
+P = np.array([[0.971, 0.029, 0.000],
+              [0.145, 0.778, 0.077],
+              [0.000, 0.508, 0.492]])
 ts_length = 10_000
 mc = qe.MarkovChain(P)
 ψ_star = mc.stationary_distributions[0]
-fig, ax = plt.subplots(figsize=(9, 6))
-X = mc.simulate(ts_length)
-# Center the plot at 0
-ax.set_ylim(-0.25, 0.25)
-ax.axhline(0, linestyle='dashed', lw=2, color='black', alpha=0.4)
-
-
-for x0 in range(6):
-    # Calculate the fraction of time for each state
-    p_hat = (X == x0).cumsum() / (1 + np.arange(ts_length, dtype=float))
-    ax.plot(p_hat - ψ_star[x0], label=f'$x = {x0+1} $')
-    ax.set_xlabel('t')
-    ax.set_ylabel(r'$\hat p_t(x) - \psi^* (x)$')
+i = 1  # We study convergence to psi^*(x) when x = i
+fig, ax = plt.subplots()
+ax.axhline(ψ_star[i], linestyle='dashed', color='black',
+           label = fr'$\psi^*({i})$')
+# Compute the fraction of time spent in the target state, starting from different x_0s
+for x0 in range(3):
+    X = mc.simulate(ts_length, init=x0)
+    p_hat = (X == i).cumsum() / (1 + np.arange(ts_length))
+    ax.plot(p_hat, label=fr'$\psi^*({i})$ when $x_0 = \, {x0}$')
+ax.set_xlabel('t')
+ax.set_ylabel(fr'$\hat p_n({i})$')
 ax.legend()
 plt.show()
 ```
 
-Similar to previous examples, the sample path averages for each state converge
-to the stationary distribution.
+You might like to try changing $i=1$ to either $i=0$ or $i=2$.
 
-### Example 4
+In any of these cases, ergodicity will hold.
 
-Let's look at another example with two states: 0 and 1.
+### Example: a periodic chain
 
+Let's look at the following example with states 0 and 1:
 
 $$
 P :=
@@ -344,18 +293,21 @@ P :=
 $$
 
 
-The diagram of the Markov chain shows that it is **irreducible**
+The transition graph shows that this model is irreducible.
 
 ```{image} /_static/lecture_specific/markov_chains_II/example4.png
 :name: mc_example4
 :align: center
 ```
 
-In fact it has a periodic cycle --- the state cycles between the two states in a regular way.
+Notice that there is a periodic cycle --- the state cycles between the two states in a regular way.
-This is called [periodicity](https://stats.libretexts.org/Bookshelves/Probability_Theory/Probability_Mathematical_Statistics_and_Stochastic_Processes_(Siegrist)/16%3A_Markov_Processes/16.05%3A_Periodicity_of_Discrete-Time_Chains).
+Not surprisingly, this property
+is called [periodicity](https://stats.libretexts.org/Bookshelves/Probability_Theory/Probability_Mathematical_Statistics_and_Stochastic_Processes_(Siegrist)/16%3A_Markov_Processes/16.05%3A_Periodicity_of_Discrete-Time_Chains).
 
-It is still irreducible so ergodicity holds.
+Nonetheless, the model is irreducible, so ergodicity holds.
+
+The following figure illustrates this.
 
 ```{code-cell} ipython3
 P = np.array([[0, 1],
               [1, 0]])
 ts_length = 10_000
 mc = qe.MarkovChain(P)
 n = len(P)
 fig, axes = plt.subplots(nrows=1, ncols=n, figsize=(15, 6))
 ψ_star = mc.stationary_distributions[0]
 
 for i in range(n):
     axes[i].axhline(ψ_star[i], linestyle='dashed', lw=2, color='black',
                     label = fr'$\psi^*({i})$')
     axes[i].set_xlabel('t')
-    axes[i].set_ylabel(fr'$\hat p_t({i})$')
+    axes[i].set_ylabel(fr'$\hat p_n({i})$')
 
     # Compute the fraction of time spent, for each x
     for x0 in range(n):
         # Generate time series starting at different x_0
         X = mc.simulate(ts_length, init=x0)
-        p_hat = (X == i).cumsum() / (1 + np.arange(ts_length, dtype=float))
+        p_hat = (X == i).cumsum() / (1 + np.arange(ts_length))
         axes[i].plot(p_hat, label=f'$x_0 = \, {x0} $')
     axes[i].legend()
 
@@ -390,6 +342,64 @@ The proportion of time spent in a state can converge to the stationary distribut
 
 However, the distribution at each state does not.
 
+### Example: political institutions
+
+Let's go back to the political institutions model with six states discussed {ref}`in a previous lecture ` and study ergodicity.
+
+
+Here's the transition matrix.
+
+$$
+    P :=
+    \begin{bmatrix}
+    0.86 & 0.11 & 0.03 & 0.00 & 0.00 & 0.00 \\
+    0.52 & 0.33 & 0.13 & 0.02 & 0.00 & 0.00 \\
+    0.12 & 0.03 & 0.70 & 0.11 & 0.03 & 0.01 \\
+    0.13 & 0.02 & 0.35 & 0.36 & 0.10 & 0.04 \\
+    0.00 & 0.00 & 0.09 & 0.11 & 0.55 & 0.25 \\
+    0.00 & 0.00 & 0.09 & 0.15 & 0.26 & 0.50
+    \end{bmatrix}
+$$
+
+
+The {ref}`graph ` for the chain shows all states are reachable,
+indicating that this chain is irreducible.
+
+In the next figure, we visualize the difference $\hat p_n(x) - \psi^* (x)$ for each state $x$.
+
+Unlike the previous figure, $X_0$ is held fixed.
+
+```{code-cell} ipython3
+P = [[0.86, 0.11, 0.03, 0.00, 0.00, 0.00],
+     [0.52, 0.33, 0.13, 0.02, 0.00, 0.00],
+     [0.12, 0.03, 0.70, 0.11, 0.03, 0.01],
+     [0.13, 0.02, 0.35, 0.36, 0.10, 0.04],
+     [0.00, 0.00, 0.09, 0.11, 0.55, 0.25],
+     [0.00, 0.00, 0.09, 0.15, 0.26, 0.50]]
+
+ts_length = 10_000
+mc = qe.MarkovChain(P)
+ψ_star = mc.stationary_distributions[0]
+fig, ax = plt.subplots(figsize=(9, 6))
+X = mc.simulate(ts_length)
+# Center the plot at 0
+ax.set_ylim(-0.25, 0.25)
+ax.axhline(0, linestyle='dashed', lw=2, color='black', alpha=0.4)
+
+
+for x0 in range(len(P)):
+    # Calculate the fraction of time for each state
+    p_hat = (X == x0).cumsum() / (1 + np.arange(ts_length))
+    ax.plot(p_hat - ψ_star[x0], label=f'$x = {x0+1} $')
+    ax.set_xlabel('t')
+    ax.set_ylabel(r'$\hat p_n(x) - \psi^* (x)$')
+
+ax.legend()
+plt.show()
+```
+
+
+
 ### Expectations of geometric sums
 
 Sometimes we want to compute the mathematical expectation of a geometric sum, such as
 
@@ -422,15 +432,15 @@ Benhabib et al.
{cite}`benhabib_wealth_2019` estimated that the transition matri $$ P:= -\begin{bmatrix} -0.222 & 0.222 & 0.215 & 0.187 & 0.081 & 0.038 & 0.029 & 0.006 \\ -0.221 & 0.22 & 0.215 & 0.188 & 0.082 & 0.039 & 0.029 & 0.006 \\ -0.207 & 0.209 & 0.21 & 0.194 & 0.09 & 0.046 & 0.036 & 0.008 \\ -0.198 & 0.201 & 0.207 & 0.198 & 0.095 & 0.052 & 0.04 & 0.009 \\ -0.175 & 0.178 & 0.197 & 0.207 & 0.11 & 0.067 & 0.054 & 0.012 \\ -0.182 & 0.184 & 0.2 & 0.205 & 0.106 & 0.062 & 0.05 & 0.011 \\ -0.123 & 0.125 & 0.166 & 0.216 & 0.141 & 0.114 & 0.094 & 0.021 \\ -0.084 & 0.084 & 0.142 & 0.228 & 0.17 & 0.143 & 0.121 & 0.028 + \begin{bmatrix} + 0.222 & 0.222 & 0.215 & 0.187 & 0.081 & 0.038 & 0.029 & 0.006 \\ + 0.221 & 0.22 & 0.215 & 0.188 & 0.082 & 0.039 & 0.029 & 0.006 \\ + 0.207 & 0.209 & 0.21 & 0.194 & 0.09 & 0.046 & 0.036 & 0.008 \\ + 0.198 & 0.201 & 0.207 & 0.198 & 0.095 & 0.052 & 0.04 & 0.009 \\ + 0.175 & 0.178 & 0.197 & 0.207 & 0.11 & 0.067 & 0.054 & 0.012 \\ + 0.182 & 0.184 & 0.2 & 0.205 & 0.106 & 0.062 & 0.05 & 0.011 \\ + 0.123 & 0.125 & 0.166 & 0.216 & 0.141 & 0.114 & 0.094 & 0.021 \\ + 0.084 & 0.084 & 0.142 & 0.228 & 0.17 & 0.143 & 0.121 & 0.028 \end{bmatrix} $$ @@ -458,20 +468,18 @@ P = np.array(P) codes_B = ('1','2','3','4','5','6','7','8') ``` -In this exercise, - -1. show this process is asymptotically stationary and calculate the stationary distribution using simulations. +1. Show this process is asymptotically stationary and calculate an approximation to the stationary distribution. -1. use simulations to demonstrate ergodicity of this process. +1. Use simulations to illustrate ergodicity. ```` ```{solution-start} mc_ex1 :class: dropdown ``` -Solution 1: +Part 1: -Use the technique we learnt before, we can take the power of the transition matrix +One option is to take the power of the transition matrix. ```{code-cell} ipython3 P = [[0.222, 0.222, 0.215, 0.187, 0.081, 0.038, 0.029, 0.006], @@ -489,7 +497,8 @@ codes_B = ('1','2','3','4','5','6','7','8') np.linalg.matrix_power(P, 10) ``` -We find again that rows of the transition matrix converge to the stationary distribution +For this model, rows of $P^n$ converge to the stationary distribution as $n \to +\infty$: ```{code-cell} ipython3 mc = qe.MarkovChain(P) @@ -497,7 +506,7 @@ mc = qe.MarkovChain(P) ψ_star ``` -Solution 2: +Part 2: ```{code-cell} ipython3 ts_length = 1000 @@ -509,16 +518,17 @@ ax.axhline(0, linestyle='dashed', lw=2, color='black', alpha=0.4) for x0 in range(8): # Calculate the fraction of time for each worker - p_hat = (X == x0).cumsum() / (1 + np.arange(ts_length, dtype=float)) + p_hat = (X == x0).cumsum() / (1 + np.arange(ts_length)) ax.plot(p_hat - ψ_star[x0], label=f'$x = {x0+1} $') ax.set_xlabel('t') - ax.set_ylabel(r'$\hat p_t(x) - \psi^* (x)$') + ax.set_ylabel(r'$\hat p_n(x) - \psi^* (x)$') ax.legend() plt.show() ``` -Note that the fraction of time spent at each state quickly converges to the probability assigned to that state by the stationary distribution. +Note that the fraction of time spent at each state converges to the probability +assigned to that state by the stationary distribution. 
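+
+As a rough numerical check (a minimal sketch only, reusing `X`, `ψ_star` and
+the other names defined in the cell above), we can compare the full-sample
+fraction of time spent in each state with the stationary distribution:
+
+```{code-cell} ipython3
+# Fraction of time spent in each of the eight states over the whole sample
+p_hat_final = np.array([np.mean(X == x0) for x0 in range(8)])
+# Largest absolute deviation from the stationary distribution
+np.max(np.abs(p_hat_final - ψ_star))
+```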
 ```{solution-end}
 ```
 
@@ -590,7 +600,7 @@ for x0, col in ((0, 'blue'), (1, 'green')):
     # Generate time series for worker that starts at x0
     X = mc.simulate(ts_length, init=x0)
     # Compute fraction of time spent unemployed, for each n
-    X_bar = (X == 0).cumsum() / (1 + np.arange(ts_length, dtype=float))
+    X_bar = (X == 0).cumsum() / (1 + np.arange(ts_length))
     # Plot
     ax.fill_between(range(ts_length), np.zeros(ts_length), X_bar - p, color=col, alpha=0.1)
     ax.plot(X_bar - p, color=col, label=f'$x_0 = \, {x0} $')
@@ -611,11 +621,12 @@
 
 In the `quantecon` library, irreducibility is tested by checking whether the chain forms a [strongly connected component](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.components.is_strongly_connected.html).
 
-However, another way to verify irreducibility is by checking whether $A$ satisfies the following statement:
+Another way to test irreducibility is via the following statement:
 
-Assume A is an $n \times n$ $A$ is irreducible if and only if $\sum_{k=0}^{n-1}A^k$ is a positive matrix.
+The $n \times n$ matrix $A$ is irreducible if and only if $\sum_{k=0}^{n-1}A^k$
+is a strictly positive matrix.
 
-(see more: {cite}`zhao_power_2012` and [here](https://math.stackexchange.com/questions/3336616/how-to-prove-this-matrix-is-a-irreducible-matrix))
+(see, e.g., {cite}`zhao_power_2012` and [this StackExchange post](https://math.stackexchange.com/questions/3336616/how-to-prove-this-matrix-is-a-irreducible-matrix))
 
 Based on this claim, write a function to test irreducibility.
 
@@ -634,6 +645,8 @@ def is_irreducible(P):
     return np.all(result > 0)
 ```
 
+Let's try it.
+
 ```{code-cell} ipython3
 P1 = np.array([[0, 1],
                [1, 0]])

From 55bcd9055fbb6bcfc54e9aebc0a77dea83e8f33b Mon Sep 17 00:00:00 2001
From: mmcky
Date: Tue, 12 Mar 2024 09:30:07 +1100
Subject: [PATCH 2/3] fix: prf labels need prf:ref for sphinx domain

---
 lectures/markov_chains_II.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lectures/markov_chains_II.md b/lectures/markov_chains_II.md
index 30aac518..b8bc51a1 100644
--- a/lectures/markov_chains_II.md
+++ b/lectures/markov_chains_II.md
@@ -145,7 +145,7 @@
 
 We discussed the uniqueness of stationary distributions in our {doc}`earlier lecture on Markov chains `.
 
-There we {ref}`stated ` that uniqueness holds when the transition matrix is everywhere positive.
+There we {prf:ref}`stated ` that uniqueness holds when the transition matrix is everywhere positive.
 
 In fact, irreducibility is sufficient:

From e0f038103b4f20862dca596bae4285acc0be1a48 Mon Sep 17 00:00:00 2001
From: John Stachurski
Date: Tue, 12 Mar 2024 10:18:41 +1100
Subject: [PATCH 3/3] misc

---
 lectures/markov_chains_II.md | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/lectures/markov_chains_II.md b/lectures/markov_chains_II.md
index b8bc51a1..67def2d7 100644
--- a/lectures/markov_chains_II.md
+++ b/lectures/markov_chains_II.md
@@ -48,7 +48,6 @@ Let's start with some standard imports:
 
 ```{code-cell} ipython3
 import matplotlib.pyplot as plt
-plt.rcParams["figure.figsize"] = (11, 5)  # set default figure size
 import quantecon as qe
 import numpy as np
 ```
@@ -249,8 +248,6 @@ Hence we expect that $\hat p_n(x) \approx \psi^*(x)$ when $n$ is large.
 
 The next figure shows convergence of $\hat p_n(x)$ to $\psi^*(x)$ when $x=1$ and
 $X_0$ is either $0, 1$ or $2$.
 
-The figure shows convergence to the stationary distribution regardless of the
-initial condition $X_0$.
```{code-cell} ipython3
P = np.array([[0.971, 0.029, 0.000],
              [0.145, 0.778, 0.077],
              [0.000, 0.508, 0.492]])
ts_length = 10_000
mc = qe.MarkovChain(P)
ψ_star = mc.stationary_distributions[0]
x = 1  # We study convergence to psi^*(x)
fig, ax = plt.subplots()
ax.axhline(ψ_star[x], linestyle='dashed', color='black',
           label = fr'$\psi^*({x})$')
# Compute the fraction of time spent in the target state, starting from different x_0s
for x0 in range(3):
    X = mc.simulate(ts_length, init=x0)
    p_hat = (X == x).cumsum() / (1 + np.arange(ts_length))
    ax.plot(p_hat, label=fr'$\hat p_n({x})$ when $X_0 = \, {x0}$')
ax.set_xlabel('t')
ax.set_ylabel(fr'$\hat p_n({x})$')
ax.legend()
plt.show()
```

You might like to try changing $x=1$ to either $x=0$ or $x=2$.

In any of these cases, ergodicity will hold.
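
As a quick numerical check of this claim (a minimal sketch, reusing `mc`,
`ψ_star` and `ts_length` from the cell above), we can compare the long-run
fraction of time spent in each state with $\psi^*$:

```{code-cell} ipython3
X = mc.simulate(ts_length, init=0)
for x in range(3):
    p_hat = np.mean(X == x)  # long-run fraction of time spent in state x
    print(f"x = {x}: p_hat = {p_hat:.3f}, psi*(x) = {ψ_star[x]:.3f}")
```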