\documentclass[10pt,landscape]{article}
\usepackage{multicol}
\usepackage{calc}
\usepackage{ifthen}
\usepackage[landscape]{geometry}
\usepackage{graphicx}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{latexsym, marvosym}
\usepackage{pifont}
\usepackage[sc]{mathpazo} % use mathpazo for math fonts
\usepackage{lscape}
\usepackage{array}
\usepackage{booktabs}
\usepackage[bottom]{footmisc}
\usepackage{tikz}
\usetikzlibrary{shapes}
\usepackage{pdfpages}
\usepackage{wrapfig}
\usepackage{enumitem}
\setlist[description]{leftmargin=0pt}
\usepackage{xfrac}
\usepackage[pdftex,
pdfauthor={Gejun Zhu},
pdftitle={Statistical Analysis Cheatsheet},
pdfsubject={One page both sides cheatsheet for statistics analysis comprehensive exam.},
pdfkeywords={statistics, cheatsheet, pdf, cheat, sheet, formulas, equations}
]{hyperref}
\usepackage{relsize}
\usepackage{rotating}
\newcommand\independent{\protect\mathpalette{\protect\independenT}{\perp}}
\def\independenT#1#2{\mathrel{\setbox0\hbox{$#1#2$}%
\copy0\kern-\wd0\mkern4mu\box0}}
\newcommand{\noin}{\noindent}
\newcommand{\logit}{\textrm{logit}}
\newcommand{\var}{\textrm{Var}}
\newcommand{\cov}{\textrm{Cov}}
\newcommand{\corr}{\textrm{Corr}}
\newcommand{\N}{\mathcal{N}}
\newcommand{\Bern}{\textrm{Bern}}
\newcommand{\Bin}{\textrm{Bin}}
\newcommand{\Beta}{\textrm{Beta}}
\newcommand{\Gam}{\textrm{Gamma}}
\newcommand{\Expo}{\textrm{Expo}}
\newcommand{\Pois}{\textrm{Pois}}
\newcommand{\Unif}{\textrm{Unif}}
\newcommand{\Geom}{\textrm{Geom}}
\newcommand{\NBin}{\textrm{NBin}}
\newcommand{\Hypergeometric}{\textrm{HGeom}}
\newcommand{\Mult}{\textrm{Mult}}
\geometry{top=.3in,left=.2in,right=.2in,bottom=.3in}
\pagestyle{empty}
\makeatletter
\renewcommand{\section}{\@startsection{section}{1}{0mm}%
{-1ex plus -.5ex minus -.2ex}%
{0.5ex plus .2ex}%
{\normalfont\normalsize\bfseries}}
\renewcommand{\subsection}{\@startsection{subsection}{2}{0mm}%
{-1ex plus -.5ex minus -.2ex}%
{0.5ex plus .2ex}%
{\normalfont\normalsize\bfseries}}
\renewcommand{\subsubsection}{\@startsection{subsubsection}{3}{0mm}%
{-1ex plus -.5ex minus -.2ex}%
{1ex plus .2ex}%
{\normalfont\small\bfseries}}
\makeatother
\setcounter{secnumdepth}{0}
\setlength{\parindent}{0pt}
\setlength{\parskip}{0pt plus 0.5ex}
% -----------------------------------------------------------------------
\begin{document}
\raggedright
\footnotesize
\begin{multicols}{3}
% multicol parameters
% These lengths are set only within the three main columns
%\setlength{\columnseprule}{0.25pt}
\setlength{\premulticols}{1pt}
\setlength{\postmulticols}{1pt}
\setlength{\multicolsep}{1pt}
\setlength{\columnsep}{2pt}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% TITLE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{center}
\Large{\textbf{Statistical Theory Cheatsheet}} \\
\end{center}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% ATTRIBUTIONS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\scriptsize
Compiled by Gejun Zhu (zhug3@miamioh.edu) in preparation for the theory comprehensive exam, using William Chen's \href{http://wzchen.com/probability-cheatsheet}{formula sheet template}.
\begin{center}
Last Updated \today
\end{center}
% Cheatsheet format from
% http://www.stdout.org/~winston/latex/
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% BEGIN CHEATSHEET
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%
%%%% Probability %%%%
%%%%%%%%%%%%%%%%%%%%%%%
%
% \begin{align*}
% ({\bf A} \cup {\bf B})^c \equiv {\bf A^c} \cap {\bf B^c} \\
% ({\bf A} \cap {\bf B})^c \equiv {\bf A^c} \cup {\bf B^c}
% \end{align*}
\section{Probability and Distributions} \smallskip \hrule height 1pt \smallskip
\begin{description}
\item[Section 1] - \textbf{Probability and Distributions} (worked example below)\\
Let $Y=g(X)$, where $g(x)$ is a one-to-one differentiable function. $f_Y(y) = f_X(g^{-1}(y)) |\frac{dx}{dy}|$ for $y \in S_Y$ . \\
If $X$ and $Y$ are independent random variables, then $M_{X+Y} (t) = M_{X}(t)M_{Y}(t)$.\\
If $X$ and $Y$ are independent random variables, then $\rho(X, Y) = 0$. \\
If $X$ is a discrete random variable, then the mgf of $X$ is $M_{X}(t) = \sum_k P(X = k)e^{kt}$. \\
If $X \sim Gamma(a_1,b)$ and $Y \sim Gamma(a_2,b)$, $X+Y \sim Gamma(a_1+a_2,b)$ if $X$ and $Y$ are independent. \\
If $X \sim Gamma(a_1,b)$, then $cX\sim Gamma(a_1, cb)$. \\
$X \sim Gamma(a,b)$, if $a=1$ then $X\sim exponential(b)$; if $b=2$ then $X\sim \chi_{(2a)}^2$. \\
If $X_i \sim Laplace(\mu, b)$ then $\frac{2 \sum_{i=1}^n |X_i-\mu|}{b} \sim \chi^2(2n) $.\\
If $X \sim Laplace(0, b)$ then $|X| \sim Exponential(b^{-1})$.\\
If $X \sim Exp(\lambda)$ then $X \sim Gamma(1, \lambda)$.\\
If $X \sim Gamma(k, \theta)$ where $k$ is the shape parameter and $\theta$ is a scale parameter, then $E(X) = k\theta$, $var(X) = k\theta^2$; if $X \sim Gamma(\alpha, \beta)$ where $\alpha$ is the shape parameter and $\beta$ is the rate parameter, then $E(X) = \frac{\alpha}{\beta}$, $var(X) = \frac{\alpha}{\beta^2}$.\\
\end{description}
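\begin{description}
\item[Example] - a worked sketch of the transformation and gamma facts above (illustrative): let $X \sim Unif(0,1)$ and $Y = g(X) = -2\ln X$. Then $x = g^{-1}(y) = e^{-y/2}$, $|\frac{dx}{dy}| = \frac{1}{2}e^{-y/2}$, so $f_Y(y) = 1\cdot\frac{1}{2}e^{-y/2}$, $y > 0$, i.e.\ $Y \sim Gamma(1, 2) = \chi_{(2)}^2$. By gamma additivity, $\sum_{i=1}^n Y_i \sim Gamma(n, 2) = \chi_{(2n)}^2$ for independent $Y_i$.
\end{description}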
%%%%%%%%%%%%%%%
\begin{description}
\item[Section 2] - \textbf{Multivariate Distributions} (example below)\\
Covariance: $EX = \mu_1$, $EY = \mu_2$, $Cov(X, Y) = E(XY) - \mu_1\mu_2$. \\
Correlation coefficient: $\rho = \frac{cov(X, Y)}{\sigma_1\sigma_2}$ \\
$E(X_2) = E[E(X_2|X_1)]$, $Var(X_2) = E[Var(X_2|X_1)] + Var(E(X_2|X_1)) \geq Var(E(X_2|X_1))$.\\
Moment generating function: $M(t) = E(e^{tX})$, $\mu = E(X) = M'(0)$, $\sigma^2 = E(X^2) - (EX)^2 = M''(0) - [M'(0)]^2$\\
$Z_n \sim \chi^2(n)$, $M_{Z_n} = (1 - 2t)^{-\frac{n}{2}}, t< \frac{1}{2}$, $w_n = \frac{Z_n}{n^2}$, $M_{w_n}(t) = E[e^{tw_n}] = E[e^{t\frac{Z_n}{n^2}}] = M_{Z_n}(\frac{t}{n^2}) = (1-\frac{2t}{n^2})^{-\frac{n}{2}}$ for $\frac{t}{n^2} < \frac{1}{2}$\\
Negative binomial distribution: $y = \#$ of failures before the $r^{th}$ success. $p(y) = {{y+r-1}\choose{r-1}}p^r(1-p)^y$ \\
Poisson distribution: $y = \#$ of successes in a fixed length of time, $p(y) = \frac{\lambda^y e^{-\lambda}}{y!}$;\\
Gamma distribution: $y =$ waiting time required until the $\alpha^{th}$ success. $f(y) = \frac{1}{\Gamma(\alpha) \beta^\alpha} y^{\alpha - 1} e^{\frac{-y}{\beta}}$. Special cases: (1) exponential distribution - $\alpha = 1, \beta = \frac{1}{\lambda}$; (2) Chi-square distribution - $\alpha = \frac{r}{2}$, $\beta = 2$.\\
\end{description}
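\begin{description}
\item[Example] - iterated expectation and variance (an illustrative sketch): let $X_1 \sim Pois(\lambda)$ and $X_2|X_1 \sim Bin(X_1, p)$. Then $E(X_2) = E[E(X_2|X_1)] = E(pX_1) = p\lambda$, and $Var(X_2) = E[Var(X_2|X_1)] + Var(E(X_2|X_1)) = E[X_1p(1-p)] + Var(pX_1) = \lambda p(1-p) + p^2\lambda = \lambda p$; in fact $X_2 \sim Pois(\lambda p)$ (Poisson thinning).
\end{description}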
\section{Statistical Inference}\smallskip \hrule height 1pt \smallskip
%\subsection{Simple Linear Regression}
\begin{description}
\item[Inequalities] - \textbf{Important Inequalities} \\
\underline{Markov's Inequality}: for $u(X)$ non-negative with $E(u(X))$ existing and any $c > 0$, $P[u(X)\geq c] \leq \frac{E(u(X))}{c}$.\\
\underline{Chebyshev's Inequality}: $P(|X - \mu| \geq k\sigma) \leq \frac{1}{k^2}$ (derivation below)
\end{description}
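\begin{description}
\item[Derivation] - Chebyshev from Markov (one-line sketch): take $u(X) = (X-\mu)^2$ and $c = k^2\sigma^2$ in Markov's inequality: $P(|X - \mu| \geq k\sigma) = P[(X-\mu)^2 \geq k^2\sigma^2] \leq \frac{E(X-\mu)^2}{k^2\sigma^2} = \frac{1}{k^2}$. E.g.\ $k=2$: at least $1 - \frac{1}{4} = 75\%$ of any distribution lies within $2\sigma$ of $\mu$.
\end{description}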
\begin{description}
\item[Distributions] - \textbf{Some facts}\\
Sample mean: $\bar{X} = \frac{\sum X_i}{n}$, sample variance: $S^2 = \frac{\sum X_i^2 - n\bar{X}^2}{n-1}$, $E(S^2) = \sigma^2$; an unbiased estimator for $\mu^2$ in the normal case is $\bar{X}^2 - \dfrac{S^2}{n}$, since $E(\bar{X}^2) = \mu^2 + \frac{\sigma^2}{n}$ [also think about $Y_n$]\\
$E(\bar{X}) = \mu$, $var(\bar{X}) = \frac{\sigma^2}{n}$, $\bar{X} \sim N(\mu, \sigma^2/n)$, $\frac{(n-1)S^2}{\sigma^2} \sim \chi_{(n-1)}^2$, $\bar{X}\ \&\ S^2$ are indep.; \\
$\sum a_iX_i$ and $\sum b_iX_i$ are indep.\ iff $\sum a_ib_i = 0$ (normal samples);\\
$\frac{\sum (X_i - \mu )^2}{\sigma^2} = \frac{(n-1)S^2}{\sigma^2} + \frac{(\bar{X} - \mu)^2}{\sigma^2/n}$ (sketch below)\\
$\dfrac{\bar{X} - \mu}{\sigma/\sqrt{n}} \sim N(0, 1)$ (exact for normal samples, asymptotic by the CLT otherwise), $\dfrac{\bar{X} - \mu}{S/\sqrt{n}} \sim T_{n-1}$\\
%$\frac{\bar{X} - \mu}{\sigma/\sqrt{n}} \sim N(0, 1)$, $\frac{\bar{X} - \mu}{S/\sqrt{n}} \sim t(n-1)$,
%\item[Properties of SLR]
\end{description}
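\begin{description}
\item[Sketch] - why $\frac{(n-1)S^2}{\sigma^2} \sim \chi_{(n-1)}^2$ (using the identity above): the left side satisfies $\frac{\sum (X_i - \mu)^2}{\sigma^2} \sim \chi_{(n)}^2$ and the last term satisfies $\frac{(\bar{X} - \mu)^2}{\sigma^2/n} \sim \chi_{(1)}^2$; since $\bar{X}$ and $S^2$ are independent, the mgfs factor: $(1-2t)^{-\frac{n}{2}} = M_{(n-1)S^2/\sigma^2}(t)\,(1-2t)^{-\frac{1}{2}}$, so $M_{(n-1)S^2/\sigma^2}(t) = (1-2t)^{-\frac{n-1}{2}}$, the $\chi_{(n-1)}^2$ mgf.
\end{description}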
\begin{description}
\item[Order Statistics] - \textbf{pdf of the $k^{th}$ order statistic $Y_k$} (example below)\\
$g_k(y_k) = \dfrac{n!}{(k-1)!(n-k)!}[F(y_k)]^{k-1}[1-F(y_k)]^{n-k}f(y_k)$, $a< y_k <b$; 0 elsewhere.
\end{description}
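\begin{description}
\item[Example] - maximum of a uniform sample (illustrative): for $X_i \overset{iid}{\sim} Unif(0,\theta)$, take $k = n$, $F(y) = y/\theta$, $f(y) = 1/\theta$: $g_n(y_n) = n[F(y_n)]^{n-1}f(y_n) = \frac{n y_n^{n-1}}{\theta^n}$, $0 < y_n < \theta$. Hence $E(Y_n) = \frac{n}{n+1}\theta$, so $\frac{n+1}{n}Y_n$ is unbiased for $\theta$.
\end{description}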
\begin{description}
\item[Section 4] - \textbf{Confidence Intervals and Hypothesis Testing}\\
\underline{Error types}: Type I - reject $H_0$ while $H_0$ is true, $P_{\theta}(X\in R)$; Type II - fail to reject $H_0$ while $H_0$ is false. $\alpha = P(Type\ I\ error),\ \beta = P(Type\ II\ error)$, $Power = 1 - \beta$. \\
\underline{Power function} (example below): $K(\theta) = P_{\theta}[(X_1, X_2, ..., X_n) \in C]$, $\theta \in \omega_1$. The \textit{power function} of a test with rejection region $R$ is $\beta(\theta) = P_{\theta}(X\in R)$; the power is the probability of rejecting $H_0$ when $H_a$ is true: $Power = P(X\in R|\theta = \theta_a) = 1 - P(Type\ II\ error)$.\\
\underline{Level of significance}: $\alpha = \max_{\theta \in \omega_0} K(\theta)$, i.e.\ $\alpha = P(Type\ I\ error) = P(rejecting\ H_0\ when\ H_0\ is\ true) = P(X\in R | \theta = \theta_0)$\\
\underline{CI for difference in Means}: $X_i \overset{iid} \sim N(\mu_1, \sigma^2)$, $Y_i \overset{iid} \sim N(\mu_2, \sigma^2)$, where $\sigma^2$ is unknown. $S_p^2 = \frac{(n-1)S_1^2 + (m-1)S_2^2}{n+m-2}$, $\frac{(\bar{X} - \bar{Y}) - (\mu_1 - \mu_2)}{\sqrt{S_p^2(\frac{1}{n} + \frac{1}{m})}} \sim T(n+m-2)$. \\
Note: a support restriction $x > \theta$ enters the likelihood as the indicator $I(x > \theta)$.
\end{description}
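\begin{description}
\item[Example] - power function (a sketch assuming $N(\mu, \sigma^2)$ sampling with $\sigma$ known): for $H_0: \mu = \mu_0$ vs.\ $H_1: \mu > \mu_0$, reject when $\bar{X} > \mu_0 + z_\alpha\frac{\sigma}{\sqrt{n}}$. Then $K(\mu) = P_\mu(\bar{X} > \mu_0 + z_\alpha\frac{\sigma}{\sqrt{n}}) = 1 - \Phi(z_\alpha - \frac{(\mu - \mu_0)\sqrt{n}}{\sigma})$, which equals $\alpha$ at $\mu = \mu_0$ and increases in $\mu$.
\end{description}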
\begin{description} %%% \overset{P}{\rightarrow} %%%
\item[Section 5] - \textbf{Consistency and Limiting Distributions} \\
\underline{Convergence in Probability}: $X_n \overset{P}{\rightarrow} X$ if for every $\epsilon > 0$, $\lim_{n\rightarrow\infty} P(|X_n - X| \geq \epsilon) = 0$ (equivalently $\lim_{n\rightarrow\infty} P(|X_n - X| < \epsilon) = 1$).\\
\underline{Degenerate r.v.}: $p(x) = 1$ if $x = a$, $p(x) = 0$ if $x\neq a$; and $F(x) = 0$ if $x<a$, $F(x) = 1$ if $x\geq a$. We write $X_n \overset{P}{\rightarrow} a$. \\
\underline{Consistency}: The statistic $T_n$ is a consistent estimator for $\theta$ iff $T_n \overset{P}{\rightarrow} \theta$. [sufficient: $E(\hat{\theta}) \rightarrow \theta$ and $var(\hat{\theta}) \rightarrow 0$]\\
\underline{Convergence in Distribution (limiting distribution)}: $X_n \overset{D}{\rightarrow} X$ iff $\lim_{n\rightarrow \infty} F_n(x) = F(x)$ at every continuity point $x$ of $F$, where $F_n(x)$ is the cdf of $X_n$. $F(x)$ is said to be the limiting (asymptotic) distribution of $X_n$. \\
\underline{Theorem 5.2.10}: Suppose $X_n$ has m.g.f.\ $M_{X_n}(t)$ that exists for $-h\leq t \leq h$ for all $n$. Let $X$ have m.g.f.\ $M(t)$ which exists for $|t|\leq h_1 \leq h$. If $\lim_{n\rightarrow \infty} M_{X_n}(t) = M(t)$ for $|t| \leq h_1$, then $X_n \overset{D}{\rightarrow} X$. \\
\underline{m.g.f.\ technique} (example below): (1) $\lim_{n\rightarrow \infty} (1 + \frac{b}{n} + \frac{\phi (n)}{n})^{cn} = \lim_{n\rightarrow \infty} (1+\frac{b}{n})^{cn} = e^{bc}$ where $b$ and $c$ are constants and $\lim_{n\rightarrow \infty} \phi(n) = 0$; (2) $e^x = 1 + x + \frac{x^2}{2} + ... + \frac{x^m}{m!} + ...$ \\
\underline{CLT}: $X_1, X_2, ..., X_n \overset{iid}{\sim} f(x)$ with mean $\mu$ and variance $\sigma^2$, $y_n = \frac{\bar{X} - \mu }{\sigma / \sqrt{n}} \overset{D}{\rightarrow} z$, where $z \sim N(0, 1)$. \\
\end{description}
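\begin{description}
\item[Example] - the m.g.f.\ technique in action (illustrative): let $X_n \sim Bin(n, \frac{\lambda}{n})$. Then $M_{X_n}(t) = (1 - \frac{\lambda}{n} + \frac{\lambda}{n}e^t)^n = (1 + \frac{\lambda(e^t - 1)}{n})^n \rightarrow e^{\lambda(e^t - 1)}$ by fact (1) with $b = \lambda(e^t - 1)$, $c = 1$; this is the $Pois(\lambda)$ mgf, so $X_n \overset{D}{\rightarrow} Pois(\lambda)$ by Theorem 5.2.10.
\end{description}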
\begin{description} %%% \overset{iid}{\sim}
\item[Section 6] - \textbf{Maximum Likelihood Estimation }\\
\underline{Consistent}: under regularity conditions, if there is a unique solution to the likelihood equation $\frac{\partial}{\partial \theta}L(\theta) = 0$, then $\hat{\theta} \overset{P}{\rightarrow} \theta$ ($\hat{\theta}$ is consistent for $\theta$).\\
\underline{Score function}: $\frac{\partial ln(f(x; \theta))}{\partial \theta}$;\\
\underline{Fisher Information} (example below): $I(\theta)= var(\frac{\partial ln(f(x; \theta))}{\partial \theta}) = -E(\frac{\partial^2 ln(f(x; \theta))}{\partial \theta^2})$;\\
\underline{Efficient}: $y$ is unbiased for $\theta$, $y$ is efficient for $\theta$ iff $var(y) = [nI(\theta)]^{-1}$. In general, $var(y) \geq [nI(\theta)]^{-1}$ where $y$ is unbiased for $\theta$. \\
\underline{Efficiency}: The efficiency of an unbiased estimator is given by the ratio $\frac{RCLB}{var(\hat{\theta})}$, where $RCLB = [nI(\theta)]^{-1}$. \\
\underline{Relative Efficiency}: relative efficiency of unbiased estimators $\hat{\theta}_1$ to $\hat{\theta}_2$ is $\frac{var(\hat{\theta}_2)}{var(\hat{\theta}_1)}$. If $\hat{\theta}_2$ is biased, then the relative efficiency is $\frac{var(\hat{\theta}_2) + [bias(\hat{\theta}_2)]^2}{var(\hat{\theta}_1)}$ where $bias(\hat{\theta}_2) = E(\hat{\theta}_2) - \theta$.\\
\underline{Theorem 6.1.2}: Suppose $\hat{\theta}$ is the MLE of $\theta$ and $g(\theta)$ is a function of $\theta$. Then the MLE of $g(\theta)$ is $\widehat{g(\theta)} = g(\hat{\theta})$. \\
\underline{Theorem 6.2.1 (Rao-Cramer Lower Bound)}: $X_1, X_2, ..., X_n \overset{iid}{\sim} f(x; \theta)$ for $\theta \in \Omega$. Let $Y = u(X_1,X_2, ..., X_n)$ be a statistic with mean $E(Y) = k(\theta)$. Then $var(Y) \geq \frac{[k'(\theta)]^2}{nI(\theta)}$. \\
\underline{MVUE}: $\hat{\theta} = u(X_1, X_2, ..., X_n)$ is a minimum variance unbiased estimator for $\theta$ iff $E(\hat{\theta}) = \theta$ and $var(\hat{\theta})$ is less than or equal to the variance of every other unbiased estimator. \\
\underline{Theorem}: If $\hat{\theta}$ is asymptotically unbiased for $\theta$ and $var(\hat{\theta}) \rightarrow 0$ as $n\rightarrow \infty$, then $\hat{\theta} \overset{P}{\rightarrow} \theta$.
\end{description}
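\begin{description}
\item[Example] - Fisher information and efficiency (illustrative): for $X \sim Pois(\theta)$, $ln(f(x;\theta)) = x\,ln(\theta) - \theta - ln(x!)$, score $= \frac{x}{\theta} - 1$, $\frac{\partial^2 ln(f)}{\partial\theta^2} = -\frac{x}{\theta^2}$, so $I(\theta) = E(\frac{X}{\theta^2}) = \frac{1}{\theta}$. The MLE $\hat{\theta} = \bar{X}$ is unbiased with $var(\bar{X}) = \frac{\theta}{n} = [nI(\theta)]^{-1}$, hence efficient.
\end{description}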
\begin{description}
\item[Section 7] - \textbf{Measure of Quality of Estimators}\\
\underline{MVUE}: $\hat{\theta} = u(X_1, X_2, ..., X_n)$ is a minimum variance unbiased estimator for $\theta$ iff $E(\hat{\theta}) = \theta$ and $var(\hat{\theta})$ is less than or equal to the variance of every other unbiased estimator. The relative efficiency of the MVUE to any other unbiased estimator must be $\geq 1$. The MVUE is consistent if $var(\hat{\theta}) \rightarrow 0$ as $n\rightarrow\infty$. \\
\underline{Sufficient Statistics}: Let $X_1, X_2, ..., X_n$ be a random sample from $f(x;\theta)$; $y_1 = u(X_1,X_2, ..., X_n)$ is sufficient for $\theta$ iff $\frac{\Pi(f(x_i;\theta))}{g_1(y_1;\theta)} = H(x_1, x_2, ..., x_n)$, where $g_1$ is the marginal pdf of $y_1$ and $H$ does not depend on $\theta$.\\
\underline{Factorization Theorem}: $y_1 = u(x_1, x_2,..., x_n)$ is sufficient for $\theta$ iff $\Pi_{i=1}^n f(x_i; \theta) = k_1(u(x_1, x_2, ..., x_n); \theta) k_2(x_1, x_2, ..., x_n)$ where $k_2(x_1, x_2, ..., x_n)$ does not depend on $\theta$. \\
\underline{Theorem 7.3.2}: $X_1, X_2, ..., X_n \overset{iid}{\sim} f(x; \theta)$. If a sufficient statistic $y_1 = u(X_1,X_2, ..., X_n)$ for $\theta$ exists and if an MLE $\hat{\theta}$ exists uniquely, then $\hat{\theta}$ is a function of $y_1$. \\
\underline{Theorem}: Suppose $y_1 = u(X_1,X_2, ..., X_n)$ is a sufficient statistic for $\theta$ and $z = v(y_1)$ is a 1-to-1 transformation not involving $\theta$; then $z$ is also sufficient for $\theta$. \\
\underline{\textbf{Rao-Blackwell Theorem}}: Let (1) $y_1 = u_1(x_1, x_2, ..., x_n)$ be sufficient for $\theta$; (2) $y_2 = u_2(x_1, x_2, ..., x_n)$ be unbiased for $\theta$; (3) $\phi(y_1) = E(y_2|y_1)$. Then, (1) $\phi(y_1)$ is a statistic, a function of $y_1$ alone; (2) $\phi(y_1)$ is unbiased for $\theta$; (3) $\phi(y_1)$ has variance $\leq \sigma_{y_2}^2$, with equality only if $y_2$ is already a function of $y_1$. \\
\underline{Completeness}: Suppose $Z \sim h(z; \theta)$, a member of a family of p.d.f's (p.m.f's): $\{h(z;\theta), \theta \in \Omega \}$. If $E(u(z)) = 0, \forall \theta \in \Omega$ implies that $u(z) = 0$ except on a set of points that has probability 0 for each $h(z; \theta), \theta \in \Omega$, then the family $\{h(z;\theta), \theta \in \Omega \}$ is called a complete family of density (mass) functions. \\
\underline{Lehmann--Scheff\'e (MVUE)}: Let $Y_1 = u_1(X_1,X_2, ..., X_n)$ be sufficient for $\theta$ and $\{g_1(y_1, \theta): \theta \in \Omega \}$ be a complete family of densities (or p.m.f.'s). If there is a function of $Y_1$ which is unbiased for $\theta$, then this function of $Y_1$ is the unique MVUE for $\theta$. \\
\underline{Find MVUE}: (1) find a sufficient statistic $t$; (2) show that the family of distributions of $t$ is complete [shortcut - Theorem 7.5.2]; (3) find a crude unbiased estimator; (4) evaluate its conditional expectation given $t$ (Rao--Blackwell) to get the unique MVUE. \\
\underline{Exponential Family}: $f(x; \theta) = exp\{p(\theta)k(x) + \delta (x) + q(\theta)\}$ where $\delta(x)$ does not depend on $\theta$, $p(\theta)$ is nontrivial continuous, and $k'(x) \neq 0$.\\
$X_1,X_2, ..., X_n \overset{iid}{\sim} f(x; \theta)$, a regular case of the exponential class with $y_1 = \sum k(x_i)$. Then, (1) $g_1(y_1; \theta) = R(y_1)exp\{p(\theta)y_1 + nq(\theta)\}$; (2) $E(y_1) = -n\frac{q'(\theta)}{p'(\theta)}$; (3) $var(y_1) = \frac{n}{[p'(\theta)]^3}\{p''(\theta)q'(\theta) - q''(\theta)p'(\theta)\}$ \\
\underline{\textbf{Theorem 7.5.2}}: $X_1,X_2, ..., X_n \overset{iid}{\sim} f(x; \theta)$, a regular case of exponential class with $\Omega = \{\theta: \gamma < \theta < \delta \}$, $y_1 = \sum k(x_i)$ is \textbf{sufficient} for $\theta$ and the family $\{g_1(y_1; \theta): \gamma < \theta< \delta \}$ is \textbf{complete}. \\
\underline{Theorem 7.4.1}: $X_1, X_2, ..., X_n$, a random sample from $f(x;\theta)$, $y_1 = u_1(X_1, X_2, ..., X_n)$ complete sufficient for $\theta$ and $E(\phi(y_1)) = \alpha(\theta)$, then $\phi(y_1)$ is unique MVUE for $\alpha(\theta)$.\\
unique MVUE = sufficient + complete (Theorem 7.5.2) + unbiased (example below) \\
\end{description}
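\begin{description}
\item[Example] - finding a unique MVUE (illustrative): for $X_i \overset{iid}{\sim} Pois(\theta)$, $f(x;\theta) = exp\{x\,ln(\theta) - \theta - ln(x!)\}$ is a regular exponential class with $p(\theta) = ln(\theta)$, $k(x) = x$, so by Theorem 7.5.2 $y_1 = \sum X_i$ is sufficient and complete. $\bar{X} = y_1/n$ is a function of $y_1$ with $E(\bar{X}) = \theta$, hence the unique MVUE for $\theta$ (Lehmann--Scheff\'e).
\end{description}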
\begin{description}
\item[Section 8] - \textbf{Most Powerful Tests}\\
\underline{Neyman-Pearson Theorem} (example below): A \textbf{best critical region} of size $\alpha$ for testing $H_0: \theta = \theta'$ vs.\ $H_1: \theta = \theta''$ (both simple) is such that \\
(1) $\dfrac{L(\theta' ; x_1, x_2, ..., x_n)}{L(\theta'' ; x_1, x_2, ..., x_n)}\leq k$ for each $(x_1, x_2, ..., x_n) \in C$\\
(2) $\dfrac{L(\theta' ; x_1, x_2, ..., x_n)}{L(\theta'' ; x_1, x_2, ..., x_n)}\geq k$ for each $(x_1, x_2, ..., x_n) \in C^c$\\
(3) $P((x_1, x_2, ..., x_n) \in C; H_0) = \alpha$ \\
\underline{Uniformly Most Powerful Test}: $C$ is a uniformly most powerful (UMP) critical region of size $\alpha$ if $C$ is a best critical region of size $\alpha$ for testing $H_0$ against \textbf{each} simple hypothesis in $H_1$; the corresponding test is then a UMP test of size $\alpha$.\\
\underline{Likelihood Ratio Test}:
Let (1) $L(\hat{\omega})$ denote the maximum of the likelihood function with respect to $\theta$ when $\theta$ is in the null parameter space $\omega$. (2) $L(\hat\Omega)$ denote the maximum of the likelihood function with respect to $\theta$ when $\theta$ is in the entire parameter space $\Omega$. Then, the likelihood ratio is the quotient: \\
\begin{center}
$\lambda = \frac{L(\hat{\omega})}{L({\hat{\Omega}})} = \frac{L(\hat{\theta}_0|x)}{L(\hat{\theta}|x)}$.\\ %= \frac{max_{\theta \in \Theta_0}L(\theta)}{max_{\theta \in \Theta } L(\theta)}
\end{center}
Note: MLEs of $\mu$ and $\sigma^2$ for the normal distribution: $\hat{\mu} = \bar{x}$, $\hat{\sigma}^2 = \frac{1}{n}\sum (x_i - \bar{x})^2$; for the two-sample problem with common variance: $\hat{\mu} = (n+m)^{-1}\{\sum x_i + \sum y_i \}$, $\hat{\sigma}^2 = (n+m)^{-1} \{\sum (x_i - \bar{x})^2 + \sum (y_i - \bar{y})^2\}$.\\
\underline{Neyman-Pearson Lemma}: If the critical value $c$ is chosen so that $P_{\theta_0}(\Lambda \leq c) = \alpha$, then the test with decision rule \\
\begin{center}
Reject $\theta = \theta_0$ in favor of $\theta = \theta_1$ when $\Lambda \leq c$
\end{center}
is a most powerful test of size $\alpha$.
\end{description}
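\begin{description}
\item[Example] - Neyman-Pearson in action (a sketch assuming $N(\theta, \sigma^2)$ sampling with $\sigma^2$ known): for $H_0: \theta = \theta'$ vs.\ $H_1: \theta = \theta''$ with $\theta'' > \theta'$, $\frac{L(\theta')}{L(\theta'')} = exp\{-\frac{1}{2\sigma^2}[2(\theta'' - \theta')\sum x_i + n(\theta'^2 - \theta''^2)]\} \leq k \iff \bar{x} \geq c$. Choosing $c = \theta' + z_\alpha\frac{\sigma}{\sqrt{n}}$ gives size $\alpha$; since $C = \{\bar{x} \geq c\}$ does not depend on $\theta''$, the test is UMP for $H_1: \theta > \theta'$.
\end{description}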
\end{multicols}
\end{document}