diff --git a/SAS/log-analysis.pdf b/SAS/log-analysis.pdf
index 7116cb7..3324ad2 100644
Binary files a/SAS/log-analysis.pdf and b/SAS/log-analysis.pdf differ
diff --git a/SAS/log-cleaning.pdf b/SAS/log-cleaning.pdf
index 5bb636a..afd82b3 100644
Binary files a/SAS/log-cleaning.pdf and b/SAS/log-cleaning.pdf differ
diff --git a/SAS/output-analysis.pdf b/SAS/output-analysis.pdf
index 41aad5a..253d118 100644
Binary files a/SAS/output-analysis.pdf and b/SAS/output-analysis.pdf differ
diff --git a/SAS/output-cleaning.pdf b/SAS/output-cleaning.pdf
index 5e4cb9f..4c3f582 100644
Binary files a/SAS/output-cleaning.pdf and b/SAS/output-cleaning.pdf differ
diff --git a/code/analysis.SAS b/code/analysis.SAS
index 28effbf..5959140 100644
--- a/code/analysis.SAS
+++ b/code/analysis.SAS
@@ -36,13 +36,19 @@ RUN;
 * Plot outcome time series;
 *******************************************************************************;
 
-FOOTNOTE;
+FOOTNOTE; * Remove the footnote so it isn't on the graphs;
 
+* Plot the time series of log grain price over time. This makes a separate
+	time series line for each grain.;
 TITLE2 "PRICE PER BUSHEL OF GRAINS OVER TIME";
 PROC SGPLOT DATA = HOME.GRAINS;
 	SERIES X = YEAR Y = LPE / GROUP = GRN;
 RUN;
 
+* Color the points of the time series by the President's political party. The
+	default colors are already red and blue so we don't need to change them!
+	Also plots a gray line underneath the points, since the JOIN option for
+	the SCATTER statement will not connect the points in order.;
 TITLE2 "GRAIN PRICES AND PRESIDENT'S POLITICAL PARTY OVER TIME";
 PROC SGPANEL DATA = HOME.GRAINS;
 	PANELBY GRN;
@@ -51,14 +57,22 @@ PROC SGPANEL DATA = HOME.GRAINS;
 		MARKERATTRS = (SYMBOL = CIRCLEFILLED);
 RUN;
 
-TITLE2 "PERCENT CHANGE DISTRIBUTION BY PRESIDENT'S POLITICAL PARTY";
+* Make a boxplot of log price vs. president's political party. This ignores the
+	time series information, but can tell us if either party has more high or
+	low years compared to the other.;
+TITLE2 "LOG PRICE DISTRIBUTION BY PRESIDENT'S POLITICAL PARTY";
 PROC SGPANEL DATA = HOME.GRAINS;
 	PANELBY GRN;
-	HBOX PCT / GROUP = PARTY;
+	HBOX LPE / GROUP = PARTY;
 RUN;
 
 ODS GRAPHICS / WIDTH = 6in HEIGHT = 9in;
 
+* Make a scatterplot of the log price vs each covariate, ignoring the time
+	series component of the data. There is not an easy way to connect the points
+	like a phase portrait using PROC SGPLOT.;
+* I divided this into two plots so they would fit on one page nicer. In the final
+	manuscript they could be put side by side.;
 TITLE2 "SCATTERPLOTS OF PRICE VS COVARIATES";
 PROC SGSCATTER DATA = HOME.GRAINS;
 	PLOT LPE * (ACR HVT LNR PRD YLD) / REG
@@ -74,6 +88,8 @@ RUN;
 * Plots of covariates across time;
 *******************************************************************************;
 
+* Plot the time series of each covariate, to assess how they change. I split
+	this one into two plots to prevent the plots being too small as before.;
 TITLE2 "CHANGE IN COVARIATES ACROSS TIME";
 PROC SGSCATTER DATA = HOME.GRAINS;
 	PLOT (ACR HVT LNR PRD YLD) * YEAR /
@@ -89,7 +105,7 @@ RUN;
 * Univariate analyses;
 *******************************************************************************;
 
-* Univariate analysis of main outcome by grain type;
+* Univariate analysis of main outcome (log price) by grain type;
 TITLE2 "UNIVARIATE SUMMARY OF GRAIN DATA OVER TIME";
 
 ODS GRAPHICS / WIDTH = 4in HEIGHT = 4in;
@@ -103,13 +119,19 @@ RUN;
 * Bivariate analyses of price and covariates, ignoring time;
 *******************************************************************************;
 
-* Correlations;
+TITLE2 "BIVARIATE CORRELATIONS ACROSS NUMERICAL VARIABLES";
+* Correlations -- check to see which covariates are correlated with the outcome,
+	and which are correlated with each other and should not be modeled
+	together.;
 PROC CORR PEARSON SPEARMAN DATA = HOME.GRAINS;
 	VAR LPE ACR HVT LNR PRD YLD INFL PWR TEMP VALUE;
 	BY GRN;
 RUN;
 
-* Mean difference in LPE by party;
+TITLE2 "SUMMARY STATISTICS BY PRESIDENTIAL PARTY AND GRAIN";
+* Mean difference in LPE by party -- proc corr does not have a point biserial
+	option, so we can check the difference/overlap in means and standard errors
+	to assess if party seems to impact log price for any of the grains.;
 PROC MEANS DATA = HOME.GRAINS MEAN STDERR MEDIAN RANGE NWAY;
 	VAR LPE;
 	CLASS PARTY;
@@ -120,6 +142,13 @@ RUN;
 * Simple and multiple OLS regression models;
 *******************************************************************************;
 
+TITLE2 "SIMPLE LINEAR REGRESSION MODELS";
+* Write a macro to fit all regression models of the form
+		MODEL COVAR GRN COVAR * GRN
+ 	without having to type out all of the PROC GLM statements. This model will
+ 	be parametrized without an intercept, and will generate all appropriate
+ 	diagnostic plots for the model.;
+
 %MACRO ALLSIMPLE(DAT = , RESP = , PRED = );
 	%LET N = %SYSFUNC(COUNTW(&DAT));
 	%DO I = 1 %TO &N;
@@ -136,12 +165,18 @@ RUN;
 	PRED = ACR PRD INFL TEMP PWR YEAR
 );
 
+* Fit the same model that was used as before, but with party as a covariate.
+	Party needs to be in the class statement, and is the only categorical
+	variable, so it wasn't worth modifying the above macro to use party
+	correctly and I did it manually.;
 PROC GLM DATA = HOME.GRAINS PLOTS = ALL;
 	CLASS GRN PARTY;
 	MODEL LPE = GRN | PARTY;
 RUN;
 
-* 1866 FULL MODEL;
+TITLE2 "1866 FULL MODEL";
+* 1866 FULL MODEL: this model includes all non-correlated predictors that were
+	measured in 1866.;
 PROC MIXED DATA = HOME.GRAINS PLOTS = ALL;
 	CLASS GRN PARTY;
 	MODEL LPE =
@@ -151,12 +186,15 @@ PROC MIXED DATA = HOME.GRAINS PLOTS = ALL;
 	;
 RUN;
 
-* FULL MODEL WITH TEMP (1880 MODEL);
+TITLE2 "1880 FULL MODEL";
+* FULL MODEL WITH TEMP (1880 MODEL): this model is the same as the previous
+	model, but also includes the temperature anomaly information. Consequently,
+	it only uses data from 1880 onwards (even less for sorghum).;
 PROC MIXED DATA = HOME.GRAINS PLOTS = ALL;
 	CLASS PARTY GRN;
 	MODEL LPE =
-		GRN HVT PRD INFL PWR YEAR
-		GRN*HVT GRN*PRD GRN*INFL GRN*PWR GRN*YEAR GRN*PARTY /
+		GRN HVT PRD INFL PWR YEAR PARTY TEMP
+		GRN*HVT GRN*PRD GRN*INFL GRN*PWR GRN*YEAR GRN*PARTY TEMP*PARTY/
 		NOINT SOLUTION
 	;
 RUN;
@@ -165,6 +203,10 @@ RUN;
 * GLS multiple regression analysis;
 *******************************************************************************;
 
+* Take the better fitting (by AIC) of the two previous models, and run a model
+	that can account for correlation using generalized least squares.
+	This model assumes exchangeable correlations between each of the time points.;
+
 PROC MIXED DATA = HOME.GRAINS PLOTS = ALL;
 	CLASS PARTY GRN;
 	MODEL LPE = HVT PRD INFL PWR YEAR GRN GRN*HVT GRN*PRD /
@@ -176,32 +218,46 @@ RUN;
 * Simple forecasting;
 *******************************************************************************;
 
+* Now instead of just using regression models, we can try to fit a more
+	flexible forecasting model using PROC ARIMA.
+	First, we need a time variable that is actually a SAS date, so we create 
+	that first.;
 DATA TS_DAT;
 	SET HOME.GRAINS;
 	T = MDY(1, 1, YEAR);
 RUN;
 
+* Next we use the IDENTIFY modeling stage. We check up to 30 lags in the
+	first ARIMA modeling stage, and also explicitly test for stationarity at
+	the first 10 differences using the random walk with drift test. We
+	also use the SCAN method, which is a heuristic for identifying
+	candidate ARIMA models.;
 PROC ARIMA DATA = TS_DAT;
 	IDENTIFY VAR = LPE NLAG = 30 SCAN STATIONARITY = (RW = 10);
+	BY GRN;
 RUN;
 
-PROC ARIMA DATA = TS_DAT;
-	IDENTIFY VAR = LPE(1);
-RUN;
-
+* Next we use the ESTIMATE modeling stage. We fit several different ARIMA
+	models to the data in order to see which fits our time series the best,
+	and if any have white noise as the error term.;
 PROC ARIMA DATA = TS_DAT;
 	IDENTIFY VAR = LPE;
 	ESTIMATE P = 1;
 	ESTIMATE Q = 1;
 	ESTIMATE P = 1 Q = 1;
 	ESTIMATE P = 2;
+	BY GRN;
 RUN;
 
+* Finally, we use the best fitting model to make some simple forecasts in
+	the FORECAST modeling stage. We also identify outliers of the best
+	fitting model.;
 PROC ARIMA DATA = TS_DAT;
 	IDENTIFY VAR = LPE;
 	ESTIMATE P = 1;
 	OUTLIER;
 	FORECAST LEAD = 10 INTERVAL = YEAR ID = T OUT = GRAIN_FC;
+	BY GRN;
 RUN;
 	
 	
diff --git a/code/cleaning.sas b/code/cleaning.sas
index 2b9fbb8..a185e7d 100644
--- a/code/cleaning.sas
+++ b/code/cleaning.sas
@@ -180,8 +180,11 @@ RUN;
 FILENAME FDGRN '/home/u59465388/SAS-Grain-Prices/fg-sheet1.csv';
 
 DATA ALLGRNS;
-	* Import the CSV file;
-	INFILE FDGRN DLM = ',' DSD FIRSTOBS = 9 MISSOVER;
+	* Import the CSV file. The option DSD is necessary to read in consecutive
+		delimiters as missing data, and the MISSOVER option is necessary as
+		there are missing values at the end of lines, so the INPUT specification
+		should be interpreted strictly.;
+	INFILE FDGRN DLM = ',' FIRSTOBS = 9 DSD MISSOVER;
 	
 	* SAS doesn't like the missing values being denoted by ,, even with the DSD
 		option, and has a hard time parsing the numeric values. So, I'll import
diff --git a/data/SAS/grains.sas7bdat b/data/SAS/grains.sas7bdat
index a809a31..69a36c5 100644
Binary files a/data/SAS/grains.sas7bdat and b/data/SAS/grains.sas7bdat differ