Merge pull request #42 from drbenvincent/dev
Dev
drbenvincent committed Apr 10, 2016
2 parents b1f13b9 + 9c2e376 commit 9fbbcfd
Showing 129 changed files with 8,655 additions and 2,438 deletions.
12 changes: 12 additions & 0 deletions .gitignore
@@ -7,3 +7,15 @@ demo/parameterEstimates_methodspaper-kirby27.txt
demo/parameterEstimates_methodspaper-kirby27.txt

demo/parameterEstimates_nonHierarchical.txt

demo/temp.data.R

demo/output-1.csv

demo/output-2.csv

*.hpp

ddToolbox/models/hierarchicalME

ddToolbox/models/hierarchicalLogK
16 changes: 9 additions & 7 deletions README.md
@@ -18,10 +18,12 @@ Go to the [wiki](https://github.com/drbenvincent/delay-discounting-analysis/wiki
# Key features:

* Know how confident you are in delay discounting parameters from the posterior distributions calculated.
* Easily interpretable results due to using the 1-parameter hyperbolic discount function.
* Improved accuracy of delay discounting parameter estimates using hierarchical Bayesian estimation. Trial-level responses, and participant- and group-level parameters are all modelled.
* Easily interpretable results due to using the 1-parameter hyperbolic discount function
* Richer results as we model the magnitude effect (how discount rate varies as a function of reward magnitude).
* More robust discounting parameter estimates due to explicitly modelling participant errors.
* A variety of models are provided to:
  * Estimate discount rates, log(k).
  * Estimate the magnitude effect: how discount rate varies as a function of reward magnitude.
* Explicit modelling of participant errors provides more robust estimates of the discounting parameters.

# Easy to use!
The commands used to get your analysis up and running are quick and easy. Here is a minimum working example of the demo provided:
@@ -32,10 +34,10 @@ cd('/Users/benvincent/git-local/delay-discounting-analysis/demo')
myData = DataClass('data');
myData.loadDataFiles({'AC-kirby27-DAYS.txt','CS-kirby27-DAYS.txt','NA-kirby27-DAYS.txt','SB-kirby27-DAYS.txt','bv-kirby27.txt','rm-kirby27.txt','vs-kirby27.txt','BL-kirby27.txt','EP-kirby27.txt','JR-kirby27.txt','KA-kirby27.txt','LJ-kirby27.txt','LY-kirby27.txt','SK-kirby27.txt','VD-kirby27.txt'});
saveFolder = 'methodspaper-kirby27';
hModel = ModelHierarchical(toolboxPath, 'JAGS', myData, saveFolder);
hModel.conductInference();
hModel.exportParameterEstimates();
hModel.plot();
model = ModelHierarchicalME(toolboxPath, 'JAGS', myData, saveFolder);
model.conductInference();
model.exportParameterEstimates();
model.plot();
```
# Get in touch
Please use the [GitHub Issues](https://github.com/drbenvincent/delay-discounting-analysis/issues) feature to ask a question, report a bug, or request a feature. You'll need a GitHub account to do this, which isn't hard to set up, but you could always email me instead.
6 changes: 3 additions & 3 deletions ddToolbox/calculateLogK_ConditionOnReward.m
@@ -1,4 +1,4 @@
function [posteriorMode,lh] = calculateLogK_ConditionOnReward(reward, params, plotFlag)
function [posteriorMean,lh] = calculateLogK_ConditionOnReward(reward, params, plotFlag)
lh=[];
% -----------------------------------------------------------
% log(k) = m * log(B) + c
@@ -20,8 +20,8 @@
% Calculate kernel density estimate
[f,xi] = ksdensity(logKsamples, 'function', 'pdf');

% Calculate posterior mode
posteriorMode = xi( argmax(f) );
%posteriorMode = xi( argmax(f) );
posteriorMean = mean(logKsamples);

if plotFlag
figure(1)
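The change in this file swaps the point estimate reported for log(k) from the posterior mode, located via a kernel density estimate, to the posterior mean of the samples. Below is a minimal, self-contained sketch of the two summaries on synthetic samples; the variable names are illustrative, and the toolbox's `argmax()` helper corresponds to the `max` step here.

```matlab
% Sketch (not toolbox code): posterior mode vs posterior mean of MCMC samples.
% logKsamples is synthetic here; in the toolbox it comes from the MCMC chains.
logKsamples = -3.5 + 0.4*randn(10000, 1);

% Old behaviour: mode of a kernel density estimate of the samples.
[f, xi] = ksdensity(logKsamples, 'function', 'pdf');
[~, idx] = max(f);            % the toolbox's argmax() helper does this step
posteriorMode = xi(idx);

% New behaviour: the mean of the samples.
posteriorMean = mean(logKsamples);

fprintf('posterior mode = %.3f, posterior mean = %.3f\n', posteriorMode, posteriorMean)
```

For a symmetric, unimodal posterior the two summaries agree closely; they differ when the posterior is skewed, and the mean does not depend on the kernel bandwidth.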
284 changes: 77 additions & 207 deletions ddToolbox/classes/DataClass.m
@@ -10,8 +10,6 @@
IDname

participantLevel
%covariateSupplied
%covariateProbeVals

groupTable
observedData
@@ -22,242 +20,114 @@

% =================================================================
function obj=DataClass(dataFolder)
% create empty tables
obj.groupTable = table();
obj.participantLevel(1).table = table();
% where the data is
try
table();
catch
error( strcat('ERROR: This version of Matlab does not support the Table data type. ',...
'You will need to call DataClassLegacy() instead of DataClass().'))
end
obj.dataFolder = dataFolder;
% % by default assume we do not have any covariate data
% obj.covariateSupplied = false;
% % create a savename
% [PATHSTR,NAME,EXT] = fileparts(saveName);
% obj.saveName = NAME;
display('You have created a DataClass object')
display('You have created a Data object')
end
% =================================================================


function [obj] = loadDataFiles(obj,fnames)
% fnames should be a cell array of filenames

% TODO: TIDY UP WHAT IS HAPPENING HERE

for n=1:numel(fnames) % loop over fnames, each time importing
fname = fnames{n};
% Load tab separated .txt file with rows labelled: A, B, D, R. This
% will load the data into T, which is a 'table' data type, see:
% http://uk.mathworks.com/help/matlab/tables.html
rawData = readtable(fullfile(obj.dataFolder,fname), 'delimiter','tab');

% add a new column defining the participant ID
ID = ones( height(rawData), 1) * n;
participantTable = [rawData table(ID)];

% add column of participant filenames (all identical
% entries) for easier identification of participants in
% plots
participantInitials = strtok(fnames{n}, '-');
obj.IDname{n} = participantInitials;
% TODO: Don't need the 2 lines below, but keep for
% reference in case it's useful.
%IDname = table(repmat(participantInitials,height(rawData),1), 'VariableNames',{'IDname'});
%participantTable = [participantTable IDname];
% INPUT:
% - fnames a cell array of filenames of participant data

% complete participant level data
obj.participantLevel(n).table = participantTable;
obj.participantLevel(n).trialsForThisParticant = height(participantTable);
obj.participantLevel(n).data.A = obj.participantLevel(n).table.A;
obj.participantLevel(n).data.B = obj.participantLevel(n).table.B;
obj.participantLevel(n).data.DA = obj.participantLevel(n).table.DA;
obj.participantLevel(n).data.DB = obj.participantLevel(n).table.DB;
obj.participantLevel(n).data.R = obj.participantLevel(n).table.R;
obj.participantLevel(n).data.ID = obj.participantLevel(n).table.ID;

% append participant to group table
obj.groupTable = [obj.groupTable;participantTable];
end

% % CREATE UNKNOWN PARTICIPANT HERE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
% n = numel(fnames)+1;
% obj.IDname{n} = '**G**';
% obj.participantLevel(n).table = [];
% obj.participantLevel(n).trialsForThisParticant=1;
% obj.participantLevel(n).data.A = 1;
% obj.participantLevel(n).data.B = 2;
% obj.participantLevel(n).data.DA = 0;
% obj.participantLevel(n).data.DB = 1;
% obj.participantLevel(n).data.R = NaN;
% obj.participantLevel(n).data.ID = obj.IDname{n};
% % ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
obj.nParticipants = numel(fnames);
obj.participantFilenames = fnames;

%% Copy the observed data into a structure
maxTrials = max([obj.participantLevel.trialsForThisParticant]);
nParticipants = numel(obj.participantLevel);
% create an empty matrix which we then fill with data
obj.observedData.A = NaN(nParticipants, maxTrials);
obj.observedData.B = NaN(nParticipants, maxTrials);
obj.observedData.DA = NaN(nParticipants, maxTrials);
obj.observedData.DB = NaN(nParticipants, maxTrials);
obj.observedData.R = NaN(nParticipants, maxTrials);
for p=1:nParticipants
Tp = obj.participantLevel(p).trialsForThisParticant;
obj.observedData.A(p,[1:Tp]) = obj.participantLevel(p).data.A;
obj.observedData.B(p,[1:Tp]) = obj.participantLevel(p).data.B;
obj.observedData.DA(p,[1:Tp]) = obj.participantLevel(p).data.DA;
obj.observedData.DB(p,[1:Tp]) = obj.participantLevel(p).data.DB;
obj.observedData.R(p,[1:Tp]) = obj.participantLevel(p).data.R;
fnameType = 'all'; % {'all', 'ID'}
for n=1:obj.nParticipants
switch fnameType
case {'all'}
[~,obj.IDname{n},~] = fileparts(fnames{n}); % just get filename
case{'ID'}
obj.IDname{n} = getPrefixOfString(fnames{n},'-')
end
%obj.IDname{n} = obj.extractParticipantInitialsFromFilename(fnames{n});
%obj.IDname{n} = obj.extractIDfromFilename(fnames{n});
participantTable = readtable(fullfile(obj.dataFolder,fnames{n}), 'delimiter','tab');
participantTable = obj.appendParticipantIDcolumn(participantTable, n);
obj.participantLevel(n).table = participantTable;
obj.participantLevel(n).trialsForThisParticant = height(participantTable);
end

% T is a vector containing number of trials for each participant
obj.observedData.T = [obj.participantLevel.trialsForThisParticant];

% calculate more things
obj.constructObservedDataForMCMC()
obj.exportGroupDataFile()
obj.totalTrials = height(obj.groupTable);
obj.nParticipants = nParticipants;
obj.participantFilenames = fnames;

% by default, assume we do not have any covariate data
% obj.covariateSupplied = false;
% % set all covariate values to zero
% covariateValues = zeros([1, obj.nParticipants]);
% obj.setCovariateValues(covariateValues);
display('The following participant-level data files were imported:')
display(fnames')
end

% save
saveLocation = fullfile(obj.dataFolder,'groupLevelData');
if ~exist(saveLocation, 'dir'), mkdir(saveLocation), end
function exportGroupDataFile(obj)
obj.buildGroupDataTable();
saveLocation = fullfile(obj.dataFolder,'groupLevelData');
if ~exist(saveLocation, 'dir'), mkdir(saveLocation), end
writetable(obj.groupTable,...
fullfile(saveLocation,'COMBINED_DATA.txt'),...
'delimiter','tab')
fprintf('A copy of the group-level dataset just constructed has been saved as a text file:\n%s\n',...
fullfile(saveLocation,'COMBINED_DATA.txt'));

display('The following participant-level data files were imported:')
display(fnames')
end


function [data] = getParticipantData(obj,participant)
% grabs data just from one participant.
data = obj.participantLevel(participant).data;
data.trialsForThisParticant =...
obj.participantLevel(participant).trialsForThisParticant;
function buildGroupDataTable(obj)
obj.groupTable = table();
for n=1:obj.nParticipants
obj.groupTable = [obj.groupTable; obj.participantLevel(n).table];
end
end

function [dataStruct] = getParticipantData(obj,participant)
% grabs data just from one participant.
% OUTPUTS:
% a structure with fields
% - A, B, DA, DB, R, ID (all column vectors)
% - trialsForThisParticant (a single value)

% function [obj] = addData(obj, thisTrialData)
% % adds one trial worth of data
% % we assume this is happening in the context of live fitting
% % during an adaptive experimental procedure, so we are only
% % dealing with one participant

% % append to bottom of table
% obj.participantLevel(1).table = [obj.participantLevel(1).table ; thisTrialData];

% % copy to groupTable
% obj.groupTable = obj.participantLevel(1).table;

% % Copy the observed data into a structure
% obj.observedData.A = obj.groupTable.A;
% obj.observedData.B = obj.groupTable.B;
% obj.observedData.DA = obj.groupTable.DA;
% obj.observedData.DB = obj.groupTable.DB;
% obj.observedData.R = obj.groupTable.R;
% %obj.observedData.ID = obj.groupTable.ID;

% % calculate more things
% obj.totalTrials = height(obj.groupTable);
% obj.nParticipants = 1;
% end


%% DEPRECATED
% function quickAnalysis(obj)
% % *********
% % NOTE TO SELF: This function needs to be improved. I need to
% % plug in plotting functions for:
% % - discount function plots when all DA==0
% % - discount surface plots when not all DA==0
% % *********
%
% for n=1:obj.nParticipants
% datap = getParticipantData(obj, n);
% [logk, kvec, prop_explained] = obj.quickAndDirtyEstimateOfLogK(datap);
%
% figure(1), clf, drawnow
% subplot(1,2,1) % plot raw data
% plot3DdataSpace(datap, []);
%
% subplot(1,2,2) % plot quick & dirty analysis
% semilogx(kvec, prop_explained)
% axis tight
% ylim([0 1])
% vline(exp(logk));
% title(['participant ' num2str(n)])
% xlabel('discount rate (k)')
% ylabel({'proportion of responses consistent with';'1-param hyperbolic discount function'})
% axis square
% % EXPORTING ---------------------
% figure(1)
% latex_fig(16, 8, 6)
% myExport(obj.saveName, 'dataSummary-', ['participant' num2str(n)]);
% % -------------------------------
% end
% end

dataStruct = table2struct(obj.participantLevel(participant).table,...
'ToScalar',true);

% function obj = setCovariateValues(obj,covariateValues)
% % set the values
% obj.observedData.covariate = covariateValues;
% % If the values are not all zero, then we are dealing with a
% % dataset where meaningful covariate data has been provided.
% if sum((covariateValues==0)~=1) > 0
% display('COVARIATE DATA SUPPLIED')
% obj.covariateSupplied = true;
% % create a vector of probe covariate values for
% % visualisation purposes
%
% % set default values
% obj.observedData.covariateProbeVals = linspace( min(covariateValues), max(covariateValues) ,20);
% else
% display('COVARIATE DEFINED AS NOT PRESENT')
% end
% end
dataStruct.trialsForThisParticant =...
obj.participantLevel(participant).trialsForThisParticant;
end

function constructObservedDataForMCMC(obj)
% construct a structure of ObservedData which will provide input to
% the MCMC process.
maxTrials = max([obj.participantLevel.trialsForThisParticant]);
% create an empty matrix which we then fill with data.
obj.observedData.A = NaN(obj.nParticipants, maxTrials);
obj.observedData.B = NaN(obj.nParticipants, maxTrials);
obj.observedData.DA = NaN(obj.nParticipants, maxTrials);
obj.observedData.DB = NaN(obj.nParticipants, maxTrials);
obj.observedData.R = NaN(obj.nParticipants, maxTrials);
for p=1:obj.nParticipants
Tp = obj.participantLevel(p).trialsForThisParticant;
obj.observedData.A(p,[1:Tp]) = obj.participantLevel(p).table.('A');
obj.observedData.B(p,[1:Tp]) = obj.participantLevel(p).table.('B');
obj.observedData.DA(p,[1:Tp]) = obj.participantLevel(p).table.('DA');
obj.observedData.DB(p,[1:Tp]) = obj.participantLevel(p).table.('DB');
obj.observedData.R(p,[1:Tp]) = obj.participantLevel(p).table.('R');
end

% function obj = setCovariateProbeValues(obj, CovariateProbeValues)
% obj.observedData.covariateProbeVals = CovariateProbeValues;
% end
% T is a vector containing number of trials for each participant
obj.observedData.T = [obj.participantLevel.trialsForThisParticant];
end

end


methods(Static)
%% DEPRECATED
% function [logk, kvec, prop_explained] = quickAndDirtyEstimateOfLogK(data)
% % Given the response data for this participant, do a very quick and dirty
% % estimate of the likely log discount rate (logk). This is used as initial
% % parameters for the MCMC process.
%
% %% 1-parameter hyperbolic discount function --------------------------------
% % v = b ./ (1+(k*d)
% % NOTE: This functions wants the discount rate (k), NOT the log(k)
% V = @(d,k,b) bsxfun(@rdivide, b, 1+bsxfun(@times,k,d) );
%
% %% vector of discount rates (k) to examine ---------------------------------
% kvec = logspace(-8,2,1000);
%
% presentSubjectiveValue = V( data.DB, kvec, data.B);
% chooseDelayed = bsxfun(@minus, presentSubjectiveValue, data.A) >1;
% err = bsxfun(@minus, data.R, chooseDelayed);
% err = sum(abs(err));
%
% % calc proportion of responses explained
% prop_explained = (data.trialsForThisParticant - err) / data.trialsForThisParticant;
%
% %[~, index] = max(prop_explained);
% k_optimal = kvec( argmax(prop_explained) );
% logk = log(k_optimal);
% end

function pTable = appendParticipantIDcolumn(pTable, n)
ID = ones( height(pTable), 1) * n;
pTable = [pTable table(ID)];
end

end

end
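For reference, the `constructObservedDataForMCMC` method above packs each participant's trials into `nParticipants x maxTrials` matrices padded with NaN, plus a vector `T` of per-participant trial counts so the sampler knows which entries are real. A minimal sketch of that padding pattern, using made-up data and illustrative variable names:

```matlab
% Sketch (not toolbox code): pack ragged per-participant responses into a
% NaN-padded [nParticipants x maxTrials] matrix for the MCMC sampler.
trialsPerParticipant = [27, 27, 20];                 % made-up trial counts
responses = {rand(1,27) > 0.5, rand(1,27) > 0.5, rand(1,20) > 0.5};

nParticipants = numel(responses);
maxTrials = max(trialsPerParticipant);

R = NaN(nParticipants, maxTrials);                   % preallocate with NaN padding
for p = 1:nParticipants
    Tp = trialsPerParticipant(p);
    R(p, 1:Tp) = responses{p};                       % fill only the observed trials
end

T = trialsPerParticipant;   % passed alongside R so only the first T(p) columns
                            % of each row are treated as observed data
```

The class above repeats the same pattern for the A, B, DA, and DB columns.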