diff --git a/ddToolbox/PosteriorPrediction.m b/ddToolbox/PosteriorPrediction.m index 2a270246..a0954832 100644 --- a/ddToolbox/PosteriorPrediction.m +++ b/ddToolbox/PosteriorPrediction.m @@ -28,8 +28,7 @@ obj.controlModelProbChooseDelayed(p) = obj.postPred(p).proportion_chose_delayed; % Calculate metrics - obj.postPred(p).score = obj.calcPostPredOverallScore(responses_predicted, responses_actual, obj.controlModelProbChooseDelayed(p)); - obj.postPred(p).GOF_distribtion = obj.calcGoodnessOfFitDistribution(responses_inferredPB, responses_actual, obj.controlModelProbChooseDelayed(p)); + obj.postPred(p).log_loss_distribution = obj.calcLogLossDistribution(responses_inferredPB, responses_actual); obj.postPred(p).percentPredictedDistribution = obj.calcPercentResponsesCorrectlyPredicted(responses_inferredPB, responses_actual); % Store obj.postPred(p).responses_actual = responses_actual; @@ -56,14 +55,14 @@ function plot(obj, plotOptions, modelFilename, model) % PUBLIC GETTERS -------------------------------------------------- - function score = getScores(obj) - score = [obj.postPred(:).score]'; - end - function pp = getPercentPredictedDistribution(obj) pp = {obj.postPred(:).percentPredictedDistribution}; end + function pp = getLogLossDistribution(obj) + pp = {obj.postPred(:).log_loss_distribution}; + end + function postPredTable = getPostPredTable(obj) postPredTable = obj.postPredTable; end @@ -80,17 +79,17 @@ function posterior_prediction_figure(obj, n, plotOptions, plotDiscountFunction) % Arrange subplots h = layout([1 4; 2 3]); subplot(h(1)), obj.pp_plotTrials(n) - subplot(h(2)), obj.pp_plotGOFdistribution(n, plotOptions) + subplot(h(2)), obj.pp_plotLogLossDistribution(n, plotOptions) subplot(h(3)), obj.pp_plotPercentPredictedDistribution(n, plotOptions) plotDiscountFunction(n, h(4)) drawnow end - function pp_plotGOFdistribution(obj, n, plotOptions) + function pp_plotLogLossDistribution(obj, n, plotOptions) uni = mcmc.UnivariateDistribution(... 
- obj.postPred(n).GOF_distribtion(:),... - 'xLabel', 'goodness of fit score',... + obj.postPred(n).log_loss_distribution(:),... + 'xLabel', 'Log Loss',... 'plotStyle','hist',... 'pointEstimateType', plotOptions.pointEstimateType); end @@ -139,11 +138,12 @@ function pp_plotTrials(obj, n) end function postPredTable = makePostPredTable(obj, data, pointEstimateType) - postPredTable = table(obj.getScores(),... + postPredTable = table(... obj.calc_percent_predicted_point_estimate(pointEstimateType),... + obj.calc_log_loss_point_estimate(pointEstimateType),... obj.any_percent_predicted_warnings(),... 'RowNames', data.getIDnames('experiments'),... - 'VariableNames',{'ppScore' 'percentPredicted' 'warning_percent_predicted'}); + 'VariableNames',{'percentPredicted' 'LogLoss' 'warning_percent_predicted'}); if data.isUnobservedPartipantPresent() % add extra row of NaN's on the bottom for the unobserved participant @@ -163,6 +163,14 @@ function pp_plotTrials(obj, n) obj.getPercentPredictedDistribution())'; end + function percentPredicted = calc_log_loss_point_estimate(obj, pointEstimateType) + % Calculate point estimates of the log loss. Use the point + % estimate type that the user specified + pointEstFunc = str2func(pointEstimateType); + percentPredicted = cellfun(pointEstFunc,... 
+ obj.getLogLossDistribution())'; + end + function pp_warning = any_percent_predicted_warnings(obj) % warnings when we have less than 95% confidence that we can % predict more responses than the control model @@ -179,15 +187,10 @@ function pp_plotTrials(obj, n) methods (Access = private, Static) - function [score] = calcGoodnessOfFitDistribution(responses_predictedMCMC, responses_actual, proportion_chose_delayed) - % Expand the participant responses so we can do vectorised calculations below - totalSamples = size(responses_predictedMCMC,2); - responses_actual = repmat(responses_actual, [1,totalSamples]); - responses_control_model = ones(size(responses_actual)) .* proportion_chose_delayed; - - score = calcLogOdds(... - calcDataLikelihood(responses_actual, responses_predictedMCMC),... - calcDataLikelihood(responses_actual, responses_control_model)); + function logloss = calcLogLossDistribution(predicted, actual) + % log loss for binary variables + logloss = - (sum(actual .* log(predicted) + (1 - actual) ... + .* log(1 - predicted))) ./ length(actual); end function percentResponsesPredicted = calcPercentResponsesCorrectlyPredicted(responses_predictedMCMC, responses_actual) @@ -202,17 +205,6 @@ function pp_plotTrials(obj, n) percentResponsesPredicted = sum(isCorrectPrediction,1)./nQuestions; end - function score = calcPostPredOverallScore(responses_predicted, responses_actual, proportion_chose_delayed) - % Calculate log posterior odds of data under the model and a - % control model where prob of responding is proportion_chose_delayed. - - % NOTE: This is model comparison, not posterior prediction - responses_control_model = ones(size(responses_predicted)).*proportion_chose_delayed; - score = calcLogOdds(... - calcDataLikelihood(responses_actual, responses_predicted'),... 
- calcDataLikelihood(responses_actual, responses_control_model')); - end - function exportFigure(plotOptions, prefix_string, modelFilename) % TODO: Exporting is not the responsibility of PosteriorPrediction class, so we need to extract this up to Model subclasses. They call it as: obj.postPred.plot(obj.plotOptions, obj.modelFilename) if plotOptions.shouldExportPlots @@ -227,14 +219,3 @@ function exportFigure(plotOptions, prefix_string, modelFilename) end end - -function logOdds = calcLogOdds(a,b) -logOdds = log(a./b); -end - -function dataLikelihood = calcDataLikelihood(responses, predicted) -% Responses are Bernoulli distributed: a special case of the Binomial with 1 event. -dataLikelihood = prod(binopdf(responses, ... - ones(size(responses)),... - predicted)); -end diff --git a/demo/demo_group_comparison_repeated_measures.m b/demo/demo_group_comparison_repeated_measures.m index 6245a87b..1028e8f1 100644 --- a/demo/demo_group_comparison_repeated_measures.m +++ b/demo/demo_group_comparison_repeated_measures.m @@ -20,25 +20,23 @@ 'nchains', 4,... 'nburnin', 2000); pointEstimateType = 'median'; - -%% Analyse group 1 + +%% Analyse group 1 datapath1 = fullfile(path_of_this_mfile,'datasets','group_comparison','group1'); group1 = ModelHierarchicalLogK(... Data(datapath1, 'files', allFilesInFolder(datapath1, 'txt')),... 'savePath', fullfile(pwd,'output','group1'),... 'pointEstimateType', pointEstimateType,... - 'sampler', 'jags',... 'shouldPlot', 'no',... 'shouldExportPlots', false,... 'mcmcParams', mcmcparams); - -%% Analyse group 2 + +%% Analyse group 2 datapath2 = fullfile(path_of_this_mfile,'datasets','group_comparison','group2'); group2 = ModelHierarchicalLogK(... Data(datapath2, 'files', allFilesInFolder(datapath2, 'txt')),... 'savePath', fullfile(pwd,'output','group2'),... 'pointEstimateType', pointEstimateType,... - 'sampler', 'jags',... 'shouldPlot', 'no',... 'shouldExportPlots', false,... 
'mcmcParams', mcmcparams); @@ -84,4 +82,3 @@ % METHOD 2) - diff --git a/demo/run_me.m b/demo/run_me.m index 002fda0d..3732a8ca 100644 --- a/demo/run_me.m +++ b/demo/run_me.m @@ -34,7 +34,6 @@ % Data(datapath, 'files', allFilesInFolder(datapath, 'txt')),... % 'savePath', 'analysis_with_hierarchical_magnitude_effect',... % 'pointEstimateType','median',... -% 'sampler', 'jags',... % 'shouldPlot', 'no',... % 'mcmcParams', struct('nsamples', 10^4,... % 'nchains', 4,... @@ -109,7 +108,6 @@ Data(datapath, 'files', allFilesInFolder(datapath, 'txt')),... 'savePath', fullfile(pwd,'output','my_analysis'),... 'pointEstimateType', 'median',... - 'sampler', 'jags',... 'shouldPlot', 'yes',... 'shouldExportPlots', true,... 'exportFormats', {'png'},... @@ -122,7 +120,6 @@ % running proper analyses. I have provided small numbers here just to % confirm the code is working without having to wait a long time. % - you can change the point estimate type to mean, median, or mode -% - the sampler can be 'jags' % If we didn't ask for plots when we ran the model, then we do that diff --git a/docs/discussion/stan.md b/docs/discussion/stan.md deleted file mode 100644 index a91dec52..00000000 --- a/docs/discussion/stan.md +++ /dev/null @@ -1,16 +0,0 @@ -# STAN - -**The toolbox primarily uses JAGS. Inference using STAN is currently highly experimental. Not all discount functions are written in STAN model code, and not much work has gone in to checking things are working fine.** - -## STAN installation and setup steps - -In order to use [STAN](http://mc-stan.org) to conduct inference, it is necessary to do a bit of installation. -- [MatlabStan](https://github.com/brian-lau/MatlabStan) - this is automatically installed by the toolbox. -- [MatlabProcessManager](https://github.com/brian-lau/MatlabProcessManager/) - this is automatically installed by the toolbox. -- [CmdStan](http://mc-stan.org/interfaces/cmdstan.html) - you will need to download AND build this. Don't forget the second step. 
It's detailed in the manual, but I missed it the first time. - -**You will also need to locate the file `stan_home.m` (in the `matlabStan` package) and set the path in that file to the location that you placed CmdStan.** - -## Status - -I am _not_ actively working on supporting inference with STAN. I'd be very happy if someone wanted to progress the toolbox support for STAN however. Please see the [CONTRIBUTING.md](https://github.com/drbenvincent/delay-discounting-analysis/blob/master/CONTRIBUTING.md) document. diff --git a/docs/howto/store_raw_data.md b/docs/howto/store_raw_data.md index accf1f0b..c154ed13 100644 --- a/docs/howto/store_raw_data.md +++ b/docs/howto/store_raw_data.md @@ -11,21 +11,21 @@ We assume that we have one data file for every experiment run. So even if we hav Each data file should be a tab-delimited `.txt` file containing the trial data for an individual participant. The format of these participant data files needs to contain 5 columns, with headers `A`, `DA`, `B`, `DB`, `R`. Each row corresponds to one experimental trial. A DA B DB R - 80 0 85 157 0 - 34 0 50 30 1 - 25 0 60 14 1 - 11 0 30 7 1 - 49 0 60 89 0 + 80 0 85 157 A + 34 0 50 30 B + 25 0 60 14 B + 11 0 30 7 B + 49 0 60 89 A etc Optionally, we can add columns for the probability of obtaining the reward, such as A DA PA B DB PB R - 80 0 1 85 157 1 0 - 34 0 1 50 30 1 1 - 25 0 1 60 14 1 1 - 11 0 1 30 7 1 1 - 49 0 1 60 89 1 0 + 80 0 1 85 157 1 A + 34 0 1 50 30 1 B + 25 0 1 60 14 1 B + 11 0 1 30 7 1 B + 49 0 1 60 89 1 A etc Column names mean: @@ -37,8 +37,6 @@ Column names mean: - `PB` probability of achieving the reward - `R` is the participant response. -Note that the the coding of responses are: -* participant chooses delayed reward, ie option B, then `R = 1` -* participant chooses sooner reward, ie option A, then `R = 0` +The preferred way of coding participant responses `R` is by the unambiguous label `A` or `B`. 
An earlier (and less good, ambiguous) coding scheme was `R = 1 for chose delayed` and `R = 0 for chose immediate`. The ordering of the columns should not matter, as the data is extracted using the column header name (i.e. first row of the file). diff --git a/docs/index.md b/docs/index.md index 6c61339c..391b6ffa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,4 +31,3 @@ Vincent, B. T. (2016) **[Hierarchical Bayesian estimation and hypothesis testing - [Hyperpriors / parameter pooling](discussion/hyperpriors.md) - [Level of parameter pooling](discussion/level_of_pooling.md) - [Hypothesis testing](discussion/hypothesis_testing.md) -- [STAN](discussion/stan.md) diff --git a/docs/ref/bayes_graphical_model.png b/docs/ref/bayes_graphical_model.png new file mode 100644 index 00000000..fec2b12b Binary files /dev/null and b/docs/ref/bayes_graphical_model.png differ diff --git a/docs/ref/discount_functions.md b/docs/ref/discount_functions.md index 82f25495..4639d32a 100644 --- a/docs/ref/discount_functions.md +++ b/docs/ref/discount_functions.md @@ -1,5 +1,11 @@ # Discount functions available +A number of discounting models are available, all with the following basic form. The models differ in either a) the presence/absence of hyperpriors, or b) the discount function in the data generating process, which specifies the response probability. + +![](bayes_graphical_model.png) + +The 'data generating process' describes the response probability. This consists of the psychometric link function which captures response errors, and the discount function which computes the present subjective value of prospects. See the paper for details. 
+ | Discount function | Equation | Model suffix | Main parameters | | :--- | :---: | :---: | :---: | | Exponential | `exp(-k*D)` | `*Exp1` | `k` | diff --git a/docs/ref/param_estimation_options.md b/docs/ref/param_estimation_options.md index 11c2afbc..264a6e3b 100644 --- a/docs/ref/param_estimation_options.md +++ b/docs/ref/param_estimation_options.md @@ -32,7 +32,6 @@ The rest of the arguments are optional key/value pairs | --- | --- | | `'timeUnits'` | `'minutes'` or `'hours'` or `'days'` [default] | | `'pointEstimateType'` | `'mean'` or `'median'` or `'mode'` [default] | -| `'sampler'` | `'jags'` [default] or `'stan'` NOTE: stan models are in beta| | `'shouldPlot'` | `'yes'` or `'no'` [default]| | `'shouldExportPlots'` | `'true'` [default] or `'false'` | | `'exportFormats'` | a cell array of output formats, e.g. `{'png', 'pdf'}` (default is `{'png'}` only) | @@ -54,7 +53,6 @@ model = ModelHierarchicalME(Data(datapath, 'files', allFilesInFolder(datapath, ' 'timeUnits', 'days',... 'savePath', fullfile(pwd,'output','my_analysis'),... 'pointEstimateType', 'mode',... - 'sampler', 'jags',... 'shouldPlot', 'no',... 'shouldExportPlots', false,... 'exportFormats', {'png'},... diff --git a/tests/test_AllNonParametricModels.m b/tests/test_AllNonParametricModels.m index 77d015ba..79a493e2 100644 --- a/tests/test_AllNonParametricModels.m +++ b/tests/test_AllNonParametricModels.m @@ -14,7 +14,6 @@ properties (TestParameter) model = {'ModelSeparateNonParametric'} pointEstimateType = {'mean','median','mode'} - sampler = {'jags'} chains = {2,3,4} end @@ -82,19 +81,6 @@ function nChains(testCase, model, chains) end - function specifiedSampler(testCase, model, sampler) - % make model - makeModelFunction = str2func(model); - model = makeModelFunction(testCase.data,... - 'savePath', testCase.savePath,... - 'sampler', sampler,... - 'shouldPlot','no',... - 'mcmcParams', struct('nsamples', get_numer_of_samples_for_tests(),... - 'nchains', 2,... 
- 'nburnin', get_burnin_for_tests())); - % TODO: DO AN ACTUAL TEST HERE !!!!!!!!!!!!!!!!!!!!!! - end - function plotting(testCase, model) % make model makeModelFunction = str2func(model); @@ -109,12 +95,11 @@ function plotting(testCase, model) % TODO: DO AN ACTUAL TEST HERE !!!!!!!!!!!!!!!!!!!!!! end - function model_disp_function(testCase, model, sampler) + function model_disp_function(testCase, model) % make model makeModelFunction = str2func(model); modelFitted = makeModelFunction(testCase.data,... 'savePath', testCase.savePath,... - 'sampler', sampler,... 'mcmcParams', struct('nsamples', get_numer_of_samples_for_tests(),... 'nchains', 2,... 'nburnin', get_burnin_for_tests()),... diff --git a/tests/test_AllParametricModels.m b/tests/test_AllParametricModels.m index c4889a5e..38ced5e2 100644 --- a/tests/test_AllParametricModels.m +++ b/tests/test_AllParametricModels.m @@ -11,7 +11,6 @@ end properties (TestParameter) - sampler = {'jags'}; model = getAllParametricModelNames(); pointEstimateType = {'mean','median','mode'} chains = {2,3,4} @@ -84,12 +83,11 @@ function nChains(testCase, model, chains) end - function specifiedSampler(testCase, model, sampler) + function specifiedSampler(testCase, model) % make model makeModelFunction = str2func(model); model = makeModelFunction(testCase.data,... 'savePath', testCase.savePath,... - 'sampler', sampler,... 'mcmcParams', struct('nsamples', get_numer_of_samples_for_tests(),... 'nchains', 2,... 'nburnin', get_burnin_for_tests()),... @@ -99,12 +97,11 @@ function specifiedSampler(testCase, model, sampler) end - function getting_predicted_values(testCase, model, sampler) + function getting_predicted_values(testCase, model) % make model makeModelFunction = str2func(model); modelFitted = makeModelFunction(testCase.data,... 'savePath', testCase.savePath,... - 'sampler', sampler,... 'mcmcParams', struct('nsamples', get_numer_of_samples_for_tests(),... 'nchains', 2,... 'nburnin', get_burnin_for_tests()),... 
@@ -123,12 +120,11 @@ function getting_predicted_values(testCase, model, sampler) end - function model_disp_function(testCase, model, sampler) + function model_disp_function(testCase, model) % make model makeModelFunction = str2func(model); modelFitted = makeModelFunction(testCase.data,... 'savePath', testCase.savePath,... - 'sampler', sampler,... 'mcmcParams', struct('nsamples', get_numer_of_samples_for_tests(),... 'nchains', 2,... 'nburnin', get_burnin_for_tests()),... diff --git a/tests/test_frontend_delay.m b/tests/test_frontend_delay.m index af3c0cec..4b2d63fe 100644 --- a/tests/test_frontend_delay.m +++ b/tests/test_frontend_delay.m @@ -5,11 +5,11 @@ datapath filesToAnalyse end - + properties (TestParameter) - + end - + methods (TestClassSetup) function setup(testCase) % assuming this is running on my maching @@ -18,18 +18,17 @@ function setup(testCase) testCase.data = Data(testCase.datapath, 'files', {'fe1.txt', 'fe2.txt', 'fe3.txt', 'fe4.txt'}); end end - - + + methods (Test) - + function analysis_of_front_end_delay(testCase) % Do an analysis model = ModelHierarchicalLogK(... testCase.data,... 'savePath', fullfile(pwd,'output','my_analysis'),... 'pointEstimateType', 'median',... - 'sampler', 'jags',... 'shouldPlot', 'no',... 'shouldExportPlots', false,... 'mcmcParams', struct('nsamples', get_numer_of_samples_for_tests(),... 
@@ -38,6 +37,5 @@ function analysis_of_front_end_delay(testCase) end end - -end +end diff --git a/tests/test_frontend_delay_plotting.m b/tests/test_frontend_delay_plotting.m index b509d489..c742df6e 100644 --- a/tests/test_frontend_delay_plotting.m +++ b/tests/test_frontend_delay_plotting.m @@ -5,11 +5,11 @@ datapath filesToAnalyse end - + properties (TestParameter) - + end - + methods (TestClassSetup) function setup(testCase) % assuming this is running on my maching @@ -18,41 +18,38 @@ function setup(testCase) testCase.data = Data(testCase.datapath, 'files', {'example.txt'}); end end - - + + methods (Test) - + function logk_plotting(testCase) % Do an analysis model = ModelSeparateLogK(... testCase.data,... 'savePath', fullfile(pwd,'output','logk_analysis'),... 'pointEstimateType', 'median',... - 'sampler', 'jags',... 'shouldPlot', 'yes',... 'shouldExportPlots', false,... 'mcmcParams', struct('nsamples', get_numer_of_samples_for_tests(),... 'nchains', 2,... 'nburnin', get_burnin_for_tests())); end - + function me_plotting(testCase) % Do an analysis model = ModelSeparateME(... testCase.data,... 'savePath', fullfile(pwd,'output','me_analysis'),... 'pointEstimateType', 'median',... - 'sampler', 'jags',... 'shouldPlot', 'yes',... 'shouldExportPlots', false,... 'mcmcParams', struct('nsamples', get_numer_of_samples_for_tests(),... 'nchains', 2,... 'nburnin', get_burnin_for_tests())); end - + end - -end +end