Merge pull request #1813 from opencobra/develop

Develop
opencobra · Sep 23, 2021 · 68d9461 · 68d9461
2 parents 8e09472 + 94b7757
commit 68d9461
Show file tree

Hide file tree

Showing 75 changed files with 969 additions and 729 deletions.
diff --git a/initCobraToolbox.m b/initCobraToolbox.m
@@ -219,7 +219,8 @@ function initCobraToolbox(updateToolbox)
     % Update/initialize submodules
     %By default your submodules repository is in a state called 'detached HEAD'. 
     %This means that the checked-out commit -- which is the one that the super-project (core) needs -- is not associated with a local branch name.
-    [status_gitSubmodule, result_gitSubmodule] = system(['git submodule update --init --remote --no-fetch ' depthFlag]);
+    %[status_gitSubmodule, result_gitSubmodule] = system(['git submodule update --init --remote --no-fetch ' depthFlag]);%old
+    [status_gitSubmodule, result_gitSubmodule] = system(['git submodule foreach git submodule update --init --recursive']);% 23/9/21 RF submodules point to master
 
     if status_gitSubmodule ~= 0
         fprintf(strrep(result_gitSubmodule, '\', '\\'));

diff --git a/.../multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m b/.../multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyseObjectiveShadowPrices.m
@@ -22,7 +22,6 @@
 %
 % OPTIONAL INPUTS:
 %   resultsFolder     char with path of directory where results are saved
-%                     (default: current folder)
 %   osenseStr         String indicating whether objective function(s)
 %                     should be maximized or minimized. Allowed inputs:
 %                     'min','max', default:'max'.
@@ -50,7 +49,7 @@
 parser = inputParser();  % Define default input parameters if not specified
 parser.addRequired('modelFolder', @ischar);
 parser.addRequired('objectiveList', @iscell);
-parser.addParameter('resultsFolder',pwd, @ischar);
+parser.addParameter('resultsFolder',[pwd filesep 'ShadowPrices'], @ischar);
 parser.addParameter('osenseStr','max', @ischar);
 parser.addParameter('SPDef','Nonzero', @ischar);
 parser.addParameter('numWorkers', 0, @(x) isnumeric(x))
@@ -144,7 +143,7 @@
 %     % store computed objective values
     for j=1:length(objectiveList)
         if ~isempty(FBAsolution{j,1})
-            objectives{j+1,3+i} = FBAsolution{j,1}.obj;
+            objectives{j+1,2+i} = FBAsolution{j,1}.obj;
         else
             objectives{j+1,3+i} = 0;
         end

diff --git a/...analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyzeMgPipeResults.m b/...analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyzeMgPipeResults.m
@@ -1,6 +1,6 @@
 function analyzeMgPipeResults(infoFilePath,resPath,varargin)
 % This function takes simulation results generated by mgPipe as the input
-% and determines which computed fluxes and reaction abundances are
+% and determines which computed fluxes and reaction infoFiles are
 % significantly different between groups. Also creates violin plots of the 
 % simulation results. Requires a file with sample information (e.g., 
 % disease group, age) for the microbiome models that were generated and 
@@ -49,19 +49,20 @@ function analyzeMgPipeResults(infoFilePath,resPath,varargin)
 mkdir(violinPath)
 
 % Read in the file with sample information
-infoFile = readtable(infoFilePath, 'ReadVariableNames', false);
-infoFile = table2cell(infoFile);
+infoFile = readtable(infoFilePath);
+infoFile = [infoFile.Properties.VariableNames;table2cell(infoFile)];
 
 % get all spreadsheet files in results folder
 dInfo = dir(resPath);
 fileList={dInfo.name};
 fileList=fileList';
 fileList(~contains(fileList(:,1),{'.csv','.txt'}))=[];
+fileList(contains(fileList(:,1),{'ModelStat'}))=[];
 
 % analyze data in spreadsheets
 for i=1:length(fileList)
-    sampleData = readtable([resPath filesep fileList{i}], 'ReadVariableNames', false);
-    sampleData = table2cell(sampleData);
+    sampleData = readtable([resPath filesep fileList{i}]);
+    sampleData = [sampleData.Properties.VariableNames;table2cell(sampleData)];
 
     % merge columns for shadow price results
     if strcmp(sampleData{1,2},'Source')

diff --git a/...is/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m b/...is/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m
@@ -1,4 +1,4 @@
-function [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, infoFilePath, rxnsList, numWorkers)
+function [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, taxonomyPath, rxnsList, numWorkers)
 % Part of the Microbiome Modeling Toolbox. This function calculates and
 % plots the total abundance of reactions of interest in a given microbiome
 % sample based on the strain-level composition.
@@ -8,14 +8,14 @@
 %
 % USAGE
 %
-%    [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, infoFilePath, rxnsList, numWorkers)
+%    [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, taxonomyPath, rxnsList, numWorkers)
 %
 % INPUTS:
 %    abundancePath:          Path to the .csv file with the abundance data.
 %                            Example: 'cobratoolbox/papers/018_microbiomeModelingToolbox/examples/normCoverage.csv'
 %    modelPath:              Folder containing the strain-specific AGORA models
 % OPTIONAL INPUTS:
-%    infoFilePath:           Path to the spreadsheet with the taxonomy
+%    taxonomyPath:           Path to the spreadsheet with the taxonomy
 %                            information on organisms (default:
 %                            AGORA_infoFile.xlsx)
 %    rxnsList:               List of reactions for which the abundance
@@ -37,8 +37,8 @@
 %                           01/2020:    adapted to be suitable for pan-models
 
 % read the csv file with the abundance data
-abundance = readtable(abundancePath, 'ReadVariableNames', false);
-abundance = table2cell(abundance);
+abundance = readtable(abundancePath);
+abundance = [abundance.Properties.VariableNames;table2cell(abundance)];
 if isnumeric(abundance{2, 1})
     abundance(:, 1) = [];
 end
@@ -62,13 +62,12 @@
 end
 
 % Get the taxonomy information
-if exist('infoFilePath','var') && ~isempty(infoFilePath)
-    taxonomy = readtable(infoFilePath, 'ReadVariableNames', false);
-    taxonomy = table2cell(taxonomy);
+if exist('taxonomyPath','var') && ~isempty(taxonomyPath)
+    taxonomy = readtable(taxonomyPath);
 else
-    taxonomy = readtable('AGORA_infoFile.xlsx', 'ReadVariableNames', false);
-    taxonomy = table2cell(taxonomy);
+    taxonomy = readtable('AGORA_infoFile.xlsx');
 end
+taxonomy = [taxonomy.Properties.VariableNames;table2cell(taxonomy)];
 
 % load the models found in the individuals and extract which reactions are
 % in which model
@@ -186,48 +185,48 @@
             % check if the reaction is present in the strain
             if ReactionPresence{k, j + 1} == 1
                 % calculate total abundance
-                totalAbun(j) = totalAbun(j) + str2double(abundance{k, i});
+                totalAbun(j) = totalAbun(j) + abundance{k, i};
                 % calculate phylum abundance
                 t = 1;
                 findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
                 if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
                     taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                    tmpPhyl(1, taxonCol) = tmpPhyl(1, taxonCol) + str2double(abundance{k, i});
+                    tmpPhyl(1, taxonCol) = tmpPhyl(1, taxonCol) + abundance{k, i};
                 end
                 % calculate class abundance
                 t = 2;
                 findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
                 if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
                     taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                    tmpClass(1, taxonCol) = tmpClass(1, taxonCol) + str2double(abundance{k, i});
+                    tmpClass(1, taxonCol) = tmpClass(1, taxonCol) + abundance{k, i};
                 end
                 % calculate order abundance
                 t = 3;
                 findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
                 if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
                     taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                    tmpOrder(1, taxonCol) = tmpOrder(1, taxonCol) + str2double(abundance{k, i});
+                    tmpOrder(1, taxonCol) = tmpOrder(1, taxonCol) + abundance{k, i};
                 end
                 % calculate family abundance
                 t = 4;
                 findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
                 if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
                     taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                    tmpFamily(1, taxonCol) = tmpFamily(1, taxonCol) + str2double(abundance{k, i});
+                    tmpFamily(1, taxonCol) = tmpFamily(1, taxonCol) + abundance{k, i};
                 end
                 % calculate genus abundance
                 t = 5;
                 findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
                 if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
                     taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                    tmpGenus(1, taxonCol) = tmpGenus(1, taxonCol) + str2double(abundance{k, i});
+                    tmpGenus(1, taxonCol) = tmpGenus(1, taxonCol) + abundance{k, i};
                 end
                 % calculate species abundance
                 t = 6;
                 findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
                 if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
                     taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                    tmpSpecies(1, taxonCol) = tmpSpecies(1, taxonCol) + str2double(abundance{k, i});
+                    tmpSpecies(1, taxonCol) = tmpSpecies(1, taxonCol) + abundance{k, i};
                 end
             end
         end

diff --git a/...sis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionPresence.m b/...sis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionPresence.m
@@ -29,8 +29,8 @@
 % .. Author: - Almut Heinken, 01/2021
 
 % read the file with the abundance data
-abundance = readtable(abundancePath, 'ReadVariableNames', false);
-abundance = table2cell(abundance);
+abundance = readtable(abundancePath);
+abundance = [abundance.Properties.VariableNames;table2cell(abundance)];
 if isnumeric(abundance{2, 1})
     abundance(:, 1) = [];
 end
@@ -68,7 +68,7 @@
 microbes=abundance(2:end,1);
 for i=2:size(abundance,2)
     ReactionPresence{1,i}=abundance{1,i};
-    microbesInModels=microbes(find(str2double(abundance(2:end,i))>0),1);
+    microbesInModels=microbes(find(cell2mat(abundance(2:end,i))>0),1);
     for j=2:size(reactionInModels,1)
         ReactionPresence{j,1}=reactionInModels{j,1};
         microbesWithReaction=microbes(find(cell2mat(reactionInModels(j,2:end))==1),1);

diff --git a/...s/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateSubsystemAbundance.m b/...s/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateSubsystemAbundance.m
@@ -17,36 +17,36 @@
 % AUTHOR
 %       - Almut Heinken, 08/2020
 
-reactionDatabase = readtable('ReactionDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
-reactionDatabase=table2cell(reactionDatabase);
+% load database
+database=loadVMHDatabase;
 
-reactionAbundance = readtable(reactionAbundancePath, 'ReadVariableNames', false);
-reactionAbundance = table2cell(reactionAbundance);
+reactionAbundance = readtable(reactionAbundancePath);
+reactionAbundance = [reactionAbundance.Properties.VariableNames;table2cell(reactionAbundance)];
 
 % remove biomass reaction
 reactionAbundance(find(strncmp(reactionAbundance(:,1),'bio',3)),:)=[];
 
 % remove reactions not in dataset
-[C,IA]=setdiff(reactionDatabase(:,1),reactionAbundance(:,1));
-reactionDatabase(IA,:)=[];
+[C,IA]=setdiff(database.reactions(:,1),reactionAbundance(:,1));
+database.reactions(IA,:)=[];
 
 % get and calculate all subsystems
-subs=unique(reactionDatabase(:,11));
+subs=unique(database.reactions(:,11));
 subs(find(strcmp(subs(:,1),'')),:)=[];
 
 subsystemAbundance(1,:)=reactionAbundance(1,:);
 subsystemAbundance{1,1}='Subsystems';
 
 for i=1:length(subs)
     subsystemAbundance{i+1,1}=subs{i};
-    rxns=reactionDatabase(find(strcmp(reactionDatabase(:,11),subs{i})),1);
+    rxns=database.reactions(find(strcmp(database.reactions(:,11),subs{i})),1);
     % use the fraction of abundance for all reactions in this subsystem
     % taken together
     abunTmp=zeros(1,size(reactionAbundance,2));
     for j=1:length(rxns)
         rxnInd=find(strcmp(reactionAbundance(:,1),rxns{j}));
         for k=2:size(reactionAbundance,2)
-            abunTmp(k)=abunTmp(k) + str2double(reactionAbundance{rxnInd,k});
+            abunTmp(k)=abunTmp(k) + reactionAbundance{rxnInd,k};
         end
     end
     for k=2:size(reactionAbundance,2)

diff --git a/...ltiSpecies/microbiomeModelingToolbox/additionalAnalysis/correlateFluxWithTaxonAbundance.m b/...ltiSpecies/microbiomeModelingToolbox/additionalAnalysis/correlateFluxWithTaxonAbundance.m
@@ -39,31 +39,37 @@
 %                                     changed flux input to a csv file.
 
 % read the csv file with the abundance data
-abundance = table2cell(readtable(abundancePath, 'ReadVariableNames', false));
+abundance = readtable(abundancePath);
+abundance = [abundance.Properties.VariableNames;table2cell(abundance)];
 if isnumeric(abundance{2, 1})
     abundance(:, 1) = [];
 end
 
-fluxes = table2cell(readtable(fluxPath, 'ReadVariableNames', false));
+fluxes = readtable(fluxPath);
+fluxes = [fluxes.Properties.VariableNames;table2cell(fluxes)];
 
-metaboliteDatabase = readtable('MetaboliteDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
-metaboliteDatabase=table2cell(metaboliteDatabase);
+% check if data is from same samples
+if ~isempty(setdiff(fluxes(1,2:end),abundance(1,2:end)))
+    error('Sample IDs in abundance and flux files do not agree!')
+end
+
+% load database
+database=loadVMHDatabase;
 
 fluxes(:,1)=strrep(fluxes(:,1),'EX_','');
 fluxes(:,1)=strrep(fluxes(:,1),'(e)','');
 fluxes(:,1)=strrep(fluxes(:,1),'[fe]','');
 % for i=2:size(fluxes,1)
-% fluxes{i,1}=metaboliteDatabase{find(strcmp(metaboliteDatabase(:,1),fluxes{i,1})),2};
+% fluxes{i,1}=database.metabolites{find(strcmp(database.metabolites(:,1),fluxes{i,1})),2};
 % end
 
 % Get the taxonomy information
 if exist('infoFilePath','var')
-    taxonomy = readtable(infoFilePath, 'ReadVariableNames', false);
-    taxonomy = table2cell(taxonomy);
+    taxonomy = readtable(infoFilePath);
 else
-    taxonomy = readtable('AGORA_infoFile.xlsx', 'ReadVariableNames', false);
-    taxonomy = table2cell(taxonomy);
+    taxonomy = readtable('AGORA_infoFile.xlsx');
 end
+taxonomy = [taxonomy.Properties.VariableNames;table2cell(taxonomy)];
 
 if ~exist('corrMethod', 'var')  % Define correlation coefficient method if not entered
     corrMethod = 'Pearson';
@@ -135,7 +141,7 @@
                 % variable
                 findinSampleAbun = find(strcmp(findTax{1}, SampleAbundance.(TaxonomyLevels{t})(1, :)));
                 % sum up the relative abundance
-                SampleAbundance.(TaxonomyLevels{t}){i, findinSampleAbun} = SampleAbundance.(TaxonomyLevels{t}){i, findinSampleAbun} + str2double(abundance{j, i});
+                SampleAbundance.(TaxonomyLevels{t}){i, findinSampleAbun} = SampleAbundance.(TaxonomyLevels{t}){i, findinSampleAbun} + abundance{j, i};
             end
             end
         end
@@ -217,7 +223,7 @@
 % translate to metabolite descriptions
 for t = 2:size(TaxonomyLevels, 1)
     for i=2:size(FluxCorrelations.(TaxonomyLevels{t}),2)
-        FluxCorrelations.(TaxonomyLevels{t}){1,i}=metaboliteDatabase{find(strcmp(metaboliteDatabase(:,1),FluxCorrelations.(TaxonomyLevels{t}){1,i})),2};
+        FluxCorrelations.(TaxonomyLevels{t}){1,i}=database.metabolites{find(strcmp(database.metabolites(:,1),FluxCorrelations.(TaxonomyLevels{t}){1,i})),2};
     end
 end
 

diff --git a/...ultiSpecies/microbiomeModelingToolbox/additionalAnalysis/fastCalculateReactionAbundance.m b/...ultiSpecies/microbiomeModelingToolbox/additionalAnalysis/fastCalculateReactionAbundance.m
@@ -29,8 +29,8 @@
 % .. Author: - Almut Heinken, 04/2021
 
 % read the csv file with the abundance data
-abundance = readtable(abundancePath, 'ReadVariableNames', false);
-abundance = table2cell(abundance);
+abundance = readtable(abundancePath);
+abundance = [abundance.Properties.VariableNames;table2cell(abundance)];
 if isnumeric(abundance{2, 1})
     abundance(:, 1) = [];
 end
@@ -106,7 +106,7 @@
 
         for k = 1:length(presentRxns)
             % summarize total abundance
-            totalAbun{i}(presentRxns(k),1) = totalAbun{i}(presentRxns(k),1) + str2double(abundance{j,i});
+            totalAbun{i}(presentRxns(k),1) = totalAbun{i}(presentRxns(k),1) + abundance{j,i};
         end
     end
 end

diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/makeViolinPlots.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/makeViolinPlots.m
@@ -37,8 +37,7 @@ function makeViolinPlots(sampleData, sampleInformation, varargin)
 unit = parser.Results.unit;
 
 % read metabolite database
-metaboliteDatabase = readtable('MetaboliteDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
-metaboliteDatabase=table2cell(metaboliteDatabase);
+database = loadVMHDatabase;
 
 % find the column with the sample information to split the samples by
 if ~isempty(stratification)
@@ -66,15 +65,15 @@ function makeViolinPlots(sampleData, sampleInformation, varargin)
     % get the predicted metabolite
     varname=strrep(sampleData{i,1},'EX_','');
     varname=strrep(varname,'[fe]','');
-    if ~isempty(find(strcmp(metaboliteDatabase(:,1),varname)))
-        varname=metaboliteDatabase{find(strcmp(metaboliteDatabase(:,1),varname)),2};
+    if ~isempty(find(strcmp(database.metabolites(:,1),varname)))
+        varname=database.metabolites{find(strcmp(database.metabolites(:,1),varname)),2};
     end
     figure;
     % plot the violins
     % if there are nonzero values in each stratification group and the
     % values aren't all the same
     strats=unique(sampleStratification);
-    plotdata=str2double(sampleData(i,2:end))';
+    plotdata=cell2mat(sampleData(i,2:end))';
     for j=1:length(strats)
         valsinstrat(j)=sum(plotdata(find(strcmp(sampleStratification,strats{j}))));
         uniquevals(j)=numel(unique(plotdata(find(strcmp(sampleStratification,strats{j})))));
@@ -83,20 +82,19 @@ function makeViolinPlots(sampleData, sampleInformation, varargin)
         hold on
         violinplot(plotdata,sampleStratification);
         if length(strats) > 3
-            set(gca, 'FontSize', 14)
+            set(gca, 'FontSize', 12)
         else
-            set(gca, 'FontSize', 18)
+            set(gca, 'FontSize', 14)
         end
         if length(strats) > 6
             xtickangle(45)
         end
-        box on
-        ylim([0 max(max(str2double(sampleData(i,2:end))))])
+        ylim([0 max(max(cell2mat(sampleData(i,2:end))))])
         if ~isempty(unit)
             h=ylabel(unit);
             set(h,'interpreter','none')
         end
-        h=title(varname,'FontSize',24);
+        h=title(varname,'FontSize',14);
         set(h,'interpreter','none')
         if ~isempty(plottedFeature)
             h=suptitle(plottedFeature);