From 32b867030db32b9e21e4b3ce61f774265d0fea64 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:06:16 +0200 Subject: [PATCH 1/2] rename metadata_path -> metadata_path_or_df --- alphastats/DataSet.py | 6 +++--- alphastats/dataset_factory.py | 8 ++++---- alphastats/gui/pages/02_Import Data.py | 2 +- alphastats/gui/utils/import_helper.py | 8 ++++++-- docs/import_data.md | 4 ++-- docs/workflow_mq.html | 2 +- nbs/getting_started.ipynb | 2 +- nbs/liu_2019.ipynb | 2 +- nbs/ramus_2016.ipynb | 18 +++++++++--------- tests/gui/test_04_preprocessing.py | 2 +- tests/test_DataSet.py | 22 +++++++++++----------- tests/test_DataSet_Pathway.py | 2 +- tests/test_gpt.py | 2 +- 13 files changed, 42 insertions(+), 38 deletions(-) diff --git a/alphastats/DataSet.py b/alphastats/DataSet.py index ae06517e..3c36e459 100644 --- a/alphastats/DataSet.py +++ b/alphastats/DataSet.py @@ -47,14 +47,14 @@ class DataSet: def __init__( self, loader: BaseLoader, - metadata_path: Optional[Union[str, pd.DataFrame]] = None, + metadata_path_or_df: Optional[Union[str, pd.DataFrame]] = None, sample_column: Optional[str] = None, ): """Create DataSet Args: loader (_type_): loader of class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader, SpectronautLoader - metadata_path (str or pd.DataFrame, optional): path to metadata file or an actual df. Defaults to None. + metadata_path_or_df (str or pd.DataFrame, optional): path to metadata file or an actual df. Defaults to None. sample_column (str, optional): column in metadata file indicating the sample IDs. Defaults to None. Attributes of a DataSet instance: @@ -86,7 +86,7 @@ def __init__( rawinput=self.rawinput, index_column=self.index_column, intensity_column=self._intensity_column, - metadata_path=metadata_path, + metadata_path_or_df=metadata_path_or_df, sample_column=sample_column, ) diff --git a/alphastats/dataset_factory.py b/alphastats/dataset_factory.py index 48f89862..9e312124 100644 --- a/alphastats/dataset_factory.py +++ b/alphastats/dataset_factory.py @@ -15,14 +15,14 @@ def __init__( rawinput: pd.DataFrame, index_column: str, intensity_column: Union[List[str], str], - metadata_path: Union[str, pd.DataFrame], + metadata_path_or_df: Union[str, pd.DataFrame], sample_column: str, ): self.rawinput: pd.DataFrame = rawinput self.sample_column: str = sample_column self.index_column: str = index_column self.intensity_column: Union[List[str], str] = intensity_column - self.metadata_path: Union[str, pd.DataFrame] = metadata_path + self.metadata_path_or_df: Union[str, pd.DataFrame] = metadata_path_or_df def create_matrix_from_rawinput(self) -> Tuple[pd.DataFrame, pd.DataFrame]: """Creates a matrix: features (Proteins) as columns, samples as rows.""" @@ -61,9 +61,9 @@ def _check_matrix_values(mat: pd.DataFrame) -> None: def create_metadata(self, mat: pd.DataFrame) -> Tuple[pd.DataFrame, str]: """Create metadata DataFrame from metadata file or DataFrame.""" - if self.metadata_path is not None: + if self.metadata_path_or_df is not None: sample = self.sample_column - metadata = self._load_metadata(file_path=self.metadata_path) + metadata = self._load_metadata(file_path=self.metadata_path_or_df) metadata = self._remove_missing_samples_from_metadata(mat, metadata, sample) else: sample = "sample" diff --git a/alphastats/gui/pages/02_Import Data.py b/alphastats/gui/pages/02_Import Data.py index 68b17785..967485c1 100644 --- a/alphastats/gui/pages/02_Import Data.py +++ b/alphastats/gui/pages/02_Import Data.py @@ -158,7 +158,7 @@ def _finalize_data_loading( dataset = DataSet( loader=loader, - metadata_path=metadatafile_df, + metadata_path_or_df=metadatafile_df, sample_column=sample_column, ) metadata_columns = metadatafile_df.columns.to_list() diff --git a/alphastats/gui/utils/import_helper.py b/alphastats/gui/utils/import_helper.py index acf42d16..52d240b5 100644 --- a/alphastats/gui/utils/import_helper.py +++ b/alphastats/gui/utils/import_helper.py @@ -108,7 +108,9 @@ def load_example_data(): loader = MaxQuantLoader(file=filepath) # TODO why is this done twice? - dataset = DataSet(loader=loader, metadata_path=metadatapath, sample_column="sample") + dataset = DataSet( + loader=loader, metadata_path_or_df=metadatapath, sample_column="sample" + ) metadatapath = ( os.path.join(_parent_directory, "sample_data", "metadata.xlsx") .replace("pages/", "") @@ -116,7 +118,9 @@ def load_example_data(): ) loader = MaxQuantLoader(file=filepath) - dataset = DataSet(loader=loader, metadata_path=metadatapath, sample_column="sample") + dataset = DataSet( + loader=loader, metadata_path_or_df=metadatapath, sample_column="sample" + ) dataset.metadata = dataset.metadata[ [ diff --git a/docs/import_data.md b/docs/import_data.md index 729129bb..c5b6bb76 100644 --- a/docs/import_data.md +++ b/docs/import_data.md @@ -14,7 +14,7 @@ maxquant_data = alphastats.MaxQuantLoader( dataset = alphastats.DataSet( loader = maxquant_data, - metadata_path="../testfiles/maxquant/metadata.xlsx", + metadata_path_or_df="../testfiles/maxquant/metadata.xlsx", sample_column="sample" ) ``` @@ -126,7 +126,7 @@ maxquant_data = alphastats.MaxQuantLoader( dataset = alphastats.DataSet( loader = maxquant_data, - metadata_path="../testfiles/maxquant/metadata.xlsx", + metadata_path_or_df="../testfiles/maxquant/metadata.xlsx", sample_column="sample" ) ``` diff --git a/docs/workflow_mq.html b/docs/workflow_mq.html index b0189e2b..15db3cbd 100644 --- a/docs/workflow_mq.html +++ b/docs/workflow_mq.html @@ -781,7 +781,7 @@
ds = alphastats.DataSet(
loader = maxquant_data,
- metadata_path = "../testfiles/maxquant/metadata.xlsx",
+ metadata_path_or_df = "../testfiles/maxquant/metadata.xlsx",
sample_column = "sample" # specify the column that corresponds to the sample names in proteinGroups
)
0?[0]:[]);if(o.enter().append(\"g\").classed(f.containerClassName,!0).style(\"cursor\",\"pointer\"),o.exit().each((function(){n.select(this).selectAll(\"g.\"+f.headerGroupClassName).each(a)})).remove(),0!==r.length){var l=o.selectAll(\"g.\"+f.headerGroupClassName).data(r,p);l.enter().append(\"g\").classed(f.headerGroupClassName,!0);for(var u=s.ensureSingle(o,\"g\",f.dropdownButtonGroupClassName,(function(t){t.style(\"pointer-events\",\"all\")})),c=0;c 90&&i.log(\"Long binary search...\"),h-1},e.sorterAsc=function(t,e){return t-e},e.sorterDes=function(t,e){return e-t},e.distinctVals=function(t){var r,n=t.slice();for(n.sort(e.sorterAsc),r=n.length-1;r>-1&&n[r]===o;r--);for(var i,a=n[r]-n[0]||1,s=a/(r||1)/1e4,l=[],u=0;u<=r;u++){var c=n[u],f=c-i;void 0===i?(l.push(c),i=c):f>s&&(a=Math.min(a,f),l.push(c),i=c)}return{vals:l,minDiff:a}},e.roundUp=function(t,e,r){for(var n,i=0,a=e.length-1,o=0,s=r?0:1,l=r?1:0,u=r?Math.ceil:Math.floor;i0&&(n=1),r&&n)return t.sort(e)}return n?t:t.reverse()},e.findIndexOfMin=function(t,e){e=e||a;for(var r,n=1/0,i=0;il?r.y-l:0;return Math.sqrt(u*u+f*f)}for(var p=h(u);p;){if((u+=p+r)>f)return;p=h(u)}for(p=h(f);p;){if(u>(f-=p+r))return;p=h(f)}return{min:u,max:f,len:f-u,total:c,isClosed:0===u&&f===c&&Math.abs(n.x-i.x)<.1&&Math.abs(n.y-i.y)<.1}},e.findPointOnPath=function(t,e,r,n){for(var i,a,o,s=(n=n||{}).pathLength||t.getTotalLength(),l=n.tolerance||.001,u=n.iterationLimit||30,c=t.getPointAtLength(0)[r]>t.getPointAtLength(s)[r]?-1:1,f=0,h=0,p=s;f0?p=i:h=i,f++}return a}},81697:function(t,e,r){\"use strict\";var n=r(92770),i=r(84267),a=r(25075),o=r(21081),s=r(22399).defaultLine,l=r(73627).isArrayOrTypedArray,u=a(s);function c(t,e){var r=t;return r[3]*=e,r}function f(t){if(n(t))return u;var e=a(t);return e.length?e:u}function h(t){return n(t)?t:1}t.exports={formatColor:function(t,e,r){var n,i,s,p,d,v=t.color,g=l(v),y=l(e),m=o.extractOpts(t),x=[];if(n=void 0!==m.colorscale?o.makeColorScaleFuncFromTrace(t):f,i=g?function(t,e){return void 0===t[e]?u:a(n(t[e]))}:f,s=y?function(t,e){return void 0===t[e]?1:h(t[e])}:h,g||y)for(var b=0;b
/i;e.BR_TAG_ALL=/
/gi;var _=/(^|[\\s\"'])style\\s*=\\s*(\"([^\"]*);?\"|'([^']*);?')/i,w=/(^|[\\s\"'])href\\s*=\\s*(\"([^\"]*)\"|'([^']*)')/i,T=/(^|[\\s\"'])target\\s*=\\s*(\"([^\"\\s]*)\"|'([^'\\s]*)')/i,k=/(^|[\\s\"'])popup\\s*=\\s*(\"([\\w=,]*)\"|'([\\w=,]*)')/i;function A(t,e){if(!t)return null;var r=t.match(e),n=r&&(r[3]||r[4]);return n&&L(n)}var M=/(^|;)\\s*color:/;e.plainText=function(t,e){for(var r=void 0!==(e=e||{}).len&&-1!==e.len?e.len:1/0,n=void 0!==e.allowedTags?e.allowedTags:[\"br\"],i=t.split(m),a=[],o=\"\",s=0,l=0;l
\"+l;e.text=u}(t,o,r,u):\"log\"===c?function(t,e,r,n,a){var o=t.dtick,l=e.x,u=t.tickformat,c=\"string\"==typeof o&&o.charAt(0);if(\"never\"===a&&(a=\"\"),n&&\"L\"!==c&&(o=\"L3\",c=\"L\"),u||\"L\"===c)e.text=bt(Math.pow(10,l),t,a,n);else if(i(o)||\"D\"===c&&s.mod(l+.01,1)<.1){var f=Math.round(l),h=Math.abs(f),p=t.exponentformat;\"power\"===p||mt(p)&&xt(f)?(e.text=0===f?1:1===f?\"10\":\"10\"+(f>1?\"\":P)+h+\"\",e.fontSize*=1.25):(\"e\"===p||\"E\"===p)&&h>2?e.text=\"1\"+p+(f>0?\"+\":P)+h:(e.text=bt(Math.pow(10,l),t,\"\",\"fakehover\"),\"D1\"===o&&\"y\"===t._id.charAt(0)&&(e.dy-=e.fontSize/6))}else{if(\"D\"!==c)throw\"unrecognized dtick \"+String(o);e.text=String(Math.round(Math.pow(10,s.mod(l,1)))),e.fontSize*=.75}if(\"D1\"===t.dtick){var d=String(e.text).charAt(0);\"0\"!==d&&\"1\"!==d||(\"y\"===t._id.charAt(0)?e.dx-=e.fontSize/4:(e.dy+=e.fontSize/2,e.dx+=(t.range[1]>t.range[0]?1:-1)*e.fontSize*(l<0?.5:.25)))}}(t,o,0,u,v):\"category\"===c?function(t,e){var r=t._categories[Math.round(e.x)];void 0===r&&(r=\"\"),e.text=String(r)}(t,o):\"multicategory\"===c?function(t,e,r){var n=Math.round(e.x),i=t._categories[n]||[],a=void 0===i[1]?\"\":String(i[1]),o=void 0===i[0]?\"\":String(i[0]);r?e.text=o+\" - \"+a:(e.text=a,e.text2=o)}(t,o,r):Dt(t)?function(t,e,r,n,i){if(\"radians\"!==t.thetaunit||r)e.text=bt(e.x,t,i,n);else{var a=e.x/180;if(0===a)e.text=\"0\";else{var o=function(t){function e(t,e){return Math.abs(t-e)<=1e-6}var r=function(t){for(var r=1;!e(Math.round(t*r)/r,t);)r*=10;return r}(t),n=t*r,i=Math.abs(function t(r,n){return e(n,0)?r:t(n,r%n)}(n,r));return[Math.round(n/i),Math.round(r/i)]}(a);if(o[1]>=100)e.text=bt(s.deg2rad(e.x),t,i,n);else{var l=e.x<0;1===o[1]?1===o[0]?e.text=\"π\":e.text=o[0]+\"π\":e.text=[\"\",o[0],\"\",\"⁄\",\"\",o[1],\"\",\"π\"].join(\"\"),l&&(e.text=P+e.text)}}}}(t,o,r,u,v):function(t,e,r,n,i){\"never\"===i?i=\"\":\"all\"===t.showexponent&&Math.abs(e.x/t.dtick)<1e-6&&(i=\"hide\"),e.text=bt(e.x,t,i,n)}(t,o,0,u,v),n||(t.tickprefix&&!d(t.showtickprefix)&&(o.text=t.tickprefix+o.text),t.ticksuffix&&!d(t.showticksuffix)&&(o.text+=t.ticksuffix)),t.labelalias&&t.labelalias.hasOwnProperty(o.text)){var g=t.labelalias[o.text];\"string\"==typeof g&&(o.text=g)}if(\"boundaries\"===t.tickson||t.showdividers){var y=function(e){var r=t.l2p(e);return r>=0&&r<=t._length?e:null};o.xbnd=[y(o.x-.5),y(o.x+t.dtick-.5)]}return o},q.hoverLabelText=function(t,e,r){r&&(t=s.extendFlat({},t,{hoverformat:r}));var n=Array.isArray(e)?e[0]:e,i=Array.isArray(e)?e[1]:void 0;if(void 0!==i&&i!==n)return q.hoverLabelText(t,n,r)+\" - \"+q.hoverLabelText(t,i,r);var a=\"log\"===t.type&&n<=0,o=q.tickText(t,t.c2l(a?-n:n),\"hover\").text;return a?0===n?\"0\":P+o:o};var yt=[\"f\",\"p\",\"n\",\"μ\",\"m\",\"\",\"k\",\"M\",\"G\",\"T\"];function mt(t){return\"SI\"===t||\"B\"===t}function xt(t){return t>14||t<-15}function bt(t,e,r,n){var a=t<0,o=e._tickround,l=r||e.exponentformat||\"B\",u=e._tickexponent,c=q.getTickFormat(e),f=e.separatethousands;if(n){var h={exponentformat:l,minexponent:e.minexponent,dtick:\"none\"===e.showexponent?e.dtick:i(t)&&Math.abs(t)||1,range:\"none\"===e.showexponent?e.range.map(e.r2d):[0,t||1]};vt(h),o=(Number(h._tickround)||0)+4,u=h._tickexponent,e.hoverformat&&(c=e.hoverformat)}if(c)return e._numFormat(c)(t).replace(/-/g,P);var p,d=Math.pow(10,-o)/2;if(\"none\"===l&&(u=0),(t=Math.abs(t))
\")):x=h.textLabel;var L={x:h.traceCoordinate[0],y:h.traceCoordinate[1],z:h.traceCoordinate[2],data:w._input,fullData:w,curveNumber:w.index,pointNumber:T};d.appendArrayPointValue(L,w,T),t._module.eventData&&(L=w._module.eventData(L,h,w,{},T));var C={points:[L]};if(e.fullSceneLayout.hovermode){var P=[];d.loneHover({trace:w,x:(.5+.5*m[0]/m[3])*s,y:(.5-.5*m[1]/m[3])*l,xLabel:k.xLabel,yLabel:k.yLabel,zLabel:k.zLabel,text:x,name:c.name,color:d.castHoverOption(w,T,\"bgcolor\")||c.color,borderColor:d.castHoverOption(w,T,\"bordercolor\"),fontFamily:d.castHoverOption(w,T,\"font.family\"),fontSize:d.castHoverOption(w,T,\"font.size\"),fontColor:d.castHoverOption(w,T,\"font.color\"),nameLength:d.castHoverOption(w,T,\"namelength\"),textAlign:d.castHoverOption(w,T,\"align\"),hovertemplate:f.castOption(w,T,\"hovertemplate\"),hovertemplateLabels:f.extendFlat({},L,k),eventData:[L]},{container:n,gd:r,inOut_bbox:P}),L.bbox=P[0]}h.distance<5&&(h.buttons||_)?r.emit(\"plotly_click\",C):r.emit(\"plotly_hover\",C),this.oldEventData=C}else d.loneUnhover(n),this.oldEventData&&r.emit(\"plotly_unhover\",this.oldEventData),this.oldEventData=void 0;e.drawAnnotations(e)},T.recoverContext=function(){var t=this;t.glplot.dispose();var e=function(){t.glplot.gl.isContextLost()?requestAnimationFrame(e):t.initializeGLPlot()?t.plot.apply(t,t.plotArgs):f.error(\"Catastrophic and unrecoverable WebGL error. Context lost.\")};requestAnimationFrame(e)};var A=[\"xaxis\",\"yaxis\",\"zaxis\"];function M(t,e,r){for(var n=t.fullSceneLayout,i=0;i<3;i++){var a=A[i],o=a.charAt(0),s=n[a],l=e[o],u=e[o+\"calendar\"],c=e[\"_\"+o+\"length\"];if(f.isArrayOrTypedArray(l))for(var h,p=0;p<(c||l.length);p++)if(f.isArrayOrTypedArray(l[p]))for(var d=0;d
\");b.text(T).attr(\"data-unformatted\",T).call(f.convertToTspans,t),_=c.bBox(b.node())}b.attr(\"transform\",a(-3,8-_.height)),x.insert(\"rect\",\".static-attribution\").attr({x:-_.width-6,y:-_.height-3,width:_.width+6,height:_.height+3,fill:\"rgba(255, 255, 255, 0.75)\"});var k=1;_.width+6>w&&(k=w/(_.width+6));var A=[n.l+n.w*h.x[1],n.t+n.h*(1-h.y[0])];x.attr(\"transform\",a(A[0],A[1])+o(k))}},e.updateFx=function(t){for(var e=t._fullLayout,r=e._subplots[p],n=0;n
\")}(e,r,n,i):v.getValue(s.text,r),v.coerceString(m,o)}(C,n,i,T,M);w=function(t,e){var r=v.getValue(t.textposition,e);return v.coerceEnumerated(x,r)}(O,i);var z=\"stack\"===g.mode||\"relative\"===g.mode,R=n[i],F=!z||R._outmost;if(D&&\"none\"!==w&&(!R.isBlank&&s!==u&&f!==p||\"auto\"!==w&&\"inside\"!==w)){var B=C.font,N=d.getBarColor(n[i],O),j=d.getInsideTextFont(O,i,B,N),U=d.getOutsideTextFont(O,i,B),V=r.datum();I?\"log\"===T.type&&V.s0<=0&&(s=T.range[0]