From 0c2b2e634a63817619af58ea1ebbfc14865856fe Mon Sep 17 00:00:00 2001 From: Michael Cullan Date: Sun, 19 Jun 2022 04:59:56 -0400 Subject: [PATCH 01/11] Implement ClassTransformationReg model (#111) * added intial version of ClassTransformationReg model * add to __init__ * :wrench: Remove reshape Co-authored-by: Maksim Shevchenko Co-authored-by: Irina Elisova Co-authored-by: Irina Elisova --- sklift/models/__init__.py | 7 +- sklift/models/models.py | 133 +++++++++++++++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 6 deletions(-) diff --git a/sklift/models/__init__.py b/sklift/models/__init__.py index 05ef032..81290fe 100644 --- a/sklift/models/__init__.py +++ b/sklift/models/__init__.py @@ -1,6 +1,3 @@ -from .models import (SoloModel, ClassTransformation, TwoModels) +from .models import SoloModel, ClassTransformation, ClassTransformationReg, TwoModels -__all__ = [ - 'SoloModel', - 'ClassTransformation', - 'TwoModels'] +__all__ = [SoloModel, ClassTransformation, ClassTransformationReg, TwoModels] diff --git a/sklift/models/models.py b/sklift/models/models.py index 16f3c08..83b9a84 100644 --- a/sklift/models/models.py +++ b/sklift/models/models.py @@ -22,7 +22,7 @@ class SoloModel(BaseEstimator): Args: estimator (estimator object implementing 'fit'): The object to use to fit the data. method (string, ’dummy’ or ’treatment_interaction’, default='dummy'): Specifies the approach: - + * ``'dummy'``: Single model; * ``'treatment_interaction'``: @@ -268,6 +268,137 @@ def predict(self, X): return uplift +class ClassTransformationReg(BaseEstimator): + """aka CATE (Conditional Average Treatment Effect) generating transformation approach for continuous labels. 
+ + Redefine target variable, which indicates that treatment makes some impact on target or + the target is negative without treatment: ``Z = Y * (W - p)/(p * (1 - p))``, + + where ``Y`` - target vector, ``W`` - vector of binary communication flags, and ``p`` is a propensity score (the probability that each y_i is assigned to the treatment group). + + Then, train a regressor on ``Z`` to predict uplift. + + Returns uplift predictions and optionally propensity predictions. + + The propensity score can be a scalar value (e.g. p = 0.5), which would mean that every subject has identical probability of being assigned to the treatment group. + + Alternatively, the propensity can be learned using a Classifier model. In this case, the model predicts the probability that a given subject would be assigned to the treatment group. + + Read more in the :ref:`User Guide `. + + Args: + estimator (estimator object implementing 'fit'): The object to use to fit the data. + propensity_val (float): A constant propensity value, which assumes every subject has equal probability of assignment to the treatment group. + propensity_estimator (estimator object with `predict_proba`): The object used to predict the propensity score if `propensity_val` is not given. + + + Example:: + + # import approach + from sklift.models import ClassTransformationReg + # import any estimator that adheres to scikit-learn conventions + from sklearn.linear_model import LinearRegression, LogisticRegression + + + # define approach + ct = ClassTransformationReg(estimator=LinearRegression(), propensity_estimator=LogisticRegression()) + # fit the model + ct = ct.fit(X_train, y_train, treat_train) + # predict uplift + uplift_ct = ct.predict(X_val) + + References: + Maciej Jaskowski and Szymon Jaroszewicz. Uplift modeling for clinical trial data. + ICML Workshop on Clinical Data Analysis, 2012. + + See Also: + + **Other approaches:** + + * :class:`.SoloModel`: Single model approach. 
+ * :class:`.TwoModels`: Double classifier approach. + * :class:`.ClassTransformation`: Binary classifier transformation approach. + """ + + def __init__(self, estimator, propensity_val=None, propensity_estimator=None): + + if (propensity_val is None) and (propensity_estimator is None): + raise ValueError('`propensity_val` and `propensity_estimator` cannot both be equal to `None`. Both arguments are currently null.') + elif (propensity_val is not None) and (propensity_estimator is not None): + raise ValueError('Exactly one of (`propensity_val`, `propensity_estimator`) must be None, and the other must be defined. Both arguments are currently non-null.') + + self.estimator = estimator + self.propensity_val = propensity_val + self.propensity_estimator = propensity_estimator + + self._type_of_target = None + + def fit(self, X, y, treatment, estimator_fit_params=None): + """Fit the model according to the given training data. + + Args: + X (array-like, shape (n_samples, n_features)): Training vector, where n_samples is the number of samples and + n_features is the number of features. + y (array-like, shape (n_samples,)): Target vector relative to X. + treatment (array-like, shape (n_samples,)): Binary treatment vector relative to X. + estimator_fit_params (dict, optional): Parameters to pass to the fit method of the estimator. + + Returns: + object: self + """ + + check_consistent_length(X, y, treatment) + check_is_binary(treatment) + self._type_of_target = type_of_target(y) + + if self.propensity_val is not None: + p = self.propensity_val + + elif self.propensity_estimator is not None: + self.propensity_estimator.fit(X, treatment) + p = self.propensity_estimator.predict_proba(X)[:, 1] + + y_mod = y * ((treatment - p) / (p * (1 - p))) + + if estimator_fit_params is None: + estimator_fit_params = {} + + self.estimator.fit(X, y_mod, **estimator_fit_params) + + return self + + + def predict_propensity(self, X): + """Predict propensity values. 
+ + Args: + X (array-like, shape (n_samples, n_features)): Training vector, where n_samples is the number of samples + and n_features is the number of features. + + Returns: + array (shape (n_samples,)): propensity + """ + + if self.propensity_estimator is not None: + return self.propensity_estimator.predict_proba(X)[:, 1] + else: + return self.propensity_val + + def predict(self, X): + """Perform uplift on samples in X. + + Args: + X (array-like, shape (n_samples, n_features)): Training vector, where n_samples is the number of samples + and n_features is the number of features. + + Returns: + array (shape (n_samples,)): uplift + """ + + uplift = self.estimator.predict(X) + return uplift + + class TwoModels(BaseEstimator): """aka naïve approach, or difference score method, or double classifier approach. From 8181066508fb8e8dab3fe0a59ad82be77beebe7e Mon Sep 17 00:00:00 2001 From: Maksim Shevchenko Date: Mon, 4 Jul 2022 15:01:34 +0300 Subject: [PATCH 02/11] :dizzy: Add scheme of x5 dataframes (#184) Add scheme of x5 dataframes --- docs/_static/images/x5_table_scheme.png | Bin 0 -> 39758 bytes sklift/datasets/descr/x5.rst | 3 +++ 2 files changed, 3 insertions(+) create mode 100644 docs/_static/images/x5_table_scheme.png diff --git a/docs/_static/images/x5_table_scheme.png b/docs/_static/images/x5_table_scheme.png new file mode 100644 index 0000000000000000000000000000000000000000..1d65bcaa83cb58a1cac6755e8eb485e80d11edd6 GIT binary patch literal 39758 zcmeFZbx>T}7bZ#w3BeQGNrJn(CBbRj-5TrQ?ht|nm&P^0gKOiEV2u+R_dw$s+~GC% z{^ZX4bEayhUcE`36sHeo@3Z$hd$09<-&#$$ijoW#Iw?955)zi2tfU$e5^^=-KlzL2 zh$|~P=M;z^C|2(k-ypX{uO%FuEnf4o@vw2c6+?gh`n8C&xuuYr zr1Zaf&YZYU;^q??U}| zC;#q8(!vGk403b@IoQ8`?AO%H!Od0l?c2u({pa7`<8%dC{`X1tF8`VqVuI|CzhUQO z<6!?!--xCnk5`41ok13eCqMQt#wqgGo&UqNe?3Qp{qf=d7|h=@{p%`Xs$%FO?Ef*F z7`j3yy)F{cJ0v;D_Zps0_Oj4g4d8bV%94ED?ayDmdJB1h_7O|11Tr_LrcwXVfwg{8 ztqy{JHw*ZlSVA{<%bT1_D=#m_sK@%TRJ7O+#CV@`v2fC+MCW2PvLMXuv9UCIadtga 
zn`z~jLSh?{|y`sx23|JMTln=DW#5-7$h9z>Y=<0xg}ZesWF)=CEhdr^Wf zuo+Lsx;;=i;2|SNxBY;kDJUKHPqUVKrhQr1E5u0-o{)!Mx?Ab`i{C%|Z5pBn2#z@M zw(3zv?s~Giy!Y4iJy(sU^)A=Ew%q7{yOoQ5*(_16wVy;$`0?LkN9Ko;?cGeX1s=zf ze;8tFg{(GOoSAUef?y1;veZK$P_XMvP?}>1=8}hED;{Rlqx5CwDz}@6s z$=_*3IkIdmTYDdNskKOx_Kwbi(2rc`vQtIQ` zJPzC%pu>FburIb6@Pb>03+@s`G4ThB8TtUb&Q zn|9CFMg!7S`yO`0mP_o4DC*pycKo#BH~mc^dHMk>fc_Zc>yN0kS$4>DCdl>le^Pu7 zmHeH5;$UFm=vwLuSJ%tz5oV-*%9^1Q6QtBT!Am_`J?YOHcVNVHO`4qkc>lyF7}4KxkgNN67$T@UImpI!|@L z&oPO08vK`hPtMHmuZSi{C#@i2C5AY8d8S>uOzwXZ7MThBJgB{9gR?{XkiHXQu-(4&zF8N(#nX5*BOW+J`SoWqSsTp#18ER8A zq&i$g8F+BJu;DXYm7Z^%wQ{L#vWxr3Lp? ze;U0Ak-O&sdLIujHUOrG`5HMWd)@9HZY=PXvG03{z*IRt(mvPP9hab*d7@tWfQQ|u zA+1E&0d#`5pF|T>2Qd}+C|W*zi_Q{jFmj$^gwiRC9$EW{${^N#dL+^`rwxktUVIoB zh;w7fWk0dnzcVeF@-$Iaocg)Uw3G9KemPH0CLNVa&E0wW(++R=CVE3 zEjH=Z=^t2TS8CztHk8=RQ)y>^f*$Vq1-G|?FlAwB8;AvI%OxtwyKv4ZJ%3`X|40vCb zSJ!mE10cEhuEJ@l)IgsK`^LcDaA&HT7TIIu~o0}JP(YO z8;o0w7q4nVc7?qCZmT=!Zw{*cj)S-ZGJTyc9Pn$5$K9xGM&}*cNTe`UxbCTfD{ca75IV{|+SD&f9P8&M1Z_k^J>lkhDlg9t6HL3bS;((@@*@+#F==-#7>DgA?%!C}y)drgMyT=d8tMF^;?5+wAxyhDkVVY;*-acU|f;XZ=0v?s2!%9>V^4Sa|vO z2RxBcp(ceE@U^TsQYP?DC;d`$kUi%vjCI|afQ$L%x@os=#O@bFb1biDI&ThPx9e^GDeFYM?0L6rNa@<@ zRQP6Y6aG+978c3U>{&A!C_@8at5>+0@szk<;8rGp?ICm1N7aOpyQv*7f=8LU`@7;> zg%?fz_NW4EJWLpU?k%SAyE87UO6l(!U;3RM24oEfX8mc}PYA*mZ}%fF3oFWY^!&y` z_S|X*?|ao@mcNl{^ev3?Ml!j*%-!;MOopnyh=Mav4W$)NKO*9URO$CIU<7n{@&&HU z%+hK8XZS%qM!d{1Dz@TW^s@Ne+3PJBW{mxHult0K)_BI^;zRwUOl21NRe2@B>h$gE zF^X0FjQ=UT6xZJg2~ILc48@BTSL>U%FIr4^_^3^tIX5XCt7!Jhf3+IIg`cTEn!(z( z_shKNC}(oizdU`xw`=0V-7`Xi;+NMw2)lWq?X~VsC#VNZoIk(*B+-$fpyOa}TD7ug zSHC}ENGT#5gLpx4=Z8qH?xNjettadcOl5+oew&x(r^h*B(1*DWW7fS2v9rs(XGNLs z19rdQVeH}_-B{jpHeFpq>bKL1vR^ZYL#fcw+E|(|6Z*F};R&3;>2Z2pD}oyM%`;o# zV2d=njuY*KQGd(z&TsPp0l<0SGw%%|e^ZPdJV^cY;#ZF35JIE6stV`5ERUs+R=5@v z>F<&eL5Bqk+7g09m1W^U4O#Gpo;&@QMQahcfGa*bKku588&KAGY6Y zBM(4>Dm%_UaykAI*}ub`bsT4%8yCnirSkuEzFsYxg3iBaw!6mUXApXKgwvr6SumOG zR6>nmaUpFv2rVcP97UL`|Ha-_bkMvEk85hS-SlDAlK4r*hb3-h(Ix+~+E?zzvshTW 
z1Ky4O8~Oy~I&-A?XSS1*P<+Z)uPH`*{O>+>VK~fP1@99&Z) z7hcwRu%FVfSR6+wzH-vfC+sUY>wUvc=eLO;+}@N^Uyy7B5fya&{nh-6V9s4^ zj@w1r$p5NmR{ogwb4=8;_L=bEd#(>1_jkf2dRzfP(eyMT(vG8s0oLLpGM*h*lbX+> z4IR4HqFnsbyW!9FL&!Ow;8`;v4`NdA&3j76>J}}A-(8ee4!M_cqfu)1>1j`OX-*S^ z?U+b7tW|u3$kp2gAkdhM2H}SNXY!JS2G;RwKe}Q7Yyvl^wGT^l;89M;KN|6 zZ2KEApZZBd*X`0qH$4fv{76=?is_=>3)FObC zy#?4bg9{kpOpg`2Huj;60Uk9R&*K!vuhtha@0u3mjOPx*v6mYb1J|2o4Lh!f&4(J# zz+v}X+$)i~J?Hz>$_io)!%88>mQ8n4WO&(rSnw8f)Ppt`=Zj^dU+HZ+Hz|{jVqt6@ zAMi6+F=a9+rp3ih4Y(8?_ME=dI$U~9d@`2T@yWtU;61(kJd7gr)`T8H%{Agc{3KJ6 z@W4|#A{KJlhz`%FxlP3At+m>`T^rA7WHL|oT7mNrZeM<$Y;@z7*CMUg`}W-LC=!(z zL;Ma3wF?;KcYD&|OJg@6QIEwp4^sodlWx zYCWM#dAO7bvC;%t58AnB?x(zT<~P##6L@o57sZ z7GQM7$E#HM`13!*M(^?TdZ2>^VOE4LV%YQRC%g38w>uvF+80+^EqH^iX7;m?^O>7Z zwy;MeLr42V9KZ;Xh!qoD$v7pZiat5=(DC)g-~QO;T^=ToOy+RjbiVvUY-_hAkzDWA zmAK;vuFEFBdBd#hu;9Rnx?Uu^Ak9^4`&y59&{Z2{R;QAH^T^qL)!|3H_c?J#&)BA1 zDHVTwzu#fY@ek({`mwS4@mgq<%h3jORc3RCBAc)axQ~e1C7i9WB?vY zAc;o>QJ1q7drp*o0_%H#xt#rB$~-%NY1@moc}j2}rON{1eK)=EJ*;z?57_0Ab{`5C zS2({p=Zt6BqfttCJ|UmJ8C%ZKaU{(%p%RRLHdtnQw13Yap)>5AYWTF)hI-#TwkO>a zMzTpnE*leeM5}SX4ImaSscgMfGd0pS6mDB+#L~2(~(*DKcO4IttlO(em)E3sew}P2fqIEo&TjE_O8hDGt(K#QfRED+wMo-+$drk>Z4|1 zoJ2j8M=-cEyP`HCbauH)23+w1`m?+D7o^A~8(B83(&>xsBRGB0uUNc^uu}bfnkVkq zmo<{Urn%tZ#IjJA@Ey}V;>0;#6R2rVd?~(0tp8#mTIxICi?XWKVgrgJ3m9$6fM%W9 zx5t&6X!~-FxA&(@DMRd$L#wtvuv|n6ibA3AxcACK=*Va?J>RFact-j3C4pYg5FG{!(ko6%hi4O_n(MyL;76&~}xw-1tfUw%#-9$zR1x zWG(vZRvN`8pJI$PzeV97CUDAzXy`qCb#ANp)e@zZS&KUn%dS{?Zaxy7D&gZD)erw% zzRjemcO=_68BxhtPCGX8k})>x+lL}6bhbi)eKD3XdSHDTV)#mr!{79} zC_^ln5}F?}bBg6|klT+>likd4G^94(6GLL(uFZyHy~PIL zZUsjAjaSMEy)vZCo32j?7#ZOSL=L2Etl;fQJA_ogr_a%;MHFmxIr=qi+Q6y zskCW>i})nd{c=PWT?}8JuTU&FB z8Vk+RVpVdPS>A(JPz{SCbdwv)ugp_mB+XVv(Zn5j`)tg0x?FmjGszUO;m$jNMGCJ0 zU&G35W*K> zVsI@f;#i8wwpjA&mr}$D#`C)Z=~?6P7-=gZ19m)*2}EeRc=@M!%4ekB>LW51?eD+V z?+nSv#vrU8#tjRDdC=}n+r1B~^w{ca!xgTklH~^o%cIJPC=VVmjP`C+Pit6Hhv#fI zOL-fY^p0q*RLZBwhRN>T-pkfJu1T@SHMuN!5vBO`4T8nazZ_~3{;8Y!y6x>sof6m+ 
z<%t?V|tTtGrhf<4&LB!GFJ-E{WYPO!kZQiCs+{!dR?0f2}x!>1$W zfb~fB$;cUIc-E>g?O$eii!g&nmcuAQX8nqR;yCfa(7MoH6GzC$TL4Vq)=4CroRYj< z{4ULRDK#q2lg2c4LKnW-l)PoTUPoz62j~1eBG75LH#e!D^e$#MPqSz$51IvU^)lQY z3wV-kAc!nj5$imev95EemE@Zm8a*A0$AjsnNpXaFtf9#(#yTW(5nFW4({Mc#pz&-t zu~)Nr1a&!2Ro@Q(Z*~Q-6cFz9@1^h`RoWX9 z0pmnBG`!#b*Af5ndDK!xH|Mw?)!)C5KuQUOpNthu%Kq|S=X^?u@a#ULP3H=K2cXAZ zzd~OkBF_K0A-ZG_H1U>M6a)?nwkDGkDBLux^@&pH@ccJ1mA z8=-s&0x_0022mP67PqPPZxI*rC)#5_gqZ*x!EIwRO_#NVD)1%qv`jXf==-lU z{2}5Kld==b`Xk3P&*&01Bou zo4TKQS}F2ncm`cza;6K@I{G>?IykydBPiO=lA!65cQ^UrxWInWX(2x;G8v{3p*~XY zR#-Gt=KamTLP(yl;TnBeVTB^h${OWm*RT6rGM^=d2-z%aeim!wG+EG*%^Re(N9v{r zOwJH6DTrp8*t2xMGT_j0hDSo0(0=zNP}2X5&Oie74l5FrPF~LGo!H1BTU5j*tY^)V zqYPy7SLEp8PE*<7Pft7HP7_@PxBe`5H7t?E-NkjIOd5$_om*6LvZW7)3qS=toP2G1 zI5Q<MRvPBBIVnn}kCM5J)$gkqWweydb{7A+~bNGn| z%K>JU5Bk@#DR`mLC?>DzWQ6y!*m0@x+>4unP>E&4#Ng1YVo)c=+CgQG7(oxUx&qgR znWu$zX{GzFgCrs;SXBL`aKBTO#)(X!+m<9Mg82o9^;K3bc_?F?s_|}0PX&M&^Y35H ziHHqEXp*b z87sYLxrnw^A?Lbdvp^b5q8C`QLQ;S}_XELd^L%5Eti zY`Y*|S9g;^;l|8sW?hsC6^PAdM5*yZar0w%#*ih9h_=i@p95-B-peo3g}~uP3cp!~ z??PPfCk;}FO0aja&l|f*iLt5oo%|jY(qA8ix@vA)quo?@A8pbeHzY8G^_<0ac~y4L zy*j6mg2anOL}7P|4OsD~Hb=4Uh4OR?5uNA1jaR1hGn1JATw>gyWMe*kKbVM0m4S9D zfbi}5Cy7g75lmTv~2!0xmSP8DF3iFpMwEgxt zo2t5Qsz`Y;?L;vHkp1U}MEj(6rleZE=t=`BF-zpOXrvgMsbiy*N^@q$k$UorS_%Az ztU7(t_EN`)y3fI^uMJt^ANa#%kf)M62^FbzQIpH8Xvs(9u}vo)o+fdf2dg$y_a)o8b#qKfVTrHEUGQM}0mM~oOmu_YswtwVb|YUgc?{3(>lBPqRrnPSM-!OEoTV%N+kRl?*gY5AzKWH05S7oXZHhI<8vGL4*o1nRvo zQ7PxESQ%QKPi7dv_iCi#Dl7Z2L0GezAi{|ZoUH)qer2G!ZZxxOOlp%{$U)`zq>X6z zSr+M{!Pl}ra-(uGfW-Ca!)rBS2OYuo@?Bv0m z(sLNAxw)j~-kaI(5eHZMaH{G_nL*7zyGHTorpu%=u2Ssr{ZWp@(kH}QYt%yeBNlM4{gZBP;0dpdtVf$3X8|h zQ5oB`goWS=cks)XNtNLUK#Bl20Ah#a&1C;~A9{hP0*$^U^ zf5-@;-HA=+OJ+;rINq_?EX1naIO@%c{!CTih5g9dJ|RVF6f=hDujof~z`jxqW8NmZ ziro>^G;%eJ=a<>#b=Rs5WyFOv=LE0Jj3o3e0|tP5Z&fJivHx0UgC$rS1i)6!qZ8Qa zXLL??1$J;Q<{LLNREaSINQ``sZyuJti9mazxD1klLbQaYFvU2{%A@O3_=ts-wi0lAo!p%S`Q*~sqfa(Sfcv6=i2Jy+bGk>UPo=e{K 
zc!zNR)7!CWQb`_*8S>QfRV2kC?xu{>x_?dnR9Hi=wG*jPm4nEh&yOi_f9nqtg$a>+ zB9As|NV3-bM>XFy8DnPI2`v40@s2grm4t{mNKW{wc>GLtA`)Cr7YA>xe$E=7w=6Pk z5o3wI3q>I?rwHdio%I%oD3dWYxDaTbJ^RdGVIy0xqaIU@ogh>rjl4wqsMyjG*wHs8+#Gx#AJZOuQMUv9mbmN|E*WXmkEtBXIxETza8oDTbe z^$}9rl5s|&+OFkQSU~{Ia&Tv}c*N$lc)zO|9+?LT@@%Z)0RG9Fe?#fN0JZfabk5i` z@TB=K?lwUdL__3D&<1+`1J4mB>5uS!Ly(9v{J+3E^_>!;xyxr~&VO_oxbcYZy+%T0 z{y`4t;=qXJnO_Zt{#jn)^$1I|T~^}%%ML!Fts&GGK6(Zs?*D*(gevjBd6=XIw)STY zWjs|s@1|;nHzqo=DR{o_`|kvK&HP0!($E|os9v9eHr{*N(*4pz7@impf15719 zD!VINB%32^=!6tje6RDpa>9^Lm7-(LFl)Ma8FgYrQ?rg&$6}@Z9f6y{1d8|D!1(lu zVoILsRU03%;b^z(HM%r7gcKT(uUSg0Y&D{K)MkNkh8l*ZdAgfR_X~u0mT+{1J%NX! zzL>^J7c3Gc0fVYp?UR*wR`LW`kvX<0z0%RI|2;y6hlrk*$l_yb0ZTzh_{Q^Pqh;f@ zdY&;^f-*f9$sTIh!F+hd!hub@OF_cL$v@Oe{?wcXfO<{{-Xhiuf(jx;J5S{<(#1>( zo2}vluCUPKgR?9zdPvhf3fnJ7zBs~^LU~{OIv>(aEY#Si3pCaGgG&2&Fk;Y~Z4=P{ z8@a=?MIfQ*Vz z?3H)^(NRuk85`+=!Ejw-AnaJ&SoEMusf-)2G?;rQF{!(E)h>ym;Bg7Do48~+GN65G^?G2WgdCtSNf#ghfN z(<0N$rfS3&yi>AMDpL|ufui|k<&`MY{AY(?I{BVm93{rTF}UMYiL zo21{=fE;cHzUd}Y=Uij0YnWu3O~Hy<{ketaC;HtX~LoGBZ)fvREgM0HCl*# zU9#H(iZ}j~Y$zJchYO|A?A*e;rj>?z%QW4if51$ph z{d=nSnmg0Ik|ZgJHGn>)9A~MU>!3^?GfLtGxmwEX%+P$Cq$;&d>o@BzvERr?E-gUh z2jt9d4&f)YAu+&NL6~%=gU&eHCc}vGgsRrf99m?|0ZyU~3A2k|-4HpC%7|`pQtzS~ z5bH;Glk+*EBcN;^y+zd_+U!yu)@(jI4-`8AGm+^kk~SZbKw;*)EcDAh_WcBnRMN9%$c#& z`F#!k8SP&L?gLE>L*Cc>!CL!i%z-omzW-q!qGF2BVQ!p`x{rrgOa@eKL<->1)=sY$ ziLOmWOC5pg@kCCV;UO2r5U`|A5X_qz-+3|QX?Pyt?kg!B1ejBzeTIful5bqwAX+%BI zmo#gy!&728V$QE3b|`CDBg&!nRCN%>E`;QLI6hY%M>rJ$as7NKFa|!(vsWCudK4*e zc&lilFGrrk;N}|e>)TWf`m{``0^JCOsrrn8A}2adt{!qjDRSfk6m|of$EDiNp@~kS z1SiMU#FQsw)c`bhztwGAgFL|>>_r9v067nNmWaH3R)o%{cKc2Jfs{)f_^r#I9Sdw7 z*3ly_=fu+KY@yL=QY|pPObuFcfc<$Iav?7gaN6%H8X^u}mD8m@!6c2Z1o z6%~Ly!00ViQMj{?L5ncMNb@TjDrh^k9kriiZq(UWbr#JI7o>^|*4Nmla+!~RWj;=y z?9ryDZb5{A={?t{4n3nOHKFQ5WKNXzW)yRPPLuKYpP%?1*s7(eGl90_kk`|@E=+Dd zddFBH?0}(+ljvu_-cZfj^M=VkHkGs-9+g>MHp;izLw|Ht#MxKOv*IE`!gvtnN^*wP zH{40uvT%AP$sWijuF!(C02ZOX4}=gi(k4N!h!6K{CyC)7St3lGg8I*ipvR%qnsHL+ 
zR|+Aq1Oa?tI`2l;xA7CJbhz8A95@}TLP{C$l<8i^C=78P@}9|S$V!mU%_Is+9z82q zRAXdl4HuJM!Nc915X6Oc)l47cf-D8JRYP|>Jh8q2XE}Br-ei2gM8(}5a8A1=;&#ei zNrf6jV2{Qd@C`TRZOJ;d2<#Ba3bg9Dx7!KUcEBFwK~v&Z;u^B7YHus^$^w!ERD73e zrH;$)9tO1>TQ6{b=vLK>1(Kw94#1dj592Npb^0Ets~tUh9G5!HR)jT3IwFS@E)!Sx zG)w^Ux-!nTZyNS#=Vn#pmeQ=E7c~frMJ#fS%87V->yo?AUfGT23HUzHnJ(ZfCC*3* zs+%=@=Hl4uGgDf>bkmr=qWgu1=GYdd>7v+^Fivg-1)w(l`NjzwB9Akt!ClwjZ6a@ zjO@i9`@yL>6vr|hjepWI%gSm`9L>m6f;qXP?`i?tuh4)`(1Axw47lju!&vF0$R1Q~ z!(iWWlZzDe`{z3DdF`BC3%Ad>oN|rj#}UlP&|6neo5{-SGO6?s!Oy@5ewNO(G4{yM ze9?R88MWnv0S$8Sd@-02u0vI9GDGBeT|54H%v93k!gv7fxPd^Ic#8&xf<-*+}-Jw-MxDG zwHteb7Z-c6A9rM*cpI0|CZ#Yul3*3Zo>dV%6W%1%d7KDh*1ef_8K)!RAGvrAi-onm z@XwKM|L$1VjSXcVZ+(SM6t|=>z96Zcbo>WUD!_q|V}J7wKc~0%`A$vGG0LlnhAv!p zAuA&%Bk%h`DknpZNPh2jiGh{6T5M&eCK;jmm&yrLh`)XVjMg(2GMbZUR`NDvYKJ88 zW{ho^gkR?sL>KI(7n&>XIn^JNp@rSN0AZnRX7V!cjUW_{Zo!%No!_CMvI2EFM}k6m ztSx^AQ6|>!kA*=6JQ^ysJ{Wc(7J>+``K1OM!OUNo#3Qu8H6~xckv8dRURq#I!bnla z2SV))_w+y0Q2ZG|7MXugQSY@3Ucj;ZR^DBI4&vtl|Kt@m7^u)bVHzgYEb;$t zqc3GuE0tuHWuRNqGbgMMPeTFz{AK`%sHb;nd`VF@U9INM`uXg0NS+;g!N@BIo~fjn z`jIKS(y{aT=KAIa$&BE|rgJN(D%cZ|B%-^UXo@eqsN4~G2<2Hmjcd7z^|HwdVaZcF z`i>>29(`!OLhce4R!;U8?~LYl*st(g`sjZ@)5B6PW_98yZ^!!XYnTDmY)4z+SEEsn zP({x7IA++${0w1>olDl0AXuB*LEDk9&j{VG0TfS`K}%#hmC$$RKrmO2Z{I)o-v7@? 
zPh^#_iP2?`A{Pn(i6| z>ltmZEBN_d*vlOr0gSqJn#dhGs1oddI#zA9rV{*j74dxN$qTzMz?3{ev>@|aL^ z3w;k(?(Zt)eR9}6m0mpm?-muW1dJzT0EZssX~`9K&sqJhNx40RtD{#86xn@!T^n6X zc>w8e_vz1)I%nRCOQ$m~zM){+XUt*_QQgo9=Ngp<=TAN+IuZGc1SFejMOX!{%_M>o zqIIceFjY$r$LZYE*kmR2E8*_01i`jpTCgP8Dz1CHtL2;XZ4AG~x7FOb15?^`XImFy zpxi`5s*YS9AYCl))fmnu(1@-hkX88W1C+t%sDg)h8ZV zGZseyrO!_>NghOSdoQOFrxH>xUWVTYlUs-RTgv;$PR>XYh-uDAn^B_5>SW(wY$jJr zF3T{r$KgYY-wv&)Zkuk~GWv)Nn$*N7#@DoLclYN-R~h1eQ%+R^LsDvb8)H*a2Nkwa z(0cjstVxl5Z*f1Szqdej{xv&3=qKrFB;-$c3a0%qK*bcIs1_{3)=f4p0AgF9u+XSNG%=Ih6t$_k4b-PEudeHvGRt%F7HB{AMU zxA8F~xk@wNI%89ey)S=Lo4m^@A5-b_H;+;}CydSxEC%Sb4y$&~JoD#7D@x%K@E)QF z%?FZ4Xw_{qz0a7SzO4yX1o!Q3-Ph=OXL4py_jo!LbB+KUS6Yqf#wf8_p;atF7)cwK z#Mt=|$cJM`$!ZB&*eP1j4eYcXy{kecm}^0s1jDI&VdoS zUx&Of=o5HB!k`NLS%r`_EV*%%P1&__Hmn4laYtJ-8MkmA#k3KjYSdD;GwsCA%ZZ3q zURac)nbTAa9Dw6NyjMyz#lJ@)~tu% z$rX1~jZT*#9~WFV(<6KGUFr=>DN#1m^KN7TfIW>pTGPAv&mSmNm)gP>OZ%|mRW6%& z?tMbK^4Doc&T{ev3X6vyJu9j7Z}&4E#cQ2TlJ((>VO%9t8~kQKr=y-$f~Wa!D69oH zRNcqv&D0gqTo{%pMcP52v1c`bBEa`qnwSpOJHrT|nWqIj4Y8e~1>*9KYewOVG)IL&d?^VBi6$KK zb84LkPSj-TI~@nIQABoW*r@V3E!I_GkJD-8WIHW$axzmUG7|SQ{fVy68SNOvV3FxT z`2KJA5`mek;MbScS&snIm7)+V#o`sx-gG9(2 zN5Tq71lo0d*fJbj>tFmHnn@2sYQbaSVzGP#{Eb@=v>|Ot(xD8#yj%o`)vv|UN5*vj zoP-2UQA{y|`SP*6hg9ixX0CDZraIvCzUEj9aB`)=WV24*#*ku&L(SevjsNre%{Lxh zZ-rHiciQqjz237VWI7sXR=7R|tf}x}OC$a5QU3B2YX1d$qUd4R4D^yf+1i34D@0sDNGq@n_VgN zBK7e1+T(Y{C6O z+8~_m6kInBa$rw3G4YGWH)p#f&`{Iisl1$Bd>8~lM ztLOP*dDP7=_V{skUn|Y9;l>Plt4%QslHD9e1BG64S_1y)KqPvOYiEQ4$I z?R5_}m2}Mw=>*V?Ez{yk2L#bhZH~cu)!c`*YShouKRPYX7QaikiN%`%R6%6+Z4K`$V>X=tILv?%P7ce$$HkaT{f;3zY z2LDReVw7dYf|Go3K57AfzD<5g0ho5=j+k*IJQoqIu#pWizikk!jxiwUM?3~cud;xu z5a)1Yh&@)pq0$LM5HJ#aU5wvr#tRsLxFy3}q@IM){fhd_cMF)u$~{&MWPX_XHXiYA zxb)OwyIQA6r^`{TlTZ@{l`jS!mzh1bNgAr>b#<0Sj;uQDX&_rBKY0cgP3EBEDL!lKiex!MtxWcPW{sjAVP6yJmjudoBTqE2qReAXpnY(Is9;wqIyLnxFEi3QF zZ?qSGu&uIkXyR51hPVD5= zS3T0j6}FA$asOC*k*wzAZd|KmOAXbwGw#W`OiITQE`X4}D(*T{&5PHZR5(A(FNru~ zTB!1PbC&*&-S&b#{e&0x1QDu)aYdB==x_V>BM5{EvS{Fsu=MVR#NED`fB!828ux9Z 
z1V}@^U)iBk@j$M>evx2B69PH!0Ca7P47=H1-f73 zkGZKu?(5*t7~8+fn80YQCH+ULe#|o+JhJ8pz-*25L|o|?Qr|)6KRC`;$m3V0lZ*8K zhx8Q^qPh+1(;nMn^61~kM*yPw%g(Er_n*15=Z`Mu|0`XxPDzPJdmIpw)rQ&ps}8W9 z=7(QZ04tfRemy-R3wW-e-`0c56IiO}hfCSy2~+9zXrOmZ!uD;iaBW2XDc zQyN*JT>{}F2`-}lvZ6IhV9UhfRVYlj z4dWhjgo=k7D8jY=@%?iInjyC^|SH^Kf^@E^1AHtZ7n=OjAs!2)u~RW|tNh z=m{_r0~STJ>raSnJ9oJL1xSL~t^ zNIPea4W>E3(uHSp$(fb?5tGorCpe{pO`|Ro$PfqJe{$I$RlklM!`XL~u z+wA?4J1$>kH|H@Oz@Skzj0Y0e{Xz2n3}yk!^d|3$0Sk=iyckVHBu7DyTt}&4H{y&| zWu;Y9uU7h_YZ+A;f0-hNd>NyFZY01dldH@^Ji%;H*w6`cGYLNNW=#OnE=g+0R-ODR z*3JG&&vsDFjfWdkHejxImO*_-&o!0zT~w%=u>d@Wp%34&!ZSFHfJ_7LM+>0$+kmq90fx>9VVPY z*o%dQ7z3eL`S#Cq4$tQVvk=yTB5)YjE_+a#5iI9yEJVw&ehCpX8$@gfyD(%ta#xAu z%S44`bp!&-f-nf26Y_I99ajm}MKX2BJ9uGSD5`WVVp(~zvNGy#N*^ahWc>lYKDErRp{dvzsHV>&Pyc!PsYf z{3^e~*XVakypdTg;_@=GHvnz<2ju}$ugW44DyCMR=*rAcloVN<0uUt{F`~Hbs8D>W zx&@9g?fqAqP?VV;v(LC68txhE01tOPEoK#kzuv$>MGjqXFK%pt04o!y2 z?a$mg@MAzi3T(BRdeAtR-1fkA_toc^--Xu}%uR9BIGR`%Ko689KI^DEY3LkAL@)Mt z$lu6`C$hy=LGxW`VgH)G|CIUlorkK;+RE+G7b!-cY6h?yWn_Ij>)H11n+VvH{da-S zUG7ocGIx_Y?u*8p<$YwKYM~uJ$#cLp!kLGR6iS}id>X!dH%)3jaycnq;N7_Gj6H=) zO(z|71EvAC7<0xXMdkfyLyK$hUAwMG44*G__a7~fo$y}o?(;mTgjCNI>4yAfY_rd_ z^i331!4HtwuBW+w-soj=B3mjO0Cu$A#*pz8{yyW0npy9Onlb$N{j?t_$7Q-Io!dfu zLAwk+vn0cF{||9*9TjEw#g8JQfFd9vA>EC1H%JfNC0$YigS3c%bPgymgfP+|-60A{ z%Ybx9Gg8tG_ZiUd>u=q=*1iATwOFp@f#*E?>~qfEpPf(EbLkyGsO?K7RI_eCd3W!J zlZz2|F8vR*a%_0o+Z zjmAhp&}9)uo_k{{@gaHo++1g36w0=>DEs*6Ah zcKQ$~LJgviyE|?+ESGS>SwT%76wkk*^SDc+NldHn6E#xmzcZb^7gs+Uh-tscLyXAN(qCNo?K29_FIMaV8{nDF$z8Aso280}m( z<_Hvp$R~db>iIF48|UR<-k}X_Q9_P7l96)k3FR)=qwM#1-g(MtN~u7$4Jd%xe&Com1k&qDX(+F-O)>!WSF6mkzdqHNF7*hm91NDyz@&j@QBP0Q*iPmPl zrX@y_kc;q-D=p* zEdJC&{sl7q?;+S$V)KMXc-X&ifQ5T_ICAw&<3R4?3A`=Fq8Vq-zy?)a%*&=6D zp?(;{OYwO&LY*;v9>`EOzc)cpuWoTEuf(c6rJU3%+2aTs^`HDmJ}?o4cZBmU!RytV z5{`JAm^1~m6sFj^t7ZF~cR_`OTk=(rAVykC$bdxP?R^}4{$N2I!J{fUQ`KU?lt?^$ zJfiS~q3TCCkPTM)yQ>e?rH^Qn{{qU)Md=<)lGViop1V)`YqrCL| zSngU_+?~WbTt+KXa@KYE-m7GtB0j$=BF%Y%IPmjb{XOda1=J}~2?vZ|e2xuIJA^9~ 
zZ9D{}m*$N_@V^Pmo4)09G{;=*7d$bF|3(bzvDO@+wh}W@zGq2p>C~GnB#zBe>>;XE zWyL#qT<~o&mHf6_`u9)zQ}dZXiz^G<3c^1-96Y=d6+%Q20RjkSU9 z6U0fsD;zfQWi`%D$PLSI>~)F8f=D98=BezRF&kwX8gS3{L*&fgQ~~jrU&ZCUpBpb> zy}Yl2duVt-7REpl)8imO`^=nrd@*4J^7TBe`|mNjxl<@~F>&O!L&cg152j5~B!lJ~)Xu(wd{^R_oOm{Q}y-1aYpzkIBCazCAQJ{K?yBGjLMpsfD8*|U3=2Z^@m(d*} z_l2C}m=)X;KVjRV1W9OFEQ|awVqr%>a-2YS&+D-&SJ5c>S2Nyy$K2bwUi9KUKiyu> zDXl7ByBvV_NYuYmL)%N8>G=lV_Qo!)k+}=y`QeK@phnZiUD##2H8sQS?r7icxE< z`U!@(f#dQ{?GqoIW=8zqi)dme8$Qner4N3W0kQ`-vl&N+|0^90%1{A}9X0XBjGLLR zD=_!fQ3Duid_fzVVoR(14PhC+n+P=LZ3O=Q`Hhj(00JHwBhj5{KM8Ycu>S!u{`{Z$ zq>=>%s!r|X=l>}LdM8nc=QP6)AA}djWntTnxe={7vvmld^d*1H~{#S+6hBgT>!|5-P zk~nm25(g);!X+UqU?t%d??U1|1qiv0+=(vh&yb9ofa0P<(j}`V2rt3c%eD`IzGCo)Z`o*;*KKBZbkc99t~9x z4_H;*8xuOH!CU`RX|q~|4VC1I1Bjy7X|q}?Qwc|izud!4m06$${=cXUuX2OYq;Ah7 zi;^*CGrfjQLRAt$O(RVek0dD_*CRnP#?QAxqa-K}Y+^fy-jXB&H~jE@ZwLIf=4cOebzp#Vs%4SkyCuf&9p(Hkl^%Sc zJqJ*X%-{;dT@1XGNn%Ufuo&OscDW_Jp(SxVSdLg6iO*mQ*@R(u3py>yd$$wPZI~ls zq&J3Bwpo-&(m$1J5A@!Xa0PveTZV8tn;`VpBZfB;TJ2|X4% z|K!TV*nMuq31Jrq!W4`zE*PC?6IG=+l{=X5WYWBtOhV02r5yq5BM7hm66X;mGZgXE zY$RJXU2Bq2^-TXaHD%105?NO^RJol6xt6k$gGb)%K%Ut*tb1RUrb7+wr`h>NQB>@1 zTn2V1LEHieR~m!F+Qjzy&qLHMZwbILG}oaH*8~cP>&hp3I(QPI7~j{`FiAwA4x1QpZkC?8<-S=(l;ocPK$z|Yx z!ck$}l~X&YViiAj3xOaY8s_@cU!Y%b{;-o+;MA6&RmxEL7iYQ5Y21Dt zL8luQ1NnrG@T%?6697Kz3+>x`SOpMEiT>=8BR^qU5n+#_?7%b8w|@4+R_KJ?(bE2X4W@@ozO$Co-f@e+SV3QG{9Q(HZTc3#Ao5=6^Cpn*P!RY%R+ zy2(14>12gjoaGAA+6rfsL@x*?j?@@=A72d});(tKIEt zObOvn5tLt5UZj7n4FnTR^vd?E;2WiuUdrAB?%AKrRI+#gj)6l{*KdxR2hbNuPA$nb zsWh2EVPm75Cqjl-YrzsRr?5e_@TNTSSlM0-NGmpZno1)#F+YJSxAPeC@f-vs4k6%| zc%gey!xK6pdJi_JxkfV;_r1d33{eginzBXzDaH_XEGwWHc+%K&0rSnx$)$xs=ywGY zIg)lao?OW+e{#7w=?*3{8Rq`X64#jqukzW^h|>_1dRj0|~?W*a9%=dz6X zjK857#-R||=HCh1roP_n^@mJ*n~s2Ad}%nsrbB*B)Qn`Ek6I+KoVCENBLB@70&5EG z^9|nSBgEmu42gL*jm@Y~0 zZr|kr`(?&`!16C;577}GghI`#=J3n`^iAKHiOoI0g$(0j)q|eR6eG2yhiuk;0R;UI z2Fj~NLuzTdAw??WiLA;I8?1N4mpV93DUg2o@8>NUDaLKbRZkx~AK(*UP7vZ5nxuj!@szi7!C zaE7zr?h3oXWjFt(B7wzGXY!lw(U^=3zk-p{WSi6ATui*<9T+uAB}3h 
zg^G+exOk*CLbyb`DsQk1G9dS*%Ym_G{@YCUAL>0K+CΝ$+{AwPe2rs-jm*zFRE& zoSqwQc<_Sj+y!bxj;_NJNVa@eMD;3|T?37J^5Fv+J3z01MNk2w6>^7F6`v7B|9WJf z%&R(SdQU{^VT=<6$vp|i8KzCzo%kJXdl3X10u!NuU}6?$Fq3hc9M=~%`jGjAOyZXW zYkwSe%7=^(-4XC^yiLs&7JE$QV)zczn8rKX^AQt;23ZR%7sb9X`CILsOzol=;>z%gD`%4=Lztob-BzdSOx1h!6pN^NY=GxloHE`|?`(BtP+Ksv&1;Y@?G36DsgUt}wJ2z?s(1uL_!C;t(#I?|^|P`wX*^oB zbz_dvx+}jYuUX~Rus}S(+EpO)&F#5=IB$b-`+FL3gZ4*pYyS^U+j|3K{$JjfKVg6` z(qVOF0ET+#Z6#6@>Ulr>K=t;+q}Ri+h%&}aIt`l$G?AAHo%4N;kjZCkRpCGV3>czi zV&{uH&jlGZ?%tjM2DS+(ob}V4KecPQl|rGBu7&i<62E}9H5eez-r8Hq_i6hHg1vL3g#LN>5RLB z@pgfp_LaN=pV~h)6J&G#(iSeIA!Hvs#?fYt!tCOCE+qFYEyKoR>8lO-cSU>X30#;l z7xA+bfp1Qu7CR&EiQPz3HB%dsM!;tvdJCR=NBEOz&Pfoe*gL797Jq zL3V~HSE);ft`_q6YQ>&az{>C}WdTU=g^6o2`)aDK5iJDhkoi{*TMKzE`E3}*iV*we(}Aa#$@~W=7UBFntN_hpV(n*DZf*!dd8vW zMOB%+3JsE{d0*ux(UtUJi%ziJ=H$l`jn%P=R~!xzqEYf}N)wTC@3-?1F#Rw5!}DNUP#~5mh8_M7oo&-%{;xAKZ&+N5 z*Y^H1HpDd}VzYqvcGt>y-{%|xTU%a7!Bm4Z2~}B2I*9>-myFR@((?uH&)>GBKjqLp z*0N9EH$XgwpWqGraDgHDnF8?B$RUV)s0A+nepFX&(Zy=FVrav!@`Su<*Od(2$g~ zoVFp7SH)1i$I^SUO^Q9E*P_Q)+M+MLQZIL75l!h4R+n(`jxH{EG9_S{ zz5OFsjIQ&oj9-dJ!dDLZngngc%j{_nl9fum=Nky>XbxIOe}~*3WA^9Oxwia{Km`P>t8JO)=XNuf@eve5Sn) z!t{*ycivn#S-g!w0U9DZb<{)lN)_ zU|0CYkocPi}OC8vJpxJ%FccErl3h#A>dHn~K_6{`_ ze4eI$BzxWE%3P-7K%1^VODs#N#&o&-0lNh%1QHs!h1D+1)0#h}Z4LzN3px#Lp15&f zI(gDO`}VgXcNQ(GkTcx9fJ3D8vC@}Z2e$OKW*xwd26)08NYy9k>BUpcOn%W4PCc`J z&Y=-n(V>fY>2~tGA$4cbBQ|j~d8L~l9wOK{wb9a> z<*%h}R;&X$CJbx`g1JwA$SU`jrJj0nrY)Z4pAkEJ54tEKl`I~4L#;=%w20J|>XxXU zEsD0#HqzK8<$m_!$q8SO{EiKI;LeiTAQvTAjn!Z@yhOW3kE{%+dSwcoXx)B#U{FIj zOEUF8v{A_TJGJmrk&)t$+G#Pqm;tU7_Xb7k#X z`|3wHFfCfUgB?8)#FbV^rk+vvvTBwg08WbNS9WipO*F&rQsJi3A?WC*R$a=#YNoBS zn%BK)lC~JfZm$Q_-N~<~{4=pLJ%C}1b{gIP9UMSAR(N^ktGBRMKrtZ1S6yZ?Zqg0eULoHI0=AR5a&)&QwgQyFS&{^WsV z8%1+7O{kczKKr`x3RDDN+)r6j%+S-YT zo|x9@Nz5k9!3kfdPQgdnM_*X(7pM8Ua+O4?tV7=(gMB7%vsbFA+PqbGxhWevyT)D6 z(omNqG6m(kESmmTX;oe}9H4_RkTJp$6g-eI&|uW_l!a&1imZ(i07l+|1>+y^tU*-q zTS>QXlMC#2@=Nl*RQZHdJd z^D`8mZ10Oy4c{wdK;%~QSn>VKkJ5aOBMV8BjL2V0o@L 
znb7Ta4=3)aLLwan<4dUz*qvRcGP%nzSgr^?*@A{9g=IQ%3~MaI&!5|@{!a@>+nH@! zVsjyZiJ_ECZ$5wL-2|E|Bmn0zS=5PQ|H@@s^k;72mRP{UI0mH+C{2XLbEeL(5qPQ= zk)K7Q*~RgrHF#@?EZL-@!y%$)l}Gvcb{pif1RZ#f!^4)Go*V;l#x&bW{ET7hjt zp%cwst7lh!0q&>utA2w68-asjUXM_5UrBCZsIU_6C{u5m^sTUMIehHRRH6TXe;BKs zk)x)3^l{fzrGxx}J9lxrjgsY}kXOq`L0C#@s#0l#^&oB_d3sKpdFy#;Zk(hqj6`Fs=mXWH``KNN9ahMatu?i74a9&iNAz4`S4v2h-*ZUuCIKZed7L zC8BMhd~}+Rdn=NUs30~Ida5};t$n_SdlX|msjI#)RS@NorVW-EX)G91%v2IlxX2aP z%>J@EGt`m$0H&d-){UJ<{!=h4nDEH+J_OvPGhOTt!=p6#G~5{N5wo;DLwhpW(dW)? zQyFQUfF2Yhivl$9En#pELP4X7GeJ#zgyl39cj&Y^DD79T4GoGo%a>GMnNgqm)R{H^ zLnIMLIR&+?2^fv)-86Gw_TF-4u%^Syx1anxg%i1xwf+ z*o(f#rdlQOGM}_1=xh$1ky&?4P5)w~4I@#oqzT@dW=f?E9QPIQ_V1%;=jw2`1Zbr> zJOdsLKTGJBM?H)ekMkq@iBXU=F;7N&k+NiTq^8iCwDBq?xrZy$sCtu0jB&ysYA*HX zUJ01EOF2b31<3Ak0V=xr2j@Mz2kN{m{Ac$$5R7nz?#3ocrR8xnorb5o<7EjA%HU+P zxQv7a=Z!0tS@~VAAT4?V_ntt`{(0{T8_GZ1Kri?kDc*LMWHMx`TYm4`Uk;a&rnAL=R z^d@-*Tl76|B9I&~RV^4zJh0=ZqkgpCwh zLd-UTs_rC7s81^VV(c&TX7sM()!ya1Uy{e&_oL|(8Lr*@MSSp(zymJ#OG1}dAFHWR zfpdxi^dNg;bRjX(&S!jSaetYeu(?RuF<=v)&#x(KXZ?=-SbZ54Cv^X|)$nWhi4p7k z%tCTs7hV7!OP{tKpKWHrGY9vOC7CQ!jGtdedzE<9kW=E?aiWT&PZuj+VT>959K^``gTlj6^%d5aEZ~g~dJTG8I_gX6~;{V1_7?U&q zoo5<3&rAZ>vw!AE*AKXReRFT-dLfU($nT}x`HC8On{UQkElx=Oz6KciMt&=Oe43r) zcIY)B*M9{NWBdP|7|CU{hWzf=5P_UKXjJO#sDkA`XcQ%`m-3_)?QX%m!`@Wg-A#~s zrkS#s(b}06qZM*H`CIP+peG&kjc0j{G`PsZ=`P^mPLFZHibhB^Wr0>b7X-l1W~sM9CD%F_80TDNc~i zXBlG#O5Q2%$_4I)Rtj&g#2|XaP(1Er^|iVKH3R`d4SqhpmmN>OXa~+0h{2QwR_Tw? 
zH&_BWB(X~S`|o~K@V(lUtz=M=@t8=Rt=TD32URD?EDi!Km z)1p_BM_rX?;RIJiD=A!$Y1()fD6BAEKY^nla1lfa{j~>Sis08y?l7STKL{4>TBj4}sO2c{dL3>sFWfHeI+cYlB{vG#j00fK&#=K18WT&|D?WHl@6XTctt z-=k?fiL^KqdD&Y^tYyjPmvzHE4tn-5c}j{*(MyFzfwObJ?c1zQApgG)JE#D}biP8g zG@yy69oX;w6ui>wp*#6)MyXxv?t?tkND`?8kXofu8_OdA!|&inYhqyFi8b6Ydv=fa zY9VqkkrO6&$|kH9pknO0%pN{9>2q`_@z7yb`~>(i)o16K1m~Fs0kd$tzb`FRdWGIt z7?C*Op!lPD^762tE72sN$$*`cO@7I-1z=6(-m4Yt5q_T-JFK&+?^p6)>C0=e@oj2DCjD&OM1PE}G>;!M~ z+x1jNU*)bAFjA=2OuM=U4^UimE&b{p&^o!>-B84qTs-8JZMC||uPDm!N28zwP2xWT z63e6Nr6Cwiyqh6(!b#=InMM~|PtyeGY+HnBy*D8#qzh-;)t1}`l!KiP59YJaHZ!K( zdJp!N>RUW7ex%T?8VmE2Tn2BF_Lya!C~29^R#%@eeThPF@_xPesh2G#bTV%hOWv9} z&4W0x+5tY3#m4a9kE)d<#00*RCil)m#|Twqqe}ion|T+niB)+=ib{;c^0d|XQLS@N zWPH`_sb=}urQoAEXjjn8VhTE47t{|_ao8{yuA0#3+?>s93bG1mxt(zPY89s&)VMvP z?LV5ANrxlm>Y|m3@%QjmQv+7IQytv#0mk!B^ETZH%Iq_)bZs-7jpq@2f^i6Cj^@7I z?)sFucg4al`*Yn_2fB07w_RT7*M4_iFT)mW%05jOs>G+n|J2Ou+fL#$`@QM9D7q{r^4HAg^@#xTRMeR^{%!$(-=p4hw~Wl zw^~_GzRaI7YF(W?U!X-9S{ZncpF{seDtOH9#WNG{AHe6(URYdEk^BWml1>1`8cpVG zuEoC<81{U!D~{udd9lNI#D#8ZUK)qVOV<9|Z_xf+-A=UQek02^k2!HPA5OS! 
z#(gvQlaoEcPgfU3!3o=IFWX<=2P+>=o`mg<(BHa+v6S&D^JtCb#CNx4;P|;HDAoom?4Km_ybF=CB$3j& zaZ%L!GGnZ_>}uuRXCs$^=cj`vP@=VI?OLt8&K8Tc*BsDQF;Ghz>Rb~<7`C7a#SN0Y zz|+^&qGUah2v27?5m`I;2VZ719zS(|nYYh_c-HN?Fcx{P?>3ikRIgF{n4a)5b`=U) zJ=DtOfi1)A?z6tAtupGvm=C`<2wFZ-y+H3CJJxSdn3S$CVJ~5S5I>!=svQH(RcZlM z(Gv*i?Fx#`wsSGxM~Z0rC)+LyFN`#BP&XBxWyZIv29$48cQDB}0*GPTq0pge z@%i^Q@;|EAUw0#jj1$RrcdM47-%fYqId>QW!q+5K^)nC@z~6*OhsNE#aVuyxZ~>>-G=i06^c{F*+OkAZ#Oo36pg5oayC;rvB)v*az;=R>{93)+jjd;m0!pT zt5ooa=(CJ9tvjM0mcdK2hlK83v=(_K#l-O^Z@i_T>W|$lcnW^DY1KvkvzbuBtQ}bX z3Du`Izp8A^KUYl?b=EU#pwNq(6N|h!^_N}K!fXMQRVYvq@rYUYQv`#+v7AlUa7@v< z;2?JKraz)MRR4tYacH5N?MU}(E|6#f(*9h7?LVFLYn`TJu}*737C>+>FaY_8P5J?|ch1!P#|=O(6bIT336?lI z#CJ_3vlemN!Z!sSyH&0s!TVQ()gJ-(#7j2l`v^6h9d=Fc7-w8~K*BJN!NfU=&SVYX ztI26JW*J0Z^v9w$oDP~{0uXxThRefo0mxYDT4nZ*VF75tSkvH|8JM|OG;FmG4h$8u ze8&bTGj_(%&#EdOr^;l>;Qcs#W>3#O|NR(2 zVgCRTn?OE(1}lTLDsG(x<~HV1D(*xTeR$AVeWhL(yC1Y)wS-;`=hqGQqKnK-wS2=W|k0ll(i)T~Kb025n24pUR`huJB! za%(7!2?^)w;XpQc^Ljb;PIbHeIf| z5#C@-cs1T=5N6fy%WuQQ!xD|}iEAYU{~#vG5=`T=`_CCqnhGghmA2~!bsG}3?Oa$b zS?%Qt%JwUGwRSm&?Od2E!k2hTo9QTT9cv1kM(~5;OMwPR5A2h*?OJABR;x0Xp!#Nv zPP6d`9J@3LT{JoYb9LJkE99S>}*!q$;k36sK=qx_8jbd>Fb9Au2 z8Ky&NflGhaUU~nIooch^rB9a53^{AnSMNg%X3fbbrNxNq&v$4%#WiPX<#xMYqoA?S z0qBzY2~xlVGI<7{Y1+KRmkQ;Tq%ZIaC!Al#_{i}K+}}>#l(fIU=mRD&N>CwS>X(oz z2?Ngq|D!?kyWCHvIk7_8iFSlRcMN}|c3vCNe{iTBIue7UK$Vdg{Ew|)6$3!n_2Z!i zZq&<&$1$I@JEJE5{*Hhepb1SWv~mOGl=ayH(d$fHxLA?siqx;cIZOTUll#q#$#%b! 
zddgTd!YuxxKPvSEltv5sht?I+=bFC-_=F{P9J^{#!9l`%DWPRXQhCGLwUdkaKXh?Jk!oK^8_1u`5Lb1+pry53`aAu&((go6l z;cF45s83&j;{8-(3I9SC6jUAR??YY|Jl?*(yCx|$RWFH(zBi_NDI6(+FAoz&6E{3Q zI(@xr<4Q$X+eh)#xMCvy@Cui+ zE3^Qfu0ACHkU^Amu%2^cn#wHEnH4xA8LnWUH zXU~-7cPPy<9|s~~RgT1Pp?jGSaMC^KxH^nl->?k)>_VbmAaP>^oWcOK2_;e-+Gs1~ zXl6LE{b}8eUAC!UZ`mEQb_*~(GaRhY!MPqaR#Qg8t<;_OZ-h4TaM0h1!LP!521#=v zK8m0*|fkk3zMs>Hcf1bR)cf@>QC1q;<0v5-mlpTvEOOCfo zgqa;&tK}x|pOlr#uK+zc1q3v-=%q(GQu06H3<56nzOOzl2VaXfqujmgLs|%WwTPx8 zKUs)v8!{TnnjZfP@UCpy4X7*=K3(L<8|;|X($BAo#qz*D0W&$;Qx>{Q5U*r%ZtqX} zC}p7ATBzRsyUkfTXu=CX%J!=&_rGx#xYY6GbVo9^>T0cLN zDk#~QaLumPB$}ooWW0xzb;)SwjU|AE>JF@WEek-nmDQiX{B!X!i2*cDtR^fI&URR- z{?M$1F1qcn8B7ln2a*66f#f?c`QB&w*U)JX)7r(CcJQtF4sBT>6+VeL^ZE;?JLr zzUU~bdbhsU=&J~RvuS)L0~hJQQM#5RqIh+Z!7bdew{a}D3V*vHUX74_WWLl?AYxV1 zp}!Zj@o+7nnfk}YTR+x-tP!v2)`^ykKU8@oH@okoCIx&`{^Q@9eBZXU>F3XZ6QuhP zFv@g}9Y)7RK7tj7v7gryr4>b7uKRr8l}R<*D-jg=lt>?&M_-b+YjGIQ-OLkp+b9a# z8#f;Z^|D9eN~#g+k;-eb3niK3+J!QteG!3WDvIMR3$H8GP%2-F?%BpoMFakH`zZs= z+M`Mo@u$^I1Wx91Tgl)^66*c(qDn1q6N0PYB}YFt6F|gFlqQcR`+0TydxMr!yH<95 zK{+sT`lBJW3Mead68CJSgUmQj@7r#0d6?3X;-xVK?Ix>SfEdBixx9kBqPXEzZC4#Qi>qg-JY$XoBIHu ztXmiawBL>1ivR^|8*!)alD6+ncneEaoa|Y%B1F4mGALy)q$f=pxN$d77lV5LA)pgU^AV^T^?y<*~)Kk(;v>wbD*e%0AA?2V2jGMaIxxyo0k%wM|^r}Or2 z^gDm7(|o=lR*#^-kjR&-+wHh!d~dKjmH!8UyBo8HoG~7XWVCc_3lHpZ6m0r4H{(ym z87x?TQQzWv>I;gWPw)b*b}bwaYAg;)qf;_R#n{sObL95;Dg%Mi#KWZLZh19aKi1w% zxmEWkQ$TdREX4a9J;HfqcLzT8iy&&!NN<}`e{Vsaq7(_eFhHZKbE17DRNmV$aKlAHxB{QPJx=zS_GFvx_T$lYrrt%KKnBC8Z=6aUEc(`+wrAj1qpGov}Kofi^Nw4h;K zgBBCqHH{~|Au_^SS&s+=p!F&u^`$&a+EqEE&?;+bQ>@Xb_($y!E$d3@X?YwtzkYZC zBI@3P+w&0rM={m?pk!v^zhKJ;8!-=K41J4golWMWvo2Ji|) za*!0#I#~OS#?St4zJ~fjUn)QagcHwh@f2h6i~bo0 zp59X090|7}wzdQ=L@JOQ+(gGJRO&ymhfZwE~ zI5>eLIO9b#=H5hlGd>EIWWey~{>t>r1$R;88};_7k&w;wBec%i4!OFgY53!&dZM5T ze5*WnHN?|yTm8rj4j-W;-=iVy5ivL^oRVJT(~+{7a$w^0(UpRiBKQ;z%JP!EqTZzS zrxIf)r-nHd<83QY%oV})5-3Y6X1){iRvkF!mU-4DiRV2o?884kezV$;vag0Pw-@P_ zzYXU%z>M{Z@~EyV;+2dzeD-!D|F=*6=&{+6B0 
z?S{wnVhSjIpG6YDx?u`KLRwY1o|@4uaE1hO@x1#^x2%taw;GMi%@xij#nGpV-oLPI z+`feGf(XjDdMYhV!aKyrv~x<~6bat4jJiX2jWo&-wAj>*%xfj%KANY`cU~yzwZoz8 znjcilcQGA!OjtTiC-j+7*_$Mk`1&>L1D$iYxj0%2fXLAGeK5lP&A!ga+7uUP--0({ zJY3K6+u25W#j$l<^A62tw1}o)X}$^%kJ<;?aqwZq^TNXPgJ1x2h)aW z?9EAdClDQW|4bD}0^GUx2KVmX1UQALKLvm~Cnn8Xvap*(&Z9is97S@(&*;b=B2^XRD?80OKoR{dWZdkqA(TTai5DLE{O_}7bc!WO@Tmwb`4?LnT0$+$K) zll5f&Ceu;QgVeir%b(Bt(TLHwmD|GR|19kWxGw<>w4RqHM}GdtoOp)e zsHc5}pVmlk+4j$vVEH-LHH-etgET?5lA~^2+demAKfmts{IOcDkEjT+=z1gWyT~}v zpJV$9zjyHFk8jw$g|s4^yNRFt59Yo?5_9{X7yrKyGnp)@W>V@hWtvbVm9+@iR+}#! zd!nwb`sg;Q2X@uP4+^23?s26e%Bj?z@f9|Vc9Lew;3_T*t5nHHbCU1xcK*D<@fN5= z`Es20GZ9AMrn~5NO4|xyitgNk%>sNMZ{nBSszB0+3MWqRH8>5ze){ayihGJjNRpiV z4qLP%JHj0^Sn?y_>9g_iCMH8#chmTFhCKR4q%R=deK(*0-$XNn}DYUm^twve)qR#FN5Y&R6%Y(UZ*+PC(#!8tZgKt0=lf)c)r z=GOH|Ie)`7!YXW3Hyqzr#WFEBv3-qqi*k1ZZ7TaQ5q$Y>Ml5-|o%6Qn_J>2P_hVtA zgA$Y25p}ae)U+|Sm2owR8!vjhZarUD0Ht}=Cxb1Ti`EYXV5DUq>&RCqm0>+v{l7#E z7cSz71b0?@%^BEQx_P*W?*d+H;LLl<{p9UGjDAg$nd2cEE2O%kmRrT6r+y)ku;n6y z;6~s9O2X4VU}N|Dx7Mo6lu)I>``%Z&$L~fsbB!LSf4~+`+;|cjmk}>|Wmg;WwhPzk zBh8}Bf=tM^LZ*^hnCOxD9@pCwuCm}07tV?97P-^LswAsD&P0aK zqMOZDe=X@z1OG9r%1BU4tJA$*7_5CoVV{CKpegv&P|;!F(esaOAoD6AYKyM&$-`XX zsy%z7CN2dE40Egv-d4JUXp(zoiXE0!#KZc!_G>~{3O-W1rB%?wK*hj#A0=3i&YfV% zO$_bb&+3gD-+8Y0XoNP%qLx0VQm|FD3BM6m)l`+{5xlR}jT6gfYv1+6#1nGA$ z^86FWQrP#JxQ`09ifM}cnbGVzv+D6g7$ZngS@I3ec}=-2DkYNz<8xnT@hZaJLJ}XM zBMpQrqBg+`*T27?KC}8@U|TC;^Qk=};-$SBv+_#VcOT_T{#Vs>A`?VP{HxWF3m)S& zj}Tvl;N06X8CQ}_IuwO?l3Dy{4I>|oeEzsqcpuhDbBQDfD6%T=5KCb}AbkgsY|ycH zS`$U&rF^i&*VB`>!fNia;4qKirLerx_)BVX`U{N+-RD?te&sP=Mk)MVDYt4rW$r-) zB!y*9L#+00Yk#4mOnIw2K5){}NwS`T-&rqGg$)^ia3p?spFLKg?Om*f@kc;Qf-q#! 
z+ssY$NzAB6OsJ9_WQZNt48PEanM4|UaV<}1^i`ZtmqpWUGu5tsOf1`U@%w}?rp{8O zxg2t^&SBDdMvJ5JYs;M-C$gOw>0Q%KlbrD%ul)=Z)K)9lmv$^z7B$CNAEqpqcQ7xR zN4XV~b36||k~eyy+`2ls;6qj4Y$7G0+u}%`(2rT1H`ewqu%NH zcm6OGP<3(CfEFY%#0tGp{nr$#Bb@@ff*L>w8>uLP`6a1j0CRw+iWuw8e`8sqz@y7< zA8xW`ZWLxHpfGLhxQOrFJa&y)yn#o17iHQ1K$vQbeQkOFq8|pGXNbZb&+cPl5>pVM zl~z3FiTQ!Yg2)~c=a!cy=j@EW$VV^OGk{|iP^35J@eUTzVWH|Us^hq170TquTu+w| zM6(KKlnrY?=#p3N-bx)6ft$nAV}#^v?gZ5!!<195D=dFjflo{|LvKRu&u)(Us?86t zEgY)m%6p?LD=>^p3>}qaavNdRFcX*vj4zifVI2!%+|uHay`Q>YbF9Z8*L_wJ+hJ=(} z_^*mo4Ic#KdQD}s8buN1@Mbcoz1!q#VDq9UJL2I}HC1b+K%pgnc>UG*law*A&5zT|y5 z1Xllg5Q9zPCYlyf1+_{satvwml%8zrt+sHitRihR+a~b3PwuI=#w9ztvLGBE3q&tQ z2fl9>RGNRw*}Ya7jpc2nLcm z!}i1O&BdE0WMF%5eOa-@g&6ucwGffc6?_l$*iT80r%3Aa;j?$_25SK+IzI8H%>l${ z4$@i5sy;BSKbOfKb!b9q%ufKSK8@`}|Cj^4pv8;c3sW=Bn30_N@AL~F7Veu$(Fa6xqm8KKv5Nb(L^(-#A~Ap^EPPJvD6I6M8exHH2h)rIyi#*^%F zi)1j?@N(sFIDdq%z~~D3VAX-9#VZeGmSK=Z30pJ2o4~;{lZ^}-B0mG~SxgF=iL$rT zJ}lz;KA?6gbx%~txu?-VX2nwJz~I}grZOJ(Vz z0Ut~!G&sH1Xh8};Z_DumUjD6^_k0-5k!SfHb^8#H%B894G-Ywe^_h-5x~XX3JI=K* zF&Nu>N@)>gVKrebL)iw&3Zb$Rr`H_E?|_&NrDNUyHSb@YCPAM9dEXzJr}#I4A@OF4XjFE;-46Tj(Kq^K9qYRf=`UHXGQU68 zO-k+O4wo&5%FgMVor~B%$Kaf8Ti#>f;D)WzPyUwQz`=pEpV=p$74NO~e3rGme340L zRRDN65L8~D;AFIB2(*+edfgWJNPF9zo%<(cuDlkzbkVi!72gU@CSF(46%W4Gp!75H z)FX>G`?n=bt2@8-=vS{eWo}U2 zsv-)`ZwuaW-ce(!Gtlmy_H?6bT%|;^jJ6`nDWwO|%L*|eTxY5n>}#&+4N!Y%wBnAi#QfiqDFsZCe|JsdS(M}f zvi6kPgZIHb$DZe?I0D-yPi8tiv%7BLH{rRw`Y}Fu)%t1*u(F-2XLA{`+O-n6o^1B> ztI2PmH4<7w+H*>A-5WGR}6?sL`-e7)=b|{Dd{*l&rj53CTCPt`A;s z+a+)v)YJ{|2bJ*4ucoa-WU^+^Y6=DJlO{+l>rp0j%nQ`KAIwbIz61Z6C%INn&E01* Qg#idWUHx3vIVCg!09}ID82|tP literal 0 HcmV?d00001 diff --git a/sklift/datasets/descr/x5.rst b/sklift/datasets/descr/x5.rst index ca6553f..f6c789f 100644 --- a/sklift/datasets/descr/x5.rst +++ b/sklift/datasets/descr/x5.rst @@ -17,6 +17,9 @@ Data contains several parts: * clients.csv: general info about clients; * purchases.csv: clients’ purchase history prior to communication. +.. 
image:: ../../_static/images/x5_table_scheme.png + :alt: X5 table schema + Fields ################ From 49d873c5872e70c2df4868ec64d68356cd3ffb6f Mon Sep 17 00:00:00 2001 From: rooti123 <90433241+rooti123@users.noreply.github.com> Date: Wed, 6 Jul 2022 20:54:47 +1000 Subject: [PATCH 03/11] Maximum Profit Uplift (MPU) measure added to metrics (#172) * Maximum Profit Uplift (MPU) measure added to metrics * :wrench: fix docs * :wrench: fix docs +1 Co-authored-by: Irina Elisova --- sklift/metrics/__init__.py | 4 +-- sklift/metrics/metrics.py | 71 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/sklift/metrics/__init__.py b/sklift/metrics/__init__.py index 0b67f45..64c10a7 100644 --- a/sklift/metrics/__init__.py +++ b/sklift/metrics/__init__.py @@ -3,7 +3,7 @@ qini_curve, perfect_qini_curve, qini_auc_score, uplift_at_k, response_rate_by_percentile, weighted_average_uplift, uplift_by_percentile, treatment_balance_curve, - average_squared_deviation, make_uplift_scorer + average_squared_deviation, make_uplift_scorer, max_prof_uplift ) __all__ = [ @@ -11,5 +11,5 @@ 'qini_curve', 'perfect_qini_curve', 'qini_auc_score', 'uplift_at_k', 'response_rate_by_percentile', 'weighted_average_uplift', 'uplift_by_percentile', 'treatment_balance_curve', - 'average_squared_deviation', 'make_uplift_scorer' + 'average_squared_deviation', 'make_uplift_scorer', 'max_prof_uplift' ] diff --git a/sklift/metrics/metrics.py b/sklift/metrics/metrics.py index 87b6a74..87078fa 100644 --- a/sklift/metrics/metrics.py +++ b/sklift/metrics/metrics.py @@ -826,3 +826,74 @@ def average_squared_deviation(y_true_train, uplift_train, treatment_train, y_tru strategy=strategy, bins=bins) return np.mean(np.square(uplift_by_percentile_train['uplift'] - uplift_by_percentile_val['uplift'])) + + +def max_prof_uplift(df_sorted, treatment_name, churn_name, pos_outcome, benefit, c_incentive, c_contact, a_cost=0): + """Compute the maximum profit generated from an uplift 
model decided campaign + + This can be visualised by plotting plt.plot(perc, cumulative_profit) + + Args: + df_sorted (pandas dataframe): dataframe with descending uplift predictions for each customer (i.e. highest 1st) + treatment_name (string): column name of treatment columm (assuming 1 = treated) + churn_name (string): column name of churn column + pos_outcome (int or float): 1 or 0 value in churn column indicating a positive outcome (i.e. purchase = 1, whereas churn = 0) + benefit (int or float): the benefit of retaining a customer (e.g., the average customer lifetime value) + c_incentive (int or float): the cost of the incentive if a customer accepts the offer + c_contact (int or float): the cost of contacting a customer regardless of conversion + a_cost (int or float): the fixed administration cost for the campaign + + Returns: + 1d array-like: the incremental increase in x, for plotting + 1d array-like: the cumulative profit per customer + + References: + Floris Devriendt, Jeroen Berrevoets, Wouter Verbeke. Why you should stop predicting customer churn and start using uplift models. + """ +# VARIABLES + +# n_ct0 no. people not treated +# n_ct1 no. people treated + +# n_y1_ct0 no. people not treated with +ve outcome +# n_y1_ct1 no. 
people treated with +ve outcome + +# r_y1_ct0 mean of not treated people with +ve outcome +# r_y1_ct1 mean of treated people with +ve outcome + +# cs cumsum() of each variable + + n_ct0 = np.where(df_sorted[treatment_name] == 0, 1, 0) + cs_n_ct0 = pd.Series(n_ct0.cumsum()) + + n_ct1 = np.where(df_sorted[treatment_name] == 1, 1, 0) + cs_n_ct1 = pd.Series(n_ct1.cumsum()) + + if pos_outcome == 0: + n_y1_ct0 = np.where((df_sorted[treatment_name] == 0) & (df_sorted[churn_name] == 0), 1, 0) + n_y1_ct1 = np.where((df_sorted[treatment_name] == 1) & (df_sorted[churn_name] == 0), 1, 0) + + elif pos_outcome == 1: + n_y1_ct0 = np.where((df_sorted[treatment_name] == 0) & (df_sorted[churn_name] == 1), 1, 0) + n_y1_ct1 = np.where((df_sorted[treatment_name] == 1) & (df_sorted[churn_name] == 1), 1, 0) + + cs_n_y1_ct0 = pd.Series(n_y1_ct0.cumsum()) + cs_n_y1_ct1 = pd.Series(n_y1_ct1.cumsum()) + + cs_r_y1_ct0 = (cs_n_y1_ct0 / cs_n_ct0).fillna(0) + cs_r_y1_ct1 = (cs_n_y1_ct1 / cs_n_ct1).fillna(0) + + cs_uplift = cs_r_y1_ct1 - cs_r_y1_ct0 + + # Dataframe of all calculated variables + df = pd.concat([cs_n_ct0,cs_n_ct1,cs_n_y1_ct0,cs_n_y1_ct1, cs_r_y1_ct0, cs_r_y1_ct1, cs_uplift], axis=1) + df.columns = ['cs_n_ct0', 'cs_n_ct1', 'cs_n_y1_ct0', 'cs_n_y1_ct1', 'cs_r_y1_c0', 'cs_r_y1_ct1', 'cs_uplift'] + + x = cs_n_ct0 + cs_n_ct1 + max = cs_n_ct0.max() + cs_n_ct1.max() + + t_profit = (x * cs_uplift * benefit) - (c_incentive * x * cs_r_y1_ct1) - (c_contact * x) - a_cost + perc = x / max + cumulative_profit = t_profit / max + + return perc, cumulative_profit From a158f2ea204741bda42cfd1fefeb6695b8697827 Mon Sep 17 00:00:00 2001 From: Bezmen Evgeny <37982126+flashlight101@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:47:47 +0300 Subject: [PATCH 04/11] Fix bag #180 (#186) * Add hash checker for the dataset files --- sklift/datasets/datasets.py | 111 ++++++++++++++++++++++++++++-------- 1 file changed, 87 insertions(+), 24 deletions(-) diff --git a/sklift/datasets/datasets.py 
b/sklift/datasets/datasets.py index ae5c24b..2838c03 100644 --- a/sklift/datasets/datasets.py +++ b/sklift/datasets/datasets.py @@ -1,5 +1,6 @@ import os import shutil +import hashlib import pandas as pd import requests @@ -95,6 +96,11 @@ def _get_data(data_home, url, dest_subdir, dest_filename, download_if_missing, raise IOError("Dataset missing") return dest_path +def _get_file_hash(csv_path): + with open(csv_path, 'rb') as file_to_check: + data = file_to_check.read() + return hashlib.md5(data).hexdigest() + def clear_data_dir(path=None): """Delete all the content of the data home cache. @@ -170,11 +176,19 @@ def fetch_lenta(data_home=None, dest_subdir=None, download_if_missing=True, retu :func:`.fetch_megafon`: Load and return the MegaFon Uplift Competition dataset (classification). """ - url = 'https://sklift.s3.eu-west-2.amazonaws.com/lenta_dataset.csv.gz' - filename = url.split('/')[-1] - csv_path = _get_data(data_home=data_home, url=url, dest_subdir=dest_subdir, + lenta_metadata = { + 'url': 'https://sklift.s3.eu-west-2.amazonaws.com/lenta_dataset.csv.gz', + 'hash': '6ab28ff0989ed8b8647f530e2e86452f' + } + + filename = lenta_metadata['url'].split('/')[-1] + csv_path = _get_data(data_home=data_home, url=lenta_metadata['url'], dest_subdir=dest_subdir, dest_filename=filename, download_if_missing=download_if_missing) + + if _get_file_hash(csv_path) != lenta_metadata['hash']: + raise ValueError(f"The {filename} file is broken,\ + please clean the directory with the clean_data_dir function, and run the function again") target_col = 'response_att' treatment_col = 'group' @@ -262,11 +276,24 @@ def fetch_x5(data_home=None, dest_subdir=None, download_if_missing=True): :func:`.fetch_megafon`: Load and return the MegaFon Uplift Competition dataset (classification). 
""" - url_train = 'https://sklift.s3.eu-west-2.amazonaws.com/uplift_train.csv.gz' - file_train = url_train.split('/')[-1] - csv_train_path = _get_data(data_home=data_home, url=url_train, dest_subdir=dest_subdir, + + x5_metadata = { + 'url_train': 'https://sklift.s3.eu-west-2.amazonaws.com/uplift_train.csv.gz', + 'url_clients': 'https://sklift.s3.eu-west-2.amazonaws.com/clients.csv.gz', + 'url_purchases': 'https://sklift.s3.eu-west-2.amazonaws.com/purchases.csv.gz', + 'uplift_hash': '2720bbb659daa9e0989b2777b6a42d19', + 'clients_hash': 'b9cdeb2806b732771de03e819b3354c5', + 'purchases_hash': '48d2de13428e24e8b61d66fef02957a8' + } + file_train = x5_metadata['url_train'].split('/')[-1] + csv_train_path = _get_data(data_home=data_home, url=x5_metadata['url_train'], dest_subdir=dest_subdir, dest_filename=file_train, download_if_missing=download_if_missing) + + if _get_file_hash(csv_train_path) != x5_metadata['uplift_hash']: + raise ValueError(f"The {file_train} file is broken,\ + please clean the directory with the clean_data_dir function, and run the function again") + train = pd.read_csv(csv_train_path) train_features = list(train.columns) @@ -277,19 +304,27 @@ def fetch_x5(data_home=None, dest_subdir=None, download_if_missing=True): train = train.drop([target_col, treatment_col], axis=1) - url_clients = 'https://sklift.s3.eu-west-2.amazonaws.com/clients.csv.gz' - file_clients = url_clients.split('/')[-1] - csv_clients_path = _get_data(data_home=data_home, url=url_clients, dest_subdir=dest_subdir, + file_clients = x5_metadata['url_clients'].split('/')[-1] + csv_clients_path = _get_data(data_home=data_home, url=x5_metadata['url_clients'], dest_subdir=dest_subdir, dest_filename=file_clients, download_if_missing=download_if_missing) + + if _get_file_hash(csv_clients_path) != x5_metadata['clients_hash']: + raise ValueError(f"The {file_clients} file is broken,\ + please clean the directory with the clean_data_dir function, and run the function again") + clients = 
pd.read_csv(csv_clients_path) clients_features = list(clients.columns) - url_purchases = 'https://sklift.s3.eu-west-2.amazonaws.com/purchases.csv.gz' - file_purchases = url_purchases.split('/')[-1] - csv_purchases_path = _get_data(data_home=data_home, url=url_purchases, dest_subdir=dest_subdir, + file_purchases = x5_metadata['url_purchases'].split('/')[-1] + csv_purchases_path = _get_data(data_home=data_home, url=x5_metadata['url_purchases'], dest_subdir=dest_subdir, dest_filename=file_purchases, download_if_missing=download_if_missing) + + if _get_file_hash(csv_clients_path) != x5_metadata['purchases_hash']: + raise ValueError(f"The {file_purchases} file is broken,\ + please clean the directory with the clean_data_dir function, and run the function again") + purchases = pd.read_csv(csv_purchases_path) purchases_features = list(purchases.columns) @@ -391,16 +426,27 @@ def fetch_criteo(target_col='visit', treatment_col='treatment', data_home=None, raise ValueError(f"The target_col must be an element of {target_cols + ['all']}. 
" f"Got value target_col={target_col}.") + criteo_metadata = { + 'url': '', + 'criteo_hash': '' + } + if percent10: - url = 'https://criteo-bucket.s3.eu-central-1.amazonaws.com/criteo10.csv.gz' + criteo_metadata['url'] = 'https://criteo-bucket.s3.eu-central-1.amazonaws.com/criteo10.csv.gz' + criteo_metadata['criteo_hash'] = 'fe159bcee2cea57548e48eb2d7d5d00c' else: - url = "https://criteo-bucket.s3.eu-central-1.amazonaws.com/criteo.csv.gz" + criteo_metadata['url'] = "https://criteo-bucket.s3.eu-central-1.amazonaws.com/criteo.csv.gz" + criteo_metadata['criteo_hash'] = 'd2236769ef69e9be52556110102911ec' - filename = url.split('/')[-1] - csv_path = _get_data(data_home=data_home, url=url, dest_subdir=dest_subdir, + filename = criteo_metadata['url'].split('/')[-1] + csv_path = _get_data(data_home=data_home, url=criteo_metadata['url'], dest_subdir=dest_subdir, dest_filename=filename, download_if_missing=download_if_missing) + if _get_file_hash(csv_path) != criteo_metadata['criteo_hash']: + raise ValueError(f"The {filename} file is broken,\ + please clean the directory with the clean_data_dir function, and run the function again") + dtypes = { 'exposure': 'Int8', 'treatment': 'Int8', @@ -497,11 +543,19 @@ def fetch_hillstrom(target_col='visit', data_home=None, dest_subdir=None, downlo raise ValueError(f"The target_col must be an element of {target_cols + ['all']}. 
" f"Got value target_col={target_col}.") - url = 'https://hillstorm1.s3.us-east-2.amazonaws.com/hillstorm_no_indices.csv.gz' - filename = url.split('/')[-1] - csv_path = _get_data(data_home=data_home, url=url, dest_subdir=dest_subdir, + hillstrom_metadata = { + 'url': 'https://hillstorm1.s3.us-east-2.amazonaws.com/hillstorm_no_indices.csv.gz', + 'hillstrom_hash': 'a68a81291f53a14f4e29002629803ba3' + } + + filename = hillstrom_metadata['url'].split('/')[-1] + csv_path = _get_data(data_home=data_home, url=hillstrom_metadata['url'], dest_subdir=dest_subdir, dest_filename=filename, download_if_missing=download_if_missing) + + if _get_file_hash(csv_path) != hillstrom_metadata['hillstrom_hash']: + raise ValueError(f"The {filename} file is broken,\ + please clean the directory with the clean_data_dir function, and run the function again") treatment_col = 'segment' @@ -582,12 +636,21 @@ def fetch_megafon(data_home=None, dest_subdir=None, download_if_missing=True, :func:`.fetch_hillstrom`: Load and return Kevin Hillstrom Dataset MineThatData (classification or regression). 
""" - url_train = 'https://sklift.s3.eu-west-2.amazonaws.com/megafon_dataset.csv.gz' - file_train = url_train.split('/')[-1] - csv_train_path = _get_data(data_home=data_home, url=url_train, dest_subdir=dest_subdir, - dest_filename=file_train, + megafon_metadata = { + 'url': 'https://sklift.s3.eu-west-2.amazonaws.com/megafon_dataset.csv.gz', + 'megafon_hash': 'ee8d45a343d4d2cf90bb756c93959ecd' + } + + filename = megafon_metadata['url'].split('/')[-1] + csv_path = _get_data(data_home=data_home, url=megafon_metadata['url'], dest_subdir=dest_subdir, + dest_filename=filename, download_if_missing=download_if_missing) - train = pd.read_csv(csv_train_path) + + if _get_file_hash(csv_path) != megafon_metadata['megafon_hash']: + raise ValueError(f"The {filename} file is broken,\ + please clean the directory with the clean_data_dir function, and run the function again") + + train = pd.read_csv(csv_path) target_col = 'conversion' treatment_col = 'treatment_group' From c68a7faf93f1e1bb0a453cc66eba51d0f9232161 Mon Sep 17 00:00:00 2001 From: Bezmen Evgeny <37982126+flashlight101@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:49:33 +0300 Subject: [PATCH 05/11] Fix bag 181 (#187) * Fix bag 181 * Adding equals --- sklift/models/models.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/sklift/models/models.py b/sklift/models/models.py index 83b9a84..9cfbadc 100644 --- a/sklift/models/models.py +++ b/sklift/models/models.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pandas as pd from sklearn.base import BaseEstimator @@ -512,8 +514,19 @@ def fit(self, X, y, treatment, estimator_trmnt_fit_params=None, estimator_ctrl_f check_is_binary(treatment) self._type_of_target = type_of_target(y) - X_ctrl, y_ctrl = X[treatment == 0], y[treatment == 0] - X_trmnt, y_trmnt = X[treatment == 1], y[treatment == 1] + y_copy = y.copy() + treatment_copy = treatment.copy() + + if not X.index.equals(y_copy.index): + y_copy.index = X.index + 
warnings.warn("Target indexes do not match data indexes, re-indexing has been performed") + + if not X.index.equals(treatment_copy.index): + treatment_copy.index = X.index + warnings.warn("Treatment indexes do not match data indexes, re-indexing has been performed") + + X_ctrl, y_ctrl = X[treatment_copy == 0], y_copy[treatment_copy == 0] + X_trmnt, y_trmnt = X[treatment_copy == 1], y_copy[treatment_copy == 1] if estimator_trmnt_fit_params is None: estimator_trmnt_fit_params = {} From 1245f7e68dc5b1fb68b434bfda507679d13ee4f9 Mon Sep 17 00:00:00 2001 From: Bezmen Evgeny <37982126+flashlight101@users.noreply.github.com> Date: Thu, 4 Aug 2022 19:30:04 +0300 Subject: [PATCH 06/11] Fix list error from #181 (#190) * Fix bag 181 --- sklift/models/models.py | 5 ++--- sklift/tests/test_models.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sklift/models/models.py b/sklift/models/models.py index 9cfbadc..5220608 100644 --- a/sklift/models/models.py +++ b/sklift/models/models.py @@ -517,11 +517,10 @@ def fit(self, X, y, treatment, estimator_trmnt_fit_params=None, estimator_ctrl_f y_copy = y.copy() treatment_copy = treatment.copy() - if not X.index.equals(y_copy.index): + if (isinstance(X, pd.Series) or isinstance(X, pd.DataFrame)) and isinstance(y_copy, pd.Series) and not X.index.equals(y_copy.index): y_copy.index = X.index warnings.warn("Target indexes do not match data indexes, re-indexing has been performed") - - if not X.index.equals(treatment_copy.index): + if (isinstance(X, pd.Series) or isinstance(X, pd.DataFrame)) and isinstance(treatment_copy, pd.Series) and not X.index.equals(treatment_copy.index): treatment_copy.index = X.index warnings.warn("Treatment indexes do not match data indexes, re-indexing has been performed") diff --git a/sklift/tests/test_models.py b/sklift/tests/test_models.py index 1afa939..2e58281 100644 --- a/sklift/tests/test_models.py +++ b/sklift/tests/test_models.py @@ -1,5 +1,8 @@ +import warnings + import 
pytest import numpy as np +import pandas as pd from sklearn.linear_model import LogisticRegression, LinearRegression from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler @@ -91,3 +94,16 @@ def test_same_estimator_error(): with pytest.raises(ValueError): TwoModels(est, est) +@pytest.mark.parametrize( + "X, y, treatment", + [ + (pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),columns=['a', 'b', 'c'], index=[0,1,2]), + pd.Series(np.array([1, 0, 1]),index=[0,2,3]), pd.Series(np.array([0, 0, 1]),index=[0,1,2])), + (pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),columns=['a', 'b', 'c'], index=[0,1,2]), + pd.Series(np.array([1, 0, 1]),index=[0,1,2]), pd.Series(np.array([0, 0, 1]),index=[1,2,3])) + ] +) +def test_input_data(X, y, treatment): + model = TwoModels(LinearRegression(), LinearRegression()) + with pytest.warns(UserWarning): + model.fit(X, y, treatment) \ No newline at end of file From f1cf88f3c95939a397db4cd579558623172db558 Mon Sep 17 00:00:00 2001 From: 00helloworld <42757757+00helloworld@users.noreply.github.com> Date: Fri, 5 Aug 2022 01:37:37 +0800 Subject: [PATCH 07/11] Fix the Tags translation in Chinese in Readme.rst and /docs/index.rst (#189) Fix the Tags translation in Chinese in Readme --- Readme.rst | 2 +- docs/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Readme.rst b/Readme.rst index 923f4ea..89b1a69 100644 --- a/Readme.rst +++ b/Readme.rst @@ -265,5 +265,5 @@ Tags **RU**: аплифт моделирование, Uplift модель -**ZH**: 隆起建模,因果推断,因果效应,因果关系,个人治疗效应,真正的电梯,净电梯 +**ZH**: uplift增量建模, 因果推断, 因果效应, 因果关系, 个体干预因果效应, 真实增量, 净增量, 增量建模 diff --git a/docs/index.rst b/docs/index.rst index 6d790d2..571e72d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -153,4 +153,4 @@ Tags **RU**: аплифт моделирование, Uplift модель -**ZH**: 隆起建模,因果推断,因果效应,因果关系,个人治疗效应,真正的电梯,净电梯 \ No newline at end of file +**ZH**: uplift增量建模, 因果推断, 因果效应, 因果关系, 个体干预因果效应, 真实增量, 净增量, 增量建模 From 
c4beef06d3a9541d8eec59a2a97698a12563e263 Mon Sep 17 00:00:00 2001 From: Maksim Shevchenko Date: Tue, 9 Aug 2022 19:14:45 +0300 Subject: [PATCH 08/11] :construction: Try to fix bullets (#193) --- .readthedocs.yml | 16 +++++++++++----- docs/requirements.txt | 4 ++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 9f9649b..7484b97 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,13 +4,20 @@ # Required version: 2 +# Build documentation in the docs/ directory with Sphinx +build: + os: ubuntu-20.04 + tools: + python: "3.8" +# jobs: +# pre_build: +# - cp -r notebooks docs/ + # Build documentation in the docs/ directory with Sphinx sphinx: + builder: html configuration: docs/conf.py - -# Build documentation with MkDocs -#mkdocs: -# configuration: mkdocs.yml + fail_on_warning: false # Optionally build your docs in additional formats such as PDF and ePub formats: @@ -18,7 +25,6 @@ formats: # Optionally set the version of Python and requirements required to build your docs python: - version: 3.7 install: - requirements: docs/requirements.txt - requirements: requirements.txt \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index b34aed6..79735c3 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -sphinx-autobuild -sphinx_rtd_theme +sphinx==5.1.1 +sphinx-rtd-theme==1.0.0 myst-parser sphinxcontrib-bibtex \ No newline at end of file From 667915589186a0a06d963c14d428465ea5f0ee2f Mon Sep 17 00:00:00 2001 From: Maksim Shevchenko Date: Tue, 9 Aug 2022 21:48:35 +0300 Subject: [PATCH 09/11] :memo: Fix docs --- docs/api/metrics/index.rst | 1 + docs/api/metrics/max_prof_uplift.rst | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 docs/api/metrics/max_prof_uplift.rst diff --git a/docs/api/metrics/index.rst b/docs/api/metrics/index.rst index b60d4d6..44ca0b9 100644 --- a/docs/api/metrics/index.rst +++ b/docs/api/metrics/index.rst @@ -17,4 +17,5 @@ 
./response_rate_by_percentile ./treatment_balance_curve ./average_squared_deviation + ./max_prof_uplift ./make_uplift_scorer \ No newline at end of file diff --git a/docs/api/metrics/max_prof_uplift.rst b/docs/api/metrics/max_prof_uplift.rst new file mode 100644 index 0000000..105cc1e --- /dev/null +++ b/docs/api/metrics/max_prof_uplift.rst @@ -0,0 +1,5 @@ +********************************************** +`sklift.metrics <./>`_.max_prof_uplift +********************************************** + +.. autofunction:: sklift.metrics.metrics.max_prof_uplift \ No newline at end of file From 6cdb809599c08042f8cc6c27d6f81259a3f0c47c Mon Sep 17 00:00:00 2001 From: Maksim Shevchenko Date: Tue, 9 Aug 2022 21:49:20 +0300 Subject: [PATCH 10/11] :label: Add changelog --- docs/changelog.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 849f5d2..a045457 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -8,9 +8,28 @@ * 🔨 something that previously didn’t work as documented – or according to reasonable expectations – should now work. * ❗️ you will need to change your code to have the same effect in the future; or a feature will be removed in the future. +## Version 0.5.0 + +### [sklift.models](https://www.uplift-modeling.com/en/v0.5.0/api/models/index.html) + +* 🔥 Add [ClassTransformationReg](https://www.uplift-modeling.com/en/v0.5.0/api/models.html#sklift.models.models.ClassTransformationReg) model by [@mcullan](https://github.com/mcullan) and [@ElisovaIra](https://github.com/ElisovaIra). +* 🔨 Add the ability to process a series with different indexes in the [TwoModels](https://www.uplift-modeling.com/en/v0.5.0/api/models.html#sklift.models.models.TwoModels) by [@flashlight101](https://github.com/flashlight101).
+ +### [sklift.metrics](https://www.uplift-modeling.com/en/v0.5.0/api/metrics/index.html) + +* 🔥 Add new metric [Maximum profit uplift measure](https://www.uplift-modeling.com/en/v0.5.0/api/metrics/max_prof_uplift.html) by [@rooti123](https://github.com/rooti123). + +### [sklift.datasets](https://www.uplift-modeling.com/en/v0.5.0/api/datasets/index.html) + +* 💥 Add checker based on hash for all datasets by [@flashlight101](https://github.com/flashlight101) +* 📝 Add [scheme](https://www.uplift-modeling.com/en/v0.5.0/api/datasets/fetch_x5.html) of x5 dataframes. + +### Miscellaneous +* 📝 Improve Chinese tags by [@00helloworld](https://github.com/00helloworld) + ## Version 0.4.1 -### [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.0/api/datasets/index.html) +### [sklift.datasets](https://www.uplift-modeling.com/en/v0.4.1/api/datasets/index.html) * 🔨 Fix bug in dataset links. * 📝 Add about a company section From 73f3ae2207a3cccaf57674ae5d7aae075ffb2496 Mon Sep 17 00:00:00 2001 From: Maksim Shevchenko Date: Tue, 9 Aug 2022 21:50:21 +0300 Subject: [PATCH 11/11] :rocket: Bump version to 0.5.0 --- sklift/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklift/__init__.py b/sklift/__init__.py index f0ede3d..2b8877c 100644 --- a/sklift/__init__.py +++ b/sklift/__init__.py @@ -1 +1 @@ -__version__ = '0.4.1' +__version__ = '0.5.0'