From 7e27732b0b8a8322e971d4d8869606153f89f08d Mon Sep 17 00:00:00 2001 From: greninja Date: Wed, 28 Dec 2016 21:31:10 +0530 Subject: [PATCH] Features added referring to #15 --- conf/.cuckoo.conf.swp | Bin 0 -> 16384 bytes conf/cuckooml.conf | 3 +++ modules/processing/cuckooml.py | 38 +++++++++++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 conf/.cuckoo.conf.swp diff --git a/conf/.cuckoo.conf.swp b/conf/.cuckoo.conf.swp new file mode 100644 index 0000000000000000000000000000000000000000..c6a0f7037a8f60bdf5db047dbf8ad05e1827fa93 GIT binary patch literal 16384 zcmeI3U5q5xRmUq%0&$G(h!01ENL;rAnGLf&vy=74jI$B(?mF3!T`%7C+G4TV)m^u{ z%hOfWu8-+yqa-*FNJt?9dLe%{=?*_MyE|Gde1YY_>Nl@^Sd9ug~9mo9*4L1hx|RekIV<+kNog6-!HZ8x~&AZ64**$D}k*9wi4J%U@L*G1hx|Re<*=^eoyon_`WCb zelz@j%U!?U8ved@*YEDRaC!S{D}k*9wi4J%U@L*G1hx{`N?fgb~ZeP0y40v-iF0p0`tp6?!f0elv`0-gZhy%)OR z%iy=c^WZ`7AMc2w?|@gqXTbB|NpJ=H=MP5F*TD#U0$c^({edX@UGM_r)AMhrfyj@rOSQ2D9WS0yIF7}&5vBYNJric z{kSN-^Lad+rTN6Abw7H<^^&x#myRpG=PFhdu8Nm^cfF`R%j0_2z1UPW4;*>7Nb4yV zy}NzmdbKO+71w4n7tdzJ0&3UWy3& zv=T5v(ox6hq$%S%E%Fs#Cb(j5*XjZ~!=*7gNsoQ*@;LLZ7~2)m&>83OdOXkV+b?#G zkLC$dkRn`GBxrZ(5@XWV(8JT z+tGqeGqL@z<|~E0h5@YM%`g!RmEFT97mr51s9XZ$!LlNMf6f1;c2~{)D2-<(4_Uz3z$Ev5rRS#cG34?k99<5w>^KW1 zWP~3N2D9sWoz3=k9cI@7uo4XET3vUp~=$| zoN#pHYdaton2S?0xlxg2ah_EDD1y#h%UY5U*@@?=nI~3p`NcS&6mH>%Ypt4aZux9p zlyQlr;3=cJ;1k-e=}6A)&0vT$~k6UduOid<;tG-@y&9J#mI@*g?#& z*di?x-5+k_ADL|fPJ+(x=xOMTZKX*=+u06 zrmZpgpwlc~N;T+g+{`$HtCm&mGsQaG!5)T<(pMxxLW$O`na`VSh|O^BCF%{az#xYTHQ>lQTJSf@?6gb;>EQ09Cm}D~oip-89 z)>KBl^(H;pd+E=<)c6M0#8xL+oh>jGH+7MbrAL-sczm@P+UgC<;>hPc+pBV3+nS*_ zb46ocW@(PR24Pe5(s7s-c}k#V-}Su^9*!uSBm)z0v!e2e zIDJwAEvOAuLnZu%b`Xr#`OZlL+GXomT_AeZWe`WHd=Wp!VnUJ zOu{wQ^ z*29Elz>-0A*(@$F(S&;QAlv1IdH6&4J@W@a*@nB*AIl{p}IUn zRn>;M6E3<`8iJLGiIq?(wUHor%4w;FC=}gk!|?Nw`u~UMPd`h)s{X%wzyA$-{jY#8 zgI@&qfj@hIXbm0*KLb7h)c1c0JRN%cKcuIhfcJv`rkDR$@Gsz7;EUi>-~fD^p8l)g zHnrZ0U)pEg{ArWnXsstSQ zg4iI}kfCjsl2ho#^bC}MWb%HOXklg=&*#+Erz76F@}XbOpkYH6%H&5*1vt)7(W~DT zF4fK`y{YP{+aZhuVzN*$2sr{;ssOmq)ooqUkB5bIyU+;1SYaeo1Y$W=D6zVn)vHr? zPxzBL{8O1I2kexd;6vx|@R^m+-POPr0*Vv5Nk!jwKtZ>2AkoS!=FQ1o@gK zaaBO@5Q)tii;z`YW~a<0Xol(OTNR%ctmTDu)zjpZ6iOo9VLComS?hHV)mcy5gta}V zQ6Bo}PFPXV-gk>HvI7}|Cc9qluEdQbsnDc^(*}7trWTJm`nuj*OJ9?sV7k%9{Wshp zb~k_FlbVH~i)7gSOQ~ci%g>h8OS8khy>U?-4&(CYEGbX1w!LggayKS;jMQ9D(CF(w zd-&-24<4Lez?19EHz?Mv z|KFwk_Zp{226Dv>mGY6MyCw`VA!w_V)03ONH9p1c*bfo{NT?pdE*lyC^7WgM zfF+$}532NKKd`=-rRVm~KlbQ{F6_(qWVMl#CM_9cW(E zPNvv85D2po*j9ju?yTvem3DwNQ15Oe*GFg5|J|9}BX{YeV{kPgYuPfZqE*_duvFsw zw5X9?QWix0!1^u%n4Dp5Rxp?tXw-y4a?O6xx^;Dg36!TvnB7g5l>DI`O~)98wKp1~ zQBZ4|%h4$!QISo6h?`u{ZJMBVVg#ipwVT;j}6lK3vGi=M%-+Ux^zaLn|Gh#8}C_9*ViEdaVaDy?ru_dS8pf|@AvJ$ zP3N~qWz8$W@tj64ohU< zx+0pb2X@}tos5(*Et;7I@aSr$RuJ<}ni0kn-W1r9mw6|_y;~4_x|R0LR@%WUbXo?z zUL!r^Mi6B#Zv_x5y7>*)u*`~qbvuYL3iyx)A_KG1rC0LgO_S`wW;5M1oa`~PmY2S* zX0)zUe(GMu^SKS|H{K|%UqQ7-+`b1{$Qs$_lui*8XK4o;X8Mw|UU{X;0q!cUBV~+~ zNTu?HiA~{FXNTHr+0_k+UnAMmysCVpO0#@Kk7V{2=Kp(4_C=1hrw2uEd~g({epDH$ z+XkL(hHK#hYCC#+LkvIta>aUG^X^tK2q3g*U0Dco*3h+OpBxxsHws=4F5T1ZyRLQ! zxfnv?s~VF;mxVa;O>>kYE{+oMI^2q`KlhY z`&=~fLv+vAqrDUxhVa2&cb|Tr!~2u8om6SUp|&}3S1(sK{MqsSNuReY2mAYu?sva; z29;m7M{FFD>BS^I!fO+TdrZ&Eg11LfZaWrMT}s|ZYw++Iean?_NhzrJ;WYV(`>=?9 zq-XC!rRg`|8d^4F>Z(Z1br> sys.stderr, "Plotting libraries \ + (matplotlib and seaborn) are not available." + print >> sys.stderr, e + imported = False + + try: - import matplotlib.pyplot as plt import numpy as np import pandas as pd - import seaborn as sns from hdbscan import HDBSCAN from sklearn import metrics from sklearn.cluster import DBSCAN @@ -797,6 +809,17 @@ def filter_dataset(self, dataset=None, feature_coverage=0.1, def detect_abnormal_behaviour(self, count_dataset=None, figures=True): """Detect samples that behave significantly different than others.""" + + # Safety check for plotting + if not imported: + figures = False + else: + if not Config("cuckooml").cuckooml.plotting and figures: + print >> sys.stderr, "Warning:'plotting' flag disabled in conf/cuckooml.conf, \ + 'figures' flag will be overwritten." + figures = False + + if count_dataset is None: # Pull all count features count_features = self.feature_category(":count:") @@ -1133,6 +1156,17 @@ def performance_metric(clustering, labels, data, noise): def clustering_label_distribution(self, clustering, labels, plot=False): """Get statistics about number of ground truth labels per cluster.""" + + # Safety check for plotting + if not imported: + plot = False + else: + if not Config("cuckooml").cuckooml.plotting and plot: + print >> sys.stderr, "Warning:'plotting' flag disabled in conf/cuckooml.conf, \ + 'plot' flag will be overwritten." + plot = False + + cluster_ids = set(clustering["label"].tolist()) labels_ids = set(labels["label"].tolist()) cluster_distribution = {}