diff --git a/conf/.cuckoo.conf.swp b/conf/.cuckoo.conf.swp new file mode 100644 index 00000000..c6a0f703 Binary files /dev/null and b/conf/.cuckoo.conf.swp differ diff --git a/conf/cuckooml.conf b/conf/cuckooml.conf index 71d65e46..b4cb0296 100644 --- a/conf/cuckooml.conf +++ b/conf/cuckooml.conf @@ -38,3 +38,6 @@ compare_new_samples = true # Set folder for samples to be compared against clustering test_directory = sample_data/test + +# Enable plotting functionality +plotting = true diff --git a/modules/processing/cuckooml.py b/modules/processing/cuckooml.py index 14912c24..6c8b9306 100644 --- a/modules/processing/cuckooml.py +++ b/modules/processing/cuckooml.py @@ -15,11 +15,23 @@ from lib.cuckoo.common.constants import CUCKOO_ROOT from math import log +global imported +imported = True + +if Config("cuckooml").cuckooml.plotting: + try: + import matplotlib.pyplot as plt + import seaborn as sns + except ImportError, e: + print >> sys.stderr, "Plotting libraries \ + (matplotlib and seaborn) are not available." + print >> sys.stderr, e + imported = False + + try: - import matplotlib.pyplot as plt import numpy as np import pandas as pd - import seaborn as sns from hdbscan import HDBSCAN from sklearn import metrics from sklearn.cluster import DBSCAN @@ -797,6 +809,17 @@ def filter_dataset(self, dataset=None, feature_coverage=0.1, def detect_abnormal_behaviour(self, count_dataset=None, figures=True): """Detect samples that behave significantly different than others.""" + + # Safety check for plotting + if not imported: + figures = False + else: + if not Config("cuckooml").cuckooml.plotting and figures: + print >> sys.stderr, "Warning:'plotting' flag disabled in conf/cuckooml.conf, \ + 'figures' flag will be overwritten." + figures = False + + if count_dataset is None: # Pull all count features count_features = self.feature_category(":count:") @@ -1133,6 +1156,17 @@ def performance_metric(clustering, labels, data, noise): def clustering_label_distribution(self, clustering, labels, plot=False): """Get statistics about number of ground truth labels per cluster.""" + + # Safety check for plotting + if not imported: + plot = False + else: + if not Config("cuckooml").cuckooml.plotting and plot: + print >> sys.stderr, "Warning:'plotting' flag disabled in conf/cuckooml.conf, \ + 'plot' flag will be overwritten." + plot = False + + cluster_ids = set(clustering["label"].tolist()) labels_ids = set(labels["label"].tolist()) cluster_distribution = {}