From dcc5a87b7ee83916cc40d4ad91026b8f2be11efe Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Fri, 19 Feb 2016 10:45:56 +0100 Subject: [PATCH 1/2] Playing around with stuff --- deeptools/heatmapper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index fb9b9139c..810dac23a 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -1078,6 +1078,12 @@ def hmcluster(self, k, method='kmeans'): if method == 'hierarchical': # normally too slow for large data sets from scipy.cluster.hierarchy import fclusterdata + from scipy.spatial.distance import pdist + print(("the type is ", type(matrix))) + print(("original ndim is ", np.asarray(matrix).ndim)) + foo = pdist(np.asarray(matrix, order='c', dtype=np.double), metric='euclidean') + print(("the pdist ndim is ", foo.ndim)) + del foo cluster_labels = fclusterdata(matrix, k, criterion='maxclust', metric='euclidean', depth=2, method='ward') # create groups using the clustering self.group_labels = [] From 86217a2be04d15241e482c9cc92f977aba5a995c Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Fri, 19 Feb 2016 11:49:55 +0100 Subject: [PATCH 2/2] Fix the --hclust option. As is, scipy was trying to calculate the euclidean distance of an array of euclidean distances! --- deeptools/heatmapper.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 810dac23a..a7b65c6a1 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -1077,14 +1077,10 @@ def hmcluster(self, k, method='kmeans'): if method == 'hierarchical': # normally too slow for large data sets - from scipy.cluster.hierarchy import fclusterdata - from scipy.spatial.distance import pdist - print(("the type is ", type(matrix))) - print(("original ndim is ", np.asarray(matrix).ndim)) - foo = pdist(np.asarray(matrix, order='c', dtype=np.double), metric='euclidean') - print(("the pdist ndim is ", foo.ndim)) - del foo - cluster_labels = fclusterdata(matrix, k, criterion='maxclust', metric='euclidean', depth=2, method='ward') + from scipy.cluster.hierarchy import fcluster, linkage + Z = linkage(matrix, method='ward', metric='euclidean') + cluster_labels = fcluster(Z, k, criterion='maxclust') + # create groups using the clustering self.group_labels = [] self.group_boundaries = [0]