Skip to content

Commit

Permalink
substituting sklearn.AgglomerativeClustering for fastclustering and s…
Browse files Browse the repository at this point in the history
…cipy's dendrogram for ROW ONLY
  • Loading branch information
edjuaro committed May 8, 2018
1 parent 488e7bc commit c0e187f
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 12 deletions.
34 changes: 24 additions & 10 deletions src/HierarchicalClustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
import os
from hc_functions import *
import seaborn as sns
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import AgglomerativeClustering # We are not using this anymore
import matplotlib as mpl
import humanfriendly
import datetime
from inspect import currentframe
tasklib_path = os.path.dirname(os.path.realpath(sys.argv[0]))
# mpl.use('Agg')"
# mpl.use('Agg')
sns.set_style("white")
import fastcluster
from scipy.spatial.distance import pdist

DEBUG = True

Expand Down Expand Up @@ -79,14 +81,26 @@ def log(text, line_number='?', debug=True):
if row_distance_metric != 'No_row_clustering':
gtr_companion = True

# Set Sklearn's clustering model parameters
row_model = AgglomerativeClustering(linkage=linkage_dic[clustering_method], n_clusters=2,
affinity=str2func[row_distance_metric])
# fit Sklearn's clustering model
row_model.fit(data)
row_tree = make_tree(row_model)
order_of_rows = order_leaves(row_model, tree=row_tree, data=data,
dist=str2similarity[row_distance_metric], labels=row_labels)
# # Set Sklearn's clustering model parameters
# row_model = AgglomerativeClustering(linkage=linkage_dic[clustering_method], n_clusters=2,
# affinity=str2func[row_distance_metric])
#
# # fit Sklearn's clustering model
# row_model.fit(data)
# row_tree = make_tree(row_model, scipy=False)
# order_of_rows = order_leaves(row_model, tree=row_tree, data=data,
# dist=str2similarity[row_distance_metric], labels=row_labels)

# # fastcluster
D = pdist(data, metric=cusca.custom_pearson_dist)
Z = fastcluster.linkage(D, method='weighted')
numeric_order_of_rows, R = two_plot_2_dendrogram(Z=Z, num_clust=2, no_plot=True)
# order_of_rows = [row_labels[int(i)] for i in numeric_order_of_rows] # Getting label names from order of rows
# order_of_rows = row_labels[numeric_order_of_rows] # Getting label names from order of rows
order_of_rows = [row_labels[i] for i in numeric_order_of_rows]

row_tree = make_tree(Z, scipy=True, n_leaves=len(order_of_rows))

log("About to write gtr file", get_linenumber(), DEBUG)
# Create gtr file
make_gtr(row_tree, data=data, file_name=output_base_name+'.gtr', dist=str2similarity[row_distance_metric])
Expand Down
20 changes: 18 additions & 2 deletions src/hc_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ def parse_data(gct_name, row_normalization=False, col_normalization=False, row_c
}


def make_tree(model, data=None):
def make_tree(model, n_leaves=None, scipy=False):
"""
Modified from:
https://stackoverflow.com/questions/27386641/how-to-traverse-a-tree-from-sklearn-agglomerativeclustering
Expand Down Expand Up @@ -967,7 +967,12 @@ def make_tree(model, data=None):
# print(tree)
# return tree

return dict(enumerate(model.children_, model.n_leaves_))
if scipy:
return dict(enumerate(np.delete(model, [2, 3], 1).astype(int), n_leaves))

else:

return dict(enumerate(model.children_, model.n_leaves_))
# return dict(enumerate(model.children_, 1))


Expand Down Expand Up @@ -1249,3 +1254,14 @@ def better_dendodist(children, distance, tree, data, axis):
for pair in children:
distances_list.append(centroid_distances(pair[0], pair[1], tree, data, axis, distance=distance))
return distances_list


def two_plot_2_dendrogram(Z, num_clust, no_plot=True):
    """
    Build a dendrogram from a linkage matrix and return its leaf ordering.

    The colour threshold is taken from the merge height stored in column 2
    of ``Z`` at row ``-num_clust + 1``, i.e. the ``(num_clust - 1)``-th merge
    counted from the end of the linkage matrix.

    :param Z: linkage matrix supporting 2-D indexing (``Z[row, col]``), as
        passed straight through to ``dendrogram``.
    :param num_clust: number of clusters used to pick the colour threshold;
        must be at least 1.
    :param no_plot: forwarded to ``dendrogram``; when True nothing is drawn.
    :return: tuple ``(leaf_order, R)`` where ``R`` is the dictionary returned
        by ``dendrogram`` and ``leaf_order`` is ``R['leaves']``.
    :raises ValueError: if ``num_clust`` is smaller than 1.
    :raises IndexError: if ``num_clust`` exceeds the number of rows the
        linkage matrix can be indexed with.
    """
    # A value below 1 would turn ``-num_clust + 1`` into a positive index
    # (counted from the *start* of Z) and silently select an unrelated merge
    # height, so reject it explicitly.
    if num_clust < 1:
        raise ValueError(
            "num_clust must be at least 1, got {!r}".format(num_clust))

    # Height of the merge that separates the requested number of clusters.
    threshold = Z[-num_clust + 1, 2]
    R = dendrogram(Z, no_labels=True, color_threshold=threshold, no_plot=no_plot)
    leaf_order = R['leaves']

    return leaf_order, R

0 comments on commit c0e187f

Please sign in to comment.