diff --git a/.devel/sphinx/bibliography.bib b/.devel/sphinx/bibliography.bib index 49625783..33dbf6e5 100644 --- a/.devel/sphinx/bibliography.bib +++ b/.devel/sphinx/bibliography.bib @@ -18,17 +18,9 @@ @misc{nca note = {under review (preprint)} } -@misc{clustering_benchmarks_v1, - author = {M. Gagolewski and others}, - title = {Benchmark Suite for Clustering Algorithms -- Version 1}, - year = {2020}, - url = {https://github.com/gagolews/clustering-benchmarks}, - doi = {10.5281/zenodo.3815066} -} - @misc{Gagolewski2022:clustering-data-v1.1.0, author = {M. Gagolewski and others}, - title = {A benchmark suite for clustering algorithms: Version 1.1.0}, + title = {A benchmark suite for clustering algorithms: {V}ersion 1.1.0}, year = {2022}, url = {https://github.com/gagolews/clustering-data-v1/releases/tag/v1.1.0}, doi = {10.5281/zenodo.7088171} @@ -47,7 +39,7 @@ @article{clustering-benchmarks @book{datawranglingpy, author = {M. Gagolewski}, - title = {Minimalist Data Wrangling with Python}, + title = {Minimalist Data Wrangling with {P}ython}, doi = {10.5281/zenodo.6451068}, isbn = {978-0-6455719-1-2}, publisher = {Zenodo}, diff --git a/.devel/sphinx/news.md b/.devel/sphinx/news.md index ab52accf..453c90a6 100644 --- a/.devel/sphinx/news.md +++ b/.devel/sphinx/news.md @@ -1,7 +1,6 @@ # Changelog - -## 1.1.4.9xxx +## 1.1.5 (2023-10-18) * [BACKWARD INCOMPATIBILITY] [Python and R] Inequality measures are no longer referred to as inequity measures. @@ -66,9 +65,6 @@ ## 1.1.0 (2022-09-05) -* [GENERAL] The below-mentioned cluster validity measures are discussed - in more detail at . - * [Python and R] New function: `adjusted_asymmetric_accuracy`. * [Python and R] Implementations of the so-called internal cluster @@ -89,6 +85,9 @@ `silhouette_w_index`, `wcnn_index`. + These cluster validity measures are discussed + in more detail at . 
+ * [BACKWARD INCOMPATIBILITY] `normalized_confusion_matrix` now solves the maximal assignment problem instead of applying the somewhat primitive partial pivoting. diff --git a/.devel/sphinx/weave/Makefile b/.devel/sphinx/weave/Makefile index 9480356d..710ed2f1 100644 --- a/.devel/sphinx/weave/Makefile +++ b/.devel/sphinx/weave/Makefile @@ -3,40 +3,25 @@ FILES_RMD = \ basics.Rmd \ sklearn_toy_example.Rmd \ - r.Rmd + noise.Rmd \ + r.Rmd \ + benchmarks_approx.Rmd \ + benchmarks_ar.Rmd \ + benchmarks_details.Rmd \ + timings.Rmd - -FILES_RSTW = \ - benchmarks_ar.rstw \ - benchmarks_details.rstw \ - benchmarks_approx.rstw \ - noise.rstw \ - timings.rstw - -# string.rstw \ -# sparse.rstw \ +# sparse.Rmd \ +# string.Rmd \ RMD_MD_OUTPUTS=$(patsubst %.Rmd,%.md,$(FILES_RMD)) -#RMD_RST_OUTPUTS=$(patsubst %.Rmd,%.rst,$(FILES_RMD)) - -RSTW_RST_OUTPUTS=$(patsubst %.rstw,%.rst,$(FILES_RSTW)) %.md: %.Rmd ./Rmd2md.sh "$<" -#%.rst: %.md -# pandoc -f markdown+grid_tables --wrap=none "$<" -o "$@" - -%.rst: %.rstw - ./pweave_custom.py "$<" "$@" - - -all : rmd rstw +all : rmd rmd : $(RMD_MD_OUTPUTS) -rstw : $(RSTW_RST_OUTPUTS) - clean: - rm -f $(RSTW_RST_OUTPUTS) $(RMD_MD_OUTPUTS) + rm -f $(RMD_MD_OUTPUTS) diff --git a/.devel/sphinx/weave/benchmarks_approx.rstw b/.devel/sphinx/weave/benchmarks_approx.Rmd similarity index 72% rename from .devel/sphinx/weave/benchmarks_approx.rstw rename to .devel/sphinx/weave/benchmarks_approx.Rmd index 9735deb2..e0cfb569 100644 --- a/.devel/sphinx/weave/benchmarks_approx.rstw +++ b/.devel/sphinx/weave/benchmarks_approx.Rmd @@ -1,19 +1,17 @@ -Benchmarks — Approximate Method -=============================== +# Benchmarks — Approximate Method -In one of the :any:`previous sections ` we have demonstrated that the approximate version -of the Genie algorithm (:class:`genieclust.Genie(exact=False, ...) `), i.e., -one which relies on `nmslib `_\ 's -approximate nearest neighbour search, is much faster than the exact one -on large, high-dimensional datasets. 
In particular, we have noted that -clustering of 1 million points in a 100d Euclidean space -takes less than 5 minutes on a laptop. +In one of the [previous sections](timings), we have demonstrated that the approximate version +of the Genie algorithm ([`genieclust.Genie(exact=False, ...)`](genieclust.Genie)), i.e., +one which relies on `nmslib`'s {cite}`nmslib` approximate nearest neighbour search, +is much faster than the exact one on large, high-dimensional datasets. +In particular, we have noted that clustering of 1 million points +in a 100d Euclidean space takes less than 5 minutes on a laptop. As *fast* does not necessarily mean *meaningful* (tl;dr spoiler alert: in our case, it does), let's again consider all the datasets -from the `Benchmark Suite for Clustering Algorithms — Version 1 `_ -:cite:`clustering_benchmarks_v1` -(except the ``h2mg`` and ``g2mg`` batteries). Features with variance of 0 were +from the [Benchmark Suite for Clustering Algorithms (Version 1.0)](https://clustering-benchmarks.gagolewski.com) +{cite}`clustering-benchmarks` +(except the `h2mg` and `g2mg` batteries). Features with variance of 0 were removed, datasets were centred at **0** and scaled so that they have total variance of 1. Tiny bit of Gaussian noise was added to each observation. Clustering is performed with respect to the Euclidean distance. @@ -21,9 +19,7 @@ Clustering is performed with respect to the Euclidean distance. 
- - -<>= +```{python bench-approx-imports,results="hide",echo=FALSE} import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -50,11 +46,11 @@ res = pd.read_csv("v1-timings.csv") # see timings.py dims = pd.read_csv("v1-dims.csv") dims["dataset"] = dims["battery"]+"/"+dims["dataset"] dims = dims.loc[:,"dataset":] -@ +``` -<>= +```{python approx-diffs-load,results="hide",echo=FALSE} # Load results file: res = pd.read_csv("v1-scores-approx.csv") # ari, afm can be negative --> replace negative indexes with 0.0 @@ -80,20 +76,20 @@ params.columns = ["method", "gini_threshold", "run"] res_max = pd.concat((res_max.drop("method", axis=1), params), axis=1) res_max["dataset"] = res_max["battery"] + "/" + res_max["dataset"] res_max = res_max.iloc[:, 1:] -@ +``` On each benchmark dataset ("small" and "large" altogether) -we have fired 10 runs of the approximate Genie method (``exact=False``) +we have fired 10 runs of the approximate Genie method (`exact=False`) and computed the adjusted Rand (AR) indices to quantify the similarity between the predicted outputs and the reference ones. We've computed the differences between each of the 10 AR indices and the AR index for the exact method. 
Here is the complete list of datasets -and `gini_threshold`\ s where this discrepancy is seen at least 2 digits of precision: +and `gini_threshold`s where this discrepancy is seen at least 2 digits of precision: -<>= +```{python approx-diffs,results="asis",echo=FALSE} # which similarity measure to report below: similarity_measure = "ar" @@ -106,35 +102,35 @@ _dat = diffs_stats.loc[(np.abs(diffs_stats["min"])>=0.0095)|(np.abs(diffs_stats[ #_dat = _dat.drop("count", axis=1) which_repeated = (_dat.dataset.shift(1) == _dat.dataset) _dat.loc[which_repeated, "dataset"] = "" -print(tabulate(_dat, _dat.columns, tablefmt="rst", showindex=False), "\n\n") -@ +print(tabulate(_dat, _dat.columns, tablefmt="github", showindex=False), "\n\n") +``` -The only noteworthy difference is for the ``sipu/birch2`` dataset +The only noteworthy difference is for the `sipu/birch2` dataset where we observe that the approximate method generates worse results (although recall that `gini_threshold` of 1 corresponds to the single linkage method). -Interestingly, for ``sipu/worms_64``, the in-exact algorithm with `gini_threshold` +Interestingly, for `sipu/worms_64`, the inexact algorithm with `gini_threshold` of 0.5 yields a much better outcome than the original one. 
Here are the descriptive statistics for the AR indices across all the datasets (for the approximate method we chose the median AR in each of the 10 runs): -<>= +```{python approx-ar,results="asis",echo=FALSE} _dat = res_max.groupby(["dataset", "method"])[similarity_measure].\ median().reset_index().groupby(["method"]).describe().\ round(3).reset_index() _dat.columns = [l0 if not l1 else l1 for l0, l1 in _dat.columns] -_dat.method +#_dat.method #which_repeated = (_dat.gini_threshold.shift(1) == _dat.gini_threshold) #_dat.loc[which_repeated, "gini_threshold"] = "" #_dat = _dat.drop("count", axis=1) -print(tabulate(_dat, _dat.columns, tablefmt="rst", showindex=False), "\n\n") -@ +print(tabulate(_dat, _dat.columns, tablefmt="github", showindex=False), "\n\n") +``` For the recommended ranges of the `gini_threshold` parameter, i.e., between 0.1 and 0.5, we see that the approximate version of Genie -behaves as good as the original one. +behaves similarly to the original one. diff --git a/.devel/sphinx/weave/benchmarks_approx.md b/.devel/sphinx/weave/benchmarks_approx.md new file mode 100644 index 00000000..308005eb --- /dev/null +++ b/.devel/sphinx/weave/benchmarks_approx.md @@ -0,0 +1,83 @@ + + + + +# Benchmarks — Approximate Method + +In one of the [previous sections](timings), we have demonstrated that the approximate version +of the Genie algorithm ([`genieclust.Genie(exact=False, ...)`](genieclust.Genie)), i.e., +one which relies on `nmslib`'s {cite}`nmslib` approximate nearest neighbour search, +is much faster than the exact one on large, high-dimensional datasets. +In particular, we have noted that clustering of 1 million points +in a 100d Euclidean space takes less than 5 minutes on a laptop. 
+ +As *fast* does not necessarily mean *meaningful* (tl;dr spoiler alert: in our case, it does), +let's again consider all the datasets +from the [Benchmark Suite for Clustering Algorithms (Version 1.0)](https://clustering-benchmarks.gagolewski.com) +{cite}`clustering-benchmarks` +(except the `h2mg` and `g2mg` batteries). Features with variance of 0 were +removed, datasets were centred at **0** and scaled so that they have total +variance of 1. Tiny bit of Gaussian noise was added to each observation. +Clustering is performed with respect to the Euclidean distance. + + + + + + + + + + + + +On each benchmark dataset ("small" and "large" altogether) +we have fired 10 runs of the approximate Genie method (`exact=False`) +and computed the adjusted Rand (AR) indices to quantify the similarity between the predicted +outputs and the reference ones. + +We've computed the differences between each of the 10 AR indices +and the AR index for the exact method. Here is the complete list of datasets +and `gini_threshold`s where this discrepancy is seen at least 2 digits of precision: + +| dataset | gini_threshold | count | mean | std | min | 25% | 50% | 75% | max | +|------------------|------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| sipu/birch2 | 0.7 | 10 | -0.01 | 0.01 | -0.02 | -0.02 | -0.01 | -0.01 | 0 | +| | 1 | 10 | -0.35 | 0.18 | -0.44 | -0.44 | -0.43 | -0.43 | 0 | +| sipu/worms_64 | 0.1 | 10 | -0.03 | 0.01 | -0.06 | -0.03 | -0.02 | -0.02 | -0.02 | +| | 0.3 | 10 | 0.02 | 0.01 | -0.01 | 0.02 | 0.03 | 0.03 | 0.03 | +| | 0.5 | 10 | 0.23 | 0.08 | 0.11 | 0.16 | 0.25 | 0.29 | 0.34 | +| wut/trajectories | 0.1 | 10 | -0 | 0.02 | -0.05 | 0 | 0 | 0 | 0 | +| | 0.3 | 10 | -0 | 0.02 | -0.05 | 0 | 0 | 0 | 0 | +| | 0.5 | 10 | -0 | 0.02 | -0.05 | 0 | 0 | 0 | 0 | +| | 0.7 | 10 | -0 | 0.02 | -0.05 | 0 | 0 | 0 | 0 | +| | 1 | 10 | -0.1 | 0.32 | -1 | 0 | 0 | 0 | 0 | + + +The only noteworthy difference is for the `sipu/birch2` dataset +where we observe 
that the approximate method generates worse results +(although recall that `gini_threshold` of 1 corresponds to the single linkage method). +Interestingly, for `sipu/worms_64`, the inexact algorithm with `gini_threshold` +of 0.5 yields a much better outcome than the original one. + + +Here are the descriptive statistics for the AR indices across all the datasets +(for the approximate method we chose the median AR in each of the 10 runs): + +| method | count | mean | std | min | 25% | 50% | 75% | max | +|------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 79 | 0.728 | 0.307 | 0 | 0.516 | 0.844 | 1 | 1 | +| Genie_0.1_approx | 79 | 0.728 | 0.307 | 0 | 0.516 | 0.844 | 1 | 1 | +| Genie_0.3 | 79 | 0.755 | 0.292 | 0 | 0.555 | 0.9 | 1 | 1 | +| Genie_0.3_approx | 79 | 0.755 | 0.292 | 0 | 0.568 | 0.9 | 1 | 1 | +| Genie_0.5 | 79 | 0.731 | 0.332 | 0 | 0.531 | 0.844 | 1 | 1 | +| Genie_0.5_approx | 79 | 0.734 | 0.326 | 0 | 0.531 | 0.844 | 1 | 1 | +| Genie_0.7 | 79 | 0.624 | 0.376 | 0 | 0.264 | 0.719 | 1 | 1 | +| Genie_0.7_approx | 79 | 0.624 | 0.376 | 0 | 0.264 | 0.719 | 1 | 1 | +| Genie_1.0 | 79 | 0.415 | 0.447 | 0 | 0 | 0.174 | 1 | 1 | +| Genie_1.0_approx | 79 | 0.409 | 0.45 | 0 | 0 | 0.148 | 1 | 1 | + + +For the recommended ranges of the `gini_threshold` parameter, +i.e., between 0.1 and 0.5, we see that the approximate version of Genie +behaves similarly to the original one. diff --git a/.devel/sphinx/weave/benchmarks_approx.rst b/.devel/sphinx/weave/benchmarks_approx.rst deleted file mode 100644 index 90c34bd7..00000000 --- a/.devel/sphinx/weave/benchmarks_approx.rst +++ /dev/null @@ -1,99 +0,0 @@ -Benchmarks — Approximate Method -=============================== - -In one of the :any:`previous sections ` we have demonstrated that the approximate version -of the Genie algorithm (:class:`genieclust.Genie(exact=False, ...) 
`), i.e., -one which relies on `nmslib `_\ 's -approximate nearest neighbour search, is much faster than the exact one -on large, high-dimensional datasets. In particular, we have noted that -clustering of 1 million points in a 100d Euclidean space -takes less than 5 minutes on a laptop. - -As *fast* does not necessarily mean *meaningful* (tl;dr spoiler alert: in our case, it does), -let's again consider all the datasets -from the `Benchmark Suite for Clustering Algorithms — Version 1 `_ -:cite:`clustering_benchmarks_v1` -(except the ``h2mg`` and ``g2mg`` batteries). Features with variance of 0 were -removed, datasets were centred at **0** and scaled so that they have total -variance of 1. Tiny bit of Gaussian noise was added to each observation. -Clustering is performed with respect to the Euclidean distance. - - - - - - - - - - - - - - - - -On each benchmark dataset ("small" and "large" altogether) -we have fired 10 runs of the approximate Genie method (``exact=False``) -and computed the adjusted Rand (AR) indices to quantify the similarity between the predicted -outputs and the reference ones. - -We've computed the differences between each of the 10 AR indices -and the AR index for the exact method. Here is the complete list of datasets -and `gini_threshold`\ s where this discrepancy is seen at least 2 digits of precision: - - - -================ ================ ======= ====== ===== ===== ===== ===== ===== ===== -dataset gini_threshold count mean std min 25% 50% 75% max -================ ================ ======= ====== ===== ===== ===== ===== ===== ===== -sipu/birch2 0.7 10 -0.01 0.01 -0.02 -0.02 -0.01 -0.01 0 -.. 1 10 -0.35 0.18 -0.44 -0.44 -0.43 -0.43 0 -sipu/worms_64 0.1 10 -0.03 0.01 -0.06 -0.03 -0.02 -0.02 -0.02 -.. 0.3 10 0.02 0.01 -0.01 0.02 0.03 0.03 0.03 -.. 0.5 10 0.23 0.08 0.11 0.16 0.25 0.29 0.34 -wut/trajectories 0.1 10 -0 0.02 -0.05 0 0 0 0 -.. 0.3 10 -0 0.02 -0.05 0 0 0 0 -.. 0.5 10 -0 0.02 -0.05 0 0 0 0 -.. 0.7 10 -0 0.02 -0.05 0 0 0 0 -.. 
1 10 -0.1 0.32 -1 0 0 0 0 -================ ================ ======= ====== ===== ===== ===== ===== ===== ===== - - - - - -The only noteworthy difference is for the ``sipu/birch2`` dataset -where we observe that the approximate method generates worse results -(although recall that `gini_threshold` of 1 corresponds to the single linkage method). -Interestingly, for ``sipu/worms_64``, the in-exact algorithm with `gini_threshold` -of 0.5 yields a much better outcome than the original one. - - -Here are the descriptive statistics for the AR indices across all the datasets -(for the approximate method we chose the median AR in each of the 10 runs): - - - -================ ======= ====== ===== ===== ===== ===== ===== ===== -method count mean std min 25% 50% 75% max -================ ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 79 0.728 0.307 0 0.516 0.844 1 1 -Genie_0.1_approx 79 0.728 0.307 0 0.516 0.844 1 1 -Genie_0.3 79 0.755 0.292 0 0.555 0.9 1 1 -Genie_0.3_approx 79 0.755 0.292 0 0.568 0.9 1 1 -Genie_0.5 79 0.731 0.332 0 0.531 0.844 1 1 -Genie_0.5_approx 79 0.734 0.326 0 0.531 0.844 1 1 -Genie_0.7 79 0.624 0.376 0 0.264 0.719 1 1 -Genie_0.7_approx 79 0.624 0.376 0 0.264 0.719 1 1 -Genie_1.0 79 0.415 0.447 0 0 0.174 1 1 -Genie_1.0_approx 79 0.409 0.45 0 0 0.148 1 1 -================ ======= ====== ===== ===== ===== ===== ===== ===== - - - - - -For the recommended ranges of the `gini_threshold` parameter, -i.e., between 0.1 and 0.5, we see that the approximate version of Genie -behaves as good as the original one. 
diff --git a/.devel/sphinx/weave/benchmarks_ar-figures/plot_large-3.pdf b/.devel/sphinx/weave/benchmarks_ar-figures/plot_large-3.pdf new file mode 100644 index 00000000..f2ed39f3 Binary files /dev/null and b/.devel/sphinx/weave/benchmarks_ar-figures/plot_large-3.pdf differ diff --git a/.devel/sphinx/weave/benchmarks_ar-figures/plot_large-3.png b/.devel/sphinx/weave/benchmarks_ar-figures/plot_large-3.png new file mode 100644 index 00000000..0f4cbbbe Binary files /dev/null and b/.devel/sphinx/weave/benchmarks_ar-figures/plot_large-3.png differ diff --git a/.devel/sphinx/weave/benchmarks_ar-figures/plot_small-1.pdf b/.devel/sphinx/weave/benchmarks_ar-figures/plot_small-1.pdf new file mode 100644 index 00000000..4fec4c97 Binary files /dev/null and b/.devel/sphinx/weave/benchmarks_ar-figures/plot_small-1.pdf differ diff --git a/.devel/sphinx/weave/benchmarks_ar-figures/plot_small-1.png b/.devel/sphinx/weave/benchmarks_ar-figures/plot_small-1.png new file mode 100644 index 00000000..7567df98 Binary files /dev/null and b/.devel/sphinx/weave/benchmarks_ar-figures/plot_small-1.png differ diff --git a/.devel/sphinx/weave/benchmarks_ar.rstw b/.devel/sphinx/weave/benchmarks_ar.Rmd similarity index 79% rename from .devel/sphinx/weave/benchmarks_ar.rstw rename to .devel/sphinx/weave/benchmarks_ar.Rmd index bacf6748..e55b0511 100644 --- a/.devel/sphinx/weave/benchmarks_ar.rstw +++ b/.devel/sphinx/weave/benchmarks_ar.Rmd @@ -1,39 +1,38 @@ -Benchmarks (How Good Is It?) -============================ +# Benchmarks (How Good Is It?) In this section we evaluate the usefulness of different clustering algorithms. -We use our `framework for benchmarking clustering algorithms (benchmark suite version 1.0.1) `_ -:cite:`clustering-benchmarks` which aggregates datasets from various sources, -including, but not limited to :cite:`uci,kmsix,fcps,graves,chameleon,xnn`. 
+We use our [framework for benchmarking clustering algorithms (benchmark suite version 1.0.1)](https://github.com/gagolews/clustering-benchmarks) +{cite}`clustering-benchmarks` which aggregates datasets from various sources, +including, but not limited to {cite}`uci,kmsix,fcps,graves,chameleon,xnn`. Ground-truth/reference label vectors are provided alongside each dataset. They define the desired number of clusters. Hence, we only study the algorithms that allow for setting of `n_clusters` explicitly. We will apply a few agglomerative hierarchical methods (average, centroid, complete, single, and Ward linkage; implemented in the -`fastcluster `_ package :cite:`fastclusterpkg`), +[fastcluster](http://www.danifold.net/fastcluster.html) package {cite}`fastclusterpkg`), k-means, expectation-maximisation (EM) for Gaussian mixtures, Birch, spectral -(implemented in `scikit-learn `_ :cite:`sklearn`), -`ITM `_ :cite:`itm`, -and Genie :cite:`genieins`. +(implemented in [scikit-learn](https://scikit-learn.org/) {cite}`sklearn`), +[ITM](https://github.com/amueller/information-theoretic-mst) {cite}`itm`, +and Genie {cite}`genieins`. -The adjusted Rand index (see :cite:`comparing_partitions`) will be used +The adjusted Rand index (see {cite}`comparing_partitions`) will be used to quantify the agreement between -a reference and a predicted clustering on the scale [0,1], +a reference and a predicted clustering on the scale $[0,1]$, with score of 1.0 denoting perfect agreement. However, as there might be multiple equally valid/plausible/useful partitions (see also -:cite:`sdmc` and :cite:`clustering_benchmarks_v1` for discussion), +{cite}`sdmc` and {cite}`clustering-benchmarks` for discussion), the outputs generated by a single algorithm is evaluated against all the available reference labellings and the maximal similarity score is reported. For more detailed results based on other partition similarity scores, -see the :any:`Appendix `\ . +see the [Appendix](benchmarks_details). 
-<>= +```{python imports,results="hide",echo=FALSE} import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -129,7 +128,7 @@ def show_ranks(res_max): r = lambda x: scipy.stats.rankdata(-x, method="min") _dat = res_max.set_index(["dataset", "method"])[similarity_measure].unstack().\ round(2).T.apply(r).T.describe().T.round(1) - print(tabulate(_dat, _dat.columns, tablefmt="rst"), "\n\n") + print(tabulate(_dat, _dat.columns, tablefmt="github"), "\n\n") def do_plot(res_max): @@ -142,17 +141,16 @@ def do_plot(res_max): showmeans=True, meanprops=dict(markeredgecolor="k", marker="x")) plt.show() -@ +``` -Small Datasets --------------- +## Small Datasets As some of the algorithms tested here have failed to generate a solution within reasonable time limits (e.g., spectral clustering), in this part we restrict ourselves to the datasets with up to 10,000 observations. As suggested in the benchmark suite's description, we omit the over-populous -"parametric" Gaussian-distributed batteries ``h2mg`` and ``g2mg``. +"parametric" Gaussian-distributed batteries `h2mg` and `g2mg`. Here are the boxplots of the empirical distributions of the adjusted Rand index. We report the results for Birch and spectral clustering with parameters @@ -165,7 +163,7 @@ and `gamma in [0.25, 0.5, 1.0, 2.5, 5.0]`). Moreover, Gaussian mixtures used `n_init=100`. -<>= +```{python prepare_small,results="hide",echo=FALSE} # We suggested that "parametric" datasets g2mg, h2mg should be studied separately. 
# Subset: not g2mg, not h2mg res2 = res.loc[~res.battery.isin(["g2mg", "h2mg"]), :] @@ -183,21 +181,21 @@ res2["dataset"] = res2["battery"] + "/" + res2["dataset"] res_max = res2.groupby(["dataset", "method"]).max().\ reset_index().drop(["labels"], axis=1) #res_max.head() -@ +``` -<>= +```{python plot_small,echo=FALSE,results="hide",warn=FALSE,fig.cap="Distribution of the AR index for each algorithm (small datasets); best=1.0.",fig.height=5.9375} do_plot(res_max) -@ +``` The Genie algorithm with `gini_threshold` of 0.3 gives the highest average and median AR index and, at the same time, is subject to the least variability. The (parametric!) EM algorithm fitting mixtures of Gaussians and the (perhaps lesser-known) -information-theoretic `ITM `_ -:cite:`itm` method (which is also based on a minimum spanning tree; -compare :cite:`clustering-msts`) +information-theoretic [ITM](https://github.com/amueller/information-theoretic-mst) +{cite}`itm` method (which is also based on a minimum spanning tree; +compare {cite}`clustering-msts`) tend to output good quality outcomes as well. @@ -207,17 +205,16 @@ Descriptive statistics for the ranks (for each dataset, each algorithm that gets the highest AR index rounded to 2 decimal digits, gets a rank of 1); lower ranks are better: -<>= +```{python ranks_small,echo=FALSE,results="asis"} show_ranks(res_max) -@ +``` -Large Datasets --------------- +## Large Datasets Below we provide the results for the larger datasets (70,000-105,600 points). -<>= +```{python prepare_large,results="hide",echo=FALSE} # We suggested that "parametric" datasets g2mg, h2mg should be studied separately. 
# Subset: not g2mg, not h2mg res2 = res.loc[~res.battery.isin(["g2mg", "h2mg"]), :] @@ -235,14 +232,14 @@ res2["dataset"] = res2["battery"] + "/" + res2["dataset"] res_max = res2.groupby(["dataset", "method"]).max().\ reset_index().drop(["labels"], axis=1) #res_max.head() -@ +``` -<>= +```{python plot_large,echo=FALSE,results="hide",warn=FALSE,fig.cap="Distribution of the AR index for each algorithm (large datasets); best=1.0.",fig.height=5.9375} do_plot(res_max) -@ +``` This time, the ITM method and Genie with `gini_threshold` of 0.1 give the highest typical scores. @@ -250,23 +247,20 @@ the highest typical scores. Descriptive statistics for the ranks (AR index): -<>= +```{python ranks_large,echo=FALSE,results="asis"} show_ranks(res_max) -@ +``` -Summary -------- +## Summary -Overall, the Genie algorithm tends to outperform other algorithms considered +Overall, the Genie algorithm often outperforms other algorithms considered in this study, at least on this rich benchmark battery. -In :cite:`genieins`, based on a much smaller sample of reference datasets, -we have recommended `gini_threshold` of 0.3, +In {cite}`genieins`, based on a much smaller sample of reference datasets, +we have recommended using `gini_threshold=0.3`, which is set as the default also in the `genieclust` package. However, sometimes inspecting thresholds equal to 0.1 and 0.5 is worth a try. -interestingly, the algorithm is quite stable in the sense that +Interestingly, the algorithm is quite stable in the sense that small changes of this parameter should not affect the generated clusterings in a significant way. - - diff --git a/.devel/sphinx/weave/benchmarks_ar.md b/.devel/sphinx/weave/benchmarks_ar.md new file mode 100644 index 00000000..a084f506 --- /dev/null +++ b/.devel/sphinx/weave/benchmarks_ar.md @@ -0,0 +1,144 @@ + + + + +# Benchmarks (How Good Is It?) + +In this section we evaluate the usefulness of different clustering algorithms. 
+We use our [framework for benchmarking clustering algorithms (benchmark suite version 1.0.1)](https://github.com/gagolews/clustering-benchmarks) +{cite}`clustering-benchmarks` which aggregates datasets from various sources, +including, but not limited to {cite}`uci,kmsix,fcps,graves,chameleon,xnn`. +Ground-truth/reference label vectors are provided alongside each dataset. +They define the desired number of clusters. Hence, we only study +the algorithms that allow for setting of `n_clusters` explicitly. + +We will apply a few agglomerative hierarchical +methods (average, centroid, complete, single, and Ward linkage; implemented in the +[fastcluster](http://www.danifold.net/fastcluster.html) package {cite}`fastclusterpkg`), +k-means, expectation-maximisation (EM) for Gaussian mixtures, Birch, spectral +(implemented in [scikit-learn](https://scikit-learn.org/) {cite}`sklearn`), +[ITM](https://github.com/amueller/information-theoretic-mst) {cite}`itm`, +and Genie {cite}`genieins`. + +The adjusted Rand index (see {cite}`comparing_partitions`) will be used +to quantify the agreement between +a reference and a predicted clustering on the scale $[0,1]$, +with score of 1.0 denoting perfect agreement. However, as there might be +multiple equally valid/plausible/useful partitions (see also +{cite}`sdmc` and {cite}`clustering-benchmarks` for discussion), +the outputs generated by a single algorithm are evaluated +against all the available reference labellings and the maximal similarity score +is reported. + +For more detailed results based on other partition similarity scores, +see the [Appendix](benchmarks_details). + + + + + + + +## Small Datasets + +As some of the algorithms tested here have failed to generate a solution +within reasonable time limits (e.g., spectral clustering), +in this part we restrict ourselves to the datasets with up to 10,000 observations. 
+As suggested in the benchmark suite's description, we omit the over-populous +"parametric" Gaussian-distributed batteries `h2mg` and `g2mg`. + +Here are the boxplots of the empirical distributions of the adjusted Rand index. +We report the results for Birch and spectral clustering with parameters +that lead to the highest average AR score +(the former was tested on a parameter grid of +`branching_factor in [10, 50, 100]` +and `threshold in [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]` +and the latter on `affinity in ["rbf", "laplacian", "poly", "sigmoid"]` +and `gamma in [0.25, 0.5, 1.0, 2.5, 5.0]`). +Moreover, Gaussian mixtures used `n_init=100`. + + + + + + + +(fig:plot_small)= +```{figure} benchmarks_ar-figures/plot_small-1.* +Distribution of the AR index for each algorithm (small datasets); best=1.0. +``` + +The Genie algorithm with `gini_threshold` of 0.3 gives the highest average +and median AR index and, at the same time, is subject to the least variability. +The (parametric!) EM algorithm fitting mixtures of Gaussians and the (perhaps lesser-known) +information-theoretic [ITM](https://github.com/amueller/information-theoretic-mst) +{cite}`itm` method (which is also based on a minimum spanning tree; +compare {cite}`clustering-msts`) +tend to output good quality outcomes as well. 
+ + + + +Descriptive statistics for the ranks (for each dataset, +each algorithm that gets the highest AR index rounded to 2 decimal digits, +gets a rank of 1); lower ranks are better: + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 72 | 6.6 | 3.5 | 1 | 4.8 | 7 | 9.2 | 12 | +| Birch_0.01 | 72 | 5.8 | 2.9 | 1 | 4 | 6 | 8 | 12 | +| Complete linkage | 72 | 7.7 | 3.2 | 1 | 6 | 8 | 11 | 12 | +| Gaussian mixtures | 72 | 4.2 | 3.6 | 1 | 1 | 3 | 7 | 12 | +| Genie_0.1 | 72 | 3.8 | 3.3 | 1 | 1 | 3 | 6 | 12 | +| Genie_0.3 | 72 | 3.3 | 3 | 1 | 1 | 2 | 5 | 11 | +| Genie_0.5 | 72 | 4.2 | 3.9 | 1 | 1 | 2 | 8 | 11 | +| ITM | 72 | 5.4 | 3.9 | 1 | 1 | 5 | 9 | 12 | +| K-means | 72 | 5.6 | 3.8 | 1 | 1 | 6 | 9 | 12 | +| Single linkage | 72 | 7.4 | 5.1 | 1 | 1 | 11 | 12 | 12 | +| Spectral_RBF_5 | 72 | 5.2 | 3.5 | 1 | 1 | 6 | 8 | 11 | +| Ward linkage | 72 | 6 | 3 | 1 | 4 | 6 | 8 | 12 | + + +## Large Datasets + +Below we provide the results for the larger datasets (70,000-105,600 points). + + + + + + +(fig:plot_large)= +```{figure} benchmarks_ar-figures/plot_large-3.* +Distribution of the AR index for each algorithm (large datasets); best=1.0. +``` + +This time, the ITM method and Genie with `gini_threshold` of 0.1 give +the highest typical scores. 
+ + +Descriptive statistics for the ranks (AR index): + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 1.8 | 1.2 | 1 | 1 | 1.5 | 2 | 4 | +| Genie_0.3 | 6 | 3.2 | 1.7 | 1 | 2 | 3 | 4.8 | 5 | +| Genie_0.5 | 6 | 4.8 | 1.9 | 1 | 5 | 5.5 | 6 | 6 | +| ITM | 6 | 3.3 | 2.3 | 1 | 1.5 | 3 | 5.2 | 6 | +| K-means | 6 | 3.3 | 1.6 | 1 | 2.2 | 3.5 | 4.8 | 5 | +| Single linkage | 6 | 6.8 | 0.4 | 6 | 7 | 7 | 7 | 7 | +| Ward linkage | 6 | 3.2 | 1.5 | 1 | 2.2 | 3.5 | 4 | 5 | + + + +## Summary + +Overall, the Genie algorithm often outperforms other algorithms considered +in this study, at least on this rich benchmark battery. +In {cite}`genieins`, based on a much smaller sample of reference datasets, +we have recommended using `gini_threshold=0.3`, +which is set as the default also in the `genieclust` package. +However, sometimes inspecting thresholds equal to 0.1 and 0.5 is worth a try. +Interestingly, the algorithm is quite stable in the sense that +small changes of this parameter should not affect the generated clusterings +in a significant way. diff --git a/.devel/sphinx/weave/benchmarks_ar.rst b/.devel/sphinx/weave/benchmarks_ar.rst deleted file mode 100644 index 3baab3d2..00000000 --- a/.devel/sphinx/weave/benchmarks_ar.rst +++ /dev/null @@ -1,195 +0,0 @@ -Benchmarks (How Good Is It?) -============================ - -In this section we evaluate the usefulness of different clustering algorithms. -We use our `framework for benchmarking clustering algorithms (benchmark suite version 1.0.1) `_ -:cite:`clustering-benchmarks` which aggregates datasets from various sources, -including, but not limited to :cite:`uci,kmsix,fcps,graves,chameleon,xnn`. -Ground-truth/reference label vectors are provided alongside each dataset. -They define the desired number of clusters. Hence, we only study -the algorithms that allow for setting of `n_clusters` explicitly. 
- -We will apply a few agglomerative hierarchical -methods (average, centroid, complete, single, and Ward linkage; implemented in the -`fastcluster `_ package :cite:`fastclusterpkg`), -k-means, expectation-maximisation (EM) for Gaussian mixtures, Birch, spectral -(implemented in `scikit-learn `_ :cite:`sklearn`), -`ITM `_ :cite:`itm`, -and Genie :cite:`genieins`. - -The adjusted Rand index (see :cite:`comparing_partitions`) will be used -to quantify the agreement between -a reference and a predicted clustering on the scale [0,1], -with score of 1.0 denoting perfect agreement. However, as there might be -multiple equally valid/plausible/useful partitions (see also -:cite:`sdmc` and :cite:`clustering_benchmarks_v1` for discussion), -the outputs generated by a single algorithm is evaluated -against all the available reference labellings and the maximal similarity score -is reported. - -For more detailed results based on other partition similarity scores, -see the :any:`Appendix `\ . - - - - - - - - -Small Datasets --------------- - -As some of the algorithms tested here have failed to generate a solution -within reasonable time limits (e.g., spectral clustering), -in this part we restrict ourselves to the datasets with up to 10,000 observations. -As suggested in the benchmark suite's description, we omit the over-populous -"parametric" Gaussian-distributed batteries ``h2mg`` and ``g2mg``. - -Here are the boxplots of the empirical distributions of the adjusted Rand index. -We report the results for Birch and spectral clustering with parameters -that lead to the highest average AR score -(the former was tested on a parameter grid of -`branching_factor in [10, 50, 100]` -and `threshold in [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]` -and the latter on `affinity in ["rbf", "laplacian", "poly", "sigmoid"]` -and `gamma in [0.25, 0.5, 1.0, 2.5, 5.0]`). -Moreover, Gaussian mixtures used `n_init=100`. 
- - - - - - - - - -:: - - ## /home/gagolews/.virtualenvs/python3-default/lib/python3.11/site- - ## packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype - ## is deprecated and will be removed in a future version. Use - ## isinstance(dtype, CategoricalDtype) instead - ## if pd.api.types.is_categorical_dtype(vector): - ## /home/gagolews/.virtualenvs/python3-default/lib/python3.11/site- - ## packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype - ## is deprecated and will be removed in a future version. Use - ## isinstance(dtype, CategoricalDtype) instead - ## if pd.api.types.is_categorical_dtype(vector): - -.. figure:: figures/benchmarks_ar_plot_small_1.png - :width: 15 cm - - Distribution of the AR index for each algorithm (small datasets); best=1.0. - - - -The Genie algorithm with `gini_threshold` of 0.3 gives the highest average -and median AR index and, at the same time, is subject to the least variability. -The (parametric!) EM algorithm fitting mixtures of Gaussians and the (perhaps lesser-known) -information-theoretic `ITM `_ -:cite:`itm` method (which is also based on a minimum spanning tree; -compare :cite:`clustering-msts`) -tend to output good quality outcomes as well. - - - - -Descriptive statistics for the ranks (for each dataset, -each algorithm that gets the highest AR index rounded to 2 decimal digits, -gets a rank of 1); lower ranks are better: - - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 72 6.6 3.5 1 4.8 7 9.2 12 -Birch_0.01 72 5.8 2.9 1 4 6 8 12 -Complete linkage 72 7.7 3.2 1 6 8 11 12 -Gaussian mixtures 72 4.2 3.6 1 1 3 7 12 -Genie_0.1 72 3.8 3.3 1 1 3 6 12 -Genie_0.3 72 3.3 3 1 1 2 5 11 -Genie_0.5 72 4.2 3.9 1 1 2 8 11 -ITM 72 5.4 3.9 1 1 5 9 12 -K-means 72 5.6 3.8 1 1 6 9 12 -Single linkage 72 7.4 5.1 1 1 11 12 12 -Spectral_RBF_5 72 5.2 3.5 1 1 6 8 11 -Ward linkage 72 6 3 1 4 6 8 12 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - - - - -Large Datasets --------------- - -Below we provide the results for the larger datasets (70,000-105,600 points). - - - - - - - - -:: - - ## /home/gagolews/.virtualenvs/python3-default/lib/python3.11/site- - ## packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype - ## is deprecated and will be removed in a future version. Use - ## isinstance(dtype, CategoricalDtype) instead - ## if pd.api.types.is_categorical_dtype(vector): - ## /home/gagolews/.virtualenvs/python3-default/lib/python3.11/site- - ## packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype - ## is deprecated and will be removed in a future version. Use - ## isinstance(dtype, CategoricalDtype) instead - ## if pd.api.types.is_categorical_dtype(vector): - -.. figure:: figures/benchmarks_ar_plot_large_1.png - :width: 15 cm - - Distribution of the AR index for each algorithm (large datasets); best=1.0. - - - -This time, the ITM method and Genie with `gini_threshold` of 0.1 give -the highest typical scores. - - -Descriptive statistics for the ranks (AR index): - - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 1.8 1.2 1 1 1.5 2 4 -Genie_0.3 6 3.2 1.7 1 2 3 4.8 5 -Genie_0.5 6 4.8 1.9 1 5 5.5 6 6 -ITM 6 3.3 2.3 1 1.5 3 5.2 6 -K-means 6 3.3 1.6 1 2.2 3.5 4.8 5 -Single linkage 6 6.8 0.4 6 7 7 7 7 -Ward linkage 6 3.2 1.5 1 2.2 3.5 4 5 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - - - - - -Summary -------- - -Overall, the Genie algorithm tends to outperform other algorithms considered -in this study, at least on this rich benchmark battery. -In :cite:`genieins`, based on a much smaller sample of reference datasets, -we have recommended `gini_threshold` of 0.3, -which is set as the default also in the `genieclust` package. -However, sometimes inspecting thresholds equal to 0.1 and 0.5 is worth a try. -interestingly, the algorithm is quite stable in the sense that -small changes of this parameter should not affect the generated clusterings -in a significant way. - - diff --git a/.devel/sphinx/weave/benchmarks_details-figures/indices_large-3.pdf b/.devel/sphinx/weave/benchmarks_details-figures/indices_large-3.pdf new file mode 100644 index 00000000..861ff3ee Binary files /dev/null and b/.devel/sphinx/weave/benchmarks_details-figures/indices_large-3.pdf differ diff --git a/.devel/sphinx/weave/benchmarks_details-figures/indices_large-3.png b/.devel/sphinx/weave/benchmarks_details-figures/indices_large-3.png new file mode 100644 index 00000000..903fd432 Binary files /dev/null and b/.devel/sphinx/weave/benchmarks_details-figures/indices_large-3.png differ diff --git a/.devel/sphinx/weave/benchmarks_details-figures/indices_small-1.pdf b/.devel/sphinx/weave/benchmarks_details-figures/indices_small-1.pdf new file mode 100644 index 00000000..00a576f8 Binary files /dev/null and b/.devel/sphinx/weave/benchmarks_details-figures/indices_small-1.pdf differ diff --git a/.devel/sphinx/weave/benchmarks_details-figures/indices_small-1.png 
b/.devel/sphinx/weave/benchmarks_details-figures/indices_small-1.png new file mode 100644 index 00000000..87f74215 Binary files /dev/null and b/.devel/sphinx/weave/benchmarks_details-figures/indices_small-1.png differ diff --git a/.devel/sphinx/weave/benchmarks_details.rstw b/.devel/sphinx/weave/benchmarks_details.Rmd similarity index 76% rename from .devel/sphinx/weave/benchmarks_details.rstw rename to .devel/sphinx/weave/benchmarks_details.Rmd index 957f6c30..bd45eb88 100644 --- a/.devel/sphinx/weave/benchmarks_details.rstw +++ b/.devel/sphinx/weave/benchmarks_details.Rmd @@ -1,18 +1,17 @@ -Benchmarks — Detailed Results -============================= +# Benchmarks — Detailed Results -In one of the :any:`above sections ` +In one of the [above sections](benchmarks_ar), we have summarised the AR indices based on the datasets from -the `Benchmark Suite for Clustering Algorithms – Version 1 `_ -:cite:`clustering_benchmarks_v1`. -In this section we present more detailed results for +the [Benchmark Suite for Clustering Algorithms (Version 1.0)](https://clustering-benchmarks.gagolewski.com) +{cite}`clustering-benchmarks`. +In this section, we present more detailed results for some other partition similarity measures implemented in the `genieclust` -package — Fowlkes–Mallows :cite:`fm`, adjusted Rand :cite:`comparing_partitions`, -adjusted and normalised mutual information :cite:`nmi`, +package: Fowlkes–Mallows {cite}`fm`, adjusted Rand {cite}`comparing_partitions`, +adjusted and normalised mutual information {cite}`nmi`, normalised pivoted accuracy (which is based on set-matching classification rate), -normalised clustering accuracy :cite:`nca`, -and pair sets index :cite:`psi`, -see the API documentation of :mod:`genieclust.compare_partitions` for more details. +normalised clustering accuracy {cite}`nca`, +and pair sets index {cite}`psi`; for more details, +see the API documentation of [genieclust.compare_partitions](genieclust.compare_partitions). 
In each case, a score of 1.0 denotes perfect agreement between the clustering results and the reference partitions. @@ -22,9 +21,9 @@ variance of 1. Then, a tiny bit of Gaussian noise has been added to each item. Clustering is performed with respect to the Euclidean distance (wherever applicable). -All raw results can be found `here `_. +All raw results can be found [here](https://github.com/gagolews/clustering_results_v1/). -<>= +```{python imports,results="hide",echo=FALSE} import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -82,32 +81,30 @@ res["method"] = res["method"].map({ def det(res_max): for similarity_measure in ["nca", "ar", "fm", "ami", "nmi", "npa", "psi"]: - print("%s\n%s\n" % (similarity_measure, "^"*len(similarity_measure))) + print("### %s\n\n" % (similarity_measure,)) print("Summary statistics for ``%s`` (best=1.0):\n\n" % similarity_measure) _dat = res_max.set_index(["dataset", "method"])[similarity_measure].unstack().\ describe().T.round(2) - print(tabulate(_dat, _dat.columns, tablefmt="rst"), "\n\n") + print(tabulate(_dat, _dat.columns, tablefmt="github"), "\n\n") print("Ranks for ``%s`` (best=1):\n\n" % similarity_measure) r = lambda x: scipy.stats.rankdata(-x, method="min") _dat = res_max.set_index(["dataset", "method"])[similarity_measure].unstack().\ round(2).T.apply(r).T.describe().T.round(1) - print(tabulate(_dat, _dat.columns, tablefmt="rst"), "\n\n") + print(tabulate(_dat, _dat.columns, tablefmt="github"), "\n\n") print("Raw results for ``%s`` (best=1.0):\n\n" % similarity_measure) _dat = res_max.set_index(["dataset", "method"])[similarity_measure].unstack().round(2) - print(tabulate(_dat, _dat.columns, tablefmt="rst"), "\n\n") -@ + print(tabulate(_dat, _dat.columns, tablefmt="github"), "\n\n") +``` -Small Datasets --------------- +## Small Datasets - -<>= +```{python prepare_small,results="hide",echo=FALSE} # We suggested that "parametric" datasets g2mg, h2mg should be studied separately. 
# Subset: not g2mg, not h2mg res2 = res.loc[~res.battery.isin(["g2mg", "h2mg"]), :] @@ -125,23 +122,22 @@ res2["dataset"] = res2["battery"] + "/" + res2["dataset"] res_max = res2.groupby(["dataset", "method"]).max().\ reset_index().drop(["labels"], axis=1) #res_max.head() -@ +``` -<>= +```{python det_small,echo=FALSE,results="asis",fig.cap=""} det(res_max) -@ +``` -Summary -^^^^^^^ +### Summary Medians and means of the partition similarity scores (read row-wise, in groups of 2 columns): -<>= +```{python indices_small,echo=FALSE,results="hide",fig.cap="Heat map of median and mean similarity scores"} sns.heatmap(res_max.groupby("method")[["nca", "ar", "fm", "ami", "nmi", "npa", "psi"]].\ aggregate([np.median, np.mean]), annot=True, vmin=0.5, vmax=1.0, fmt=".2f") plt.yticks(rotation=0) @@ -149,15 +145,16 @@ plt.xticks(rotation=45) plt.xlabel('') plt.ylabel('') plt.show() -@ +``` + +## Large Datasets -Large Datasets --------------- +### Results -<>= +```{python prepare_large,results="hide",echo=FALSE} # We suggested that "parametric" datasets g2mg, h2mg should be studied separately.
# Subset: not g2mg, not h2mg res2 = res.loc[~res.battery.isin(["g2mg", "h2mg"]), :] @@ -175,21 +172,20 @@ res2["dataset"] = res2["battery"] + "/" + res2["dataset"] res_max = res2.groupby(["dataset", "method"]).max().\ reset_index().drop(["labels"], axis=1) #res_max.head() -@ +``` -<>= +```{python det_large,echo=FALSE,results="asis",fig.cap=""} det(res_max) -@ +``` -Summary -^^^^^^^ +### Summary Medians and means of the partition similarity scores: -<>= +```{python indices_large,echo=FALSE,results="hide",fig.cap="Heat map of median and mean similarity scores"} sns.heatmap(res_max.groupby("method")[["nca", "ar", "fm", "ami", "nmi", "npa", "psi"]].\ aggregate([np.median, np.mean]), annot=True, vmin=0.5, vmax=1.0, fmt=".2f") plt.yticks(rotation=0) @@ -197,4 +193,4 @@ plt.xticks(rotation=45) plt.xlabel('') plt.ylabel('') plt.show() -@ +``` diff --git a/.devel/sphinx/weave/benchmarks_details.md b/.devel/sphinx/weave/benchmarks_details.md new file mode 100644 index 00000000..b2d29e19 --- /dev/null +++ b/.devel/sphinx/weave/benchmarks_details.md @@ -0,0 +1,1223 @@ + + + + +# Benchmarks — Detailed Results + +In one of the [above sections](benchmarks_ar), +we have summarised the AR indices based on the datasets from +the [Benchmark Suite for Clustering Algorithms (Version 1.0)](https://clustering-benchmarks.gagolewski.com) +{cite}`clustering-benchmarks`. +In this section, we present more detailed results for +some other partition similarity measures implemented in the `genieclust` +package: Fowlkes–Mallows {cite}`fm`, adjusted Rand {cite}`comparing_partitions`, +adjusted and normalised mutual information {cite}`nmi`, +normalised pivoted accuracy (which is based on set-matching classification rate), +normalised clustering accuracy {cite}`nca`, +and pair sets index {cite}`psi`; for more details, +see the API documentation of [genieclust.compare_partitions](genieclust.compare_partitions). 
+In each case, a score of 1.0 denotes perfect agreement between the clustering +results and the reference partitions. + +At the preprocessing stage, features with variance of 0 were removed. +Every dataset has been centred at **0** and scaled so that it has total +variance of 1. Then, a tiny bit of Gaussian noise has been added to each +item. Clustering is performed with respect to the Euclidean distance +(wherever applicable). + +All raw results can be found [here](https://github.com/gagolews/clustering_results_v1/). + + + + +## Small Datasets + + + + + + +### nca + + +Summary statistics for ``nca`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 73 | 0.6 | 0.34 | 0 | 0.34 | 0.64 | 0.96 | 1 | +| Birch_0.01 | 73 | 0.67 | 0.3 | 0.01 | 0.44 | 0.76 | 0.96 | 1 | +| Complete linkage | 73 | 0.6 | 0.3 | 0.02 | 0.41 | 0.57 | 0.83 | 1 | +| Gaussian mixtures | 73 | 0.72 | 0.32 | 0.01 | 0.53 | 0.85 | 0.99 | 1 | +| Genie_0.1 | 73 | 0.81 | 0.24 | 0 | 0.7 | 0.93 | 1 | 1 | +| Genie_0.3 | 73 | 0.82 | 0.24 | 0 | 0.64 | 0.95 | 1 | 1 | +| Genie_0.5 | 73 | 0.79 | 0.26 | 0.15 | 0.63 | 0.94 | 1 | 1 | +| ITM | 73 | 0.77 | 0.22 | 0.09 | 0.67 | 0.8 | 0.99 | 1 | +| K-means | 73 | 0.67 | 0.3 | 0.01 | 0.46 | 0.7 | 0.97 | 1 | +| Single linkage | 73 | 0.44 | 0.43 | 0 | 0.01 | 0.3 | 1 | 1 | +| Spectral_RBF_5 | 72 | 0.69 | 0.34 | 0 | 0.44 | 0.84 | 1 | 1 | +| Ward linkage | 73 | 0.67 | 0.29 | 0.05 | 0.44 | 0.78 | 0.95 | 1 | + + +Ranks for ``nca`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 72 | 6.7 | 3.7 | 1 | 4 | 8 | 10 | 12 | +| Birch_0.01 | 72 | 5.5 | 3.2 | 1 | 2 | 6 | 8 | 12 | +| Complete linkage | 72 | 7.8 | 2.8 | 1 | 7 | 8 | 10 | 12 | +| Gaussian mixtures | 72 | 4.6 | 3.9 | 1 | 1 | 3.5 | 7.2 | 12 | +| Genie_0.1 | 72 |
3.3 | 1 | 1 | 2 | 4.2 | 11 | +| Genie_0.3 | 72 | 3.8 | 3.5 | 1 | 1 | 2 | 7 | 12 | +| Genie_0.5 | 72 | 4.4 | 4 | 1 | 1 | 2 | 9 | 11 | +| ITM | 72 | 5.1 | 3.7 | 1 | 1 | 5 | 8.2 | 12 | +| K-means | 72 | 5.2 | 3.6 | 1 | 1 | 5.5 | 8 | 12 | +| Single linkage | 72 | 8.1 | 5 | 1 | 1 | 12 | 12 | 12 | +| Spectral_RBF_5 | 72 | 5.4 | 3.8 | 1 | 1 | 6 | 9 | 12 | +| Ward linkage | 72 | 5.8 | 3.4 | 1 | 2.8 | 6 | 8 | 12 | + + +Raw results for ``nca`` (best=1.0): + + +| | Average linkage | Birch_0.01 | Complete linkage | Gaussian mixtures | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Spectral_RBF_5 | Ward linkage | +|------------------------|-------------------|--------------|--------------------|----------------------|-------------|-------------|-------------|-------|-----------|------------------|------------------|----------------| +| fcps/atom | 0.31 | 0.31 | 0.29 | 0.17 | 1 | 1 | 1 | 1 | 0.43 | 1 | 1 | 0.31 | +| fcps/chainlink | 0.52 | 0.53 | 0.56 | 0.95 | 1 | 1 | 1 | 1 | 0.31 | 1 | 1 | 0.53 | +| fcps/engytime | 0.23 | 0.88 | 0.21 | 0.99 | 0.92 | 0.92 | 0.92 | 0.91 | 0.92 | 0 | 0.96 | 0.86 | +| fcps/hepta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0.94 | 1 | 1 | 1 | 1 | +| fcps/lsun | 0.61 | 0.66 | 0.66 | 1 | 1 | 1 | 1 | 1 | 0.7 | 1 | 0.86 | 0.64 | +| fcps/target | 0.86 | 0.84 | 0.88 | 0.84 | 1 | 1 | 1 | 1 | 0.84 | 1 | 1 | 0.83 | +| fcps/tetra | 1 | 0.98 | 0.99 | 1 | 1 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.98 | +| fcps/twodiamonds | 1 | 1 | 0.99 | 1 | 0.99 | 0.99 | 0.99 | 0.99 | 1 | 0 | 1 | 1 | +| fcps/wingnut | 1 | 1 | 1 | 0.93 | 1 | 1 | 1 | 1 | 0.93 | 1 | 0.88 | 0.78 | +| graves/dense | 0.96 | 0.96 | 0.51 | 1 | 0.98 | 0.98 | 0.98 | 1 | 0.9 | 0.03 | 0.96 | 0.96 | +| graves/fuzzyx | 0.86 | 0.95 | 0.97 | 1 | 0.84 | 0.72 | 0.63 | 0.97 | 1 | 0 | 0.78 | 0.88 | +| graves/line | 0.37 | 0.37 | 0.36 | 1 | 0.5 | 1 | 1 | 0.6 | 0.46 | 1 | 1 | 0.25 | +| graves/parabolic | 0.77 | 0.76 | 0.77 | 0.73 | 0.9 | 0.9 | 0.9 | 0.8 | 0.77 | 0 | 0.81 | 0.79 | +| graves/ring | 0.34 | 0.34 | 0.46 | 
0.17 | 1 | 1 | 1 | 1 | 0.01 | 1 | 1 | 0.34 | +| graves/ring_noisy | 0 | 0.34 | 0.52 | 0.16 | 1 | 1 | 1 | 1 | 0.41 | 0 | 1 | 0.39 | +| graves/ring_outliers | 0.58 | 0.33 | 0.56 | 0.31 | 1 | 1 | 1 | 1 | 0.57 | 1 | 1 | 0.34 | +| graves/zigzag | 0.69 | 0.74 | 0.57 | 0.98 | 1 | 1 | 1 | 0.87 | 0.7 | 1 | 0.8 | 0.64 | +| graves/zigzag_noisy | 0.65 | 0.77 | 0.58 | 0.95 | 0.88 | 1 | 1 | 0.75 | 0.64 | 0.5 | 0.78 | 0.79 | +| graves/zigzag_outliers | 0.5 | 0.68 | 0.57 | 0.99 | 1 | 1 | 1 | 0.91 | 0.62 | 0.5 | 0.4 | 0.62 | +| other/chameleon_t4_8k | 0.64 | 0.62 | 0.53 | 0.59 | 0.72 | 1 | 0.8 | 0.78 | 0.61 | 0 | 0.61 | 0.58 | +| other/chameleon_t5_8k | 1 | 1 | 0.8 | 1 | 1 | 1 | 0.8 | 0.69 | 1 | 0 | 1 | 1 | +| other/chameleon_t7_10k | 0.59 | 0.62 | 0.56 | 0.59 | 0.73 | 0.87 | 1 | 0.75 | 0.54 | 0 | 0.55 | 0.64 | +| other/chameleon_t8_8k | 0.45 | 0.49 | 0.48 | 0.59 | 0.67 | 0.48 | 0.73 | 0.62 | 0.52 | 0 | 0.5 | 0.48 | +| other/hdbscan | 0.38 | 0.69 | 0.52 | 0.83 | 0.98 | 0.78 | 0.79 | 0.89 | 0.77 | 0 | 0.45 | 0.9 | +| other/iris | 0.86 | 0.84 | 0.76 | 0.95 | 0.94 | 0.94 | 0.55 | 0.94 | 0.84 | 0.52 | 0.85 | 0.84 | +| other/iris5 | 0.86 | 0.84 | 0.76 | 0.95 | 0.41 | 0.41 | 0.94 | 0.28 | 0.84 | 0.52 | 0.85 | 0.84 | +| other/square | 0.39 | 0.01 | 0.41 | 0.19 | 1 | 1 | 1 | 1 | 0.17 | 1 | 0.41 | 0.5 | +| sipu/a1 | 0.96 | 0.96 | 0.96 | 0.98 | 0.97 | 0.91 | 0.84 | 0.82 | 0.98 | 0.37 | 0.97 | 0.95 | +| sipu/a2 | 0.97 | 0.97 | 0.95 | 0.98 | 0.97 | 0.94 | 0.83 | 0.83 | 0.98 | 0.3 | 0.93 | 0.96 | +| sipu/a3 | 0.97 | 0.97 | 0.96 | 0.96 | 0.98 | 0.95 | 0.84 | 0.83 | 0.96 | 0.25 | 0.95 | 0.97 | +| sipu/aggregation | 1 | 0.78 | 0.75 | 1 | 0.58 | 0.58 | 0.88 | 0.66 | 0.75 | 0.66 | 1 | 0.78 | +| sipu/compound | 0.67 | 0.87 | 0.67 | 0.84 | 0.84 | 0.87 | 0.76 | 0.79 | 0.81 | 0.67 | 0.77 | 0.87 | +| sipu/d31 | 0.94 | 0.96 | 0.96 | 0.97 | 0.97 | 0.93 | 0.76 | 0.85 | 0.98 | 0.24 | 0.97 | 0.96 | +| sipu/flame | 0.74 | 0.59 | 0.21 | 0.68 | 1 | 1 | 1 | 0.68 | 0.74 | 0.02 | 0.91 | 0.59 | +| sipu/jain | 0.79 | 
0.81 | 0.79 | 0.43 | 0.49 | 1 | 1 | 0.71 | 0.7 | 0.27 | 0.81 | 0.81 | +| sipu/pathbased | 0.63 | 0.67 | 0.56 | 0.61 | 0.98 | 0.98 | 0.76 | 0.68 | 0.65 | 0.01 | 0.65 | 0.67 | +| sipu/r15 | 1 | 1 | 1 | 1 | 0.99 | 0.99 | 1 | 0.99 | 1 | 1 | 1 | 1 | +| sipu/s1 | 0.99 | 0.99 | 0.98 | 0.99 | 0.99 | 0.99 | 0.99 | 0.8 | 0.99 | 0.43 | 0.99 | 0.99 | +| sipu/s2 | 0.95 | 0.95 | 0.83 | 0.97 | 0.95 | 0.95 | 0.83 | 0.86 | 0.97 | 0 | 0.97 | 0.95 | +| sipu/s3 | 0.66 | 0.8 | 0.57 | 0.85 | 0.82 | 0.76 | 0.63 | 0.76 | 0.84 | 0 | 0.84 | 0.81 | +| sipu/s4 | 0.59 | 0.68 | 0.52 | 0.79 | 0.77 | 0.73 | 0.54 | 0.67 | 0.78 | 0 | 0.69 | 0.68 | +| sipu/spiral | 0.04 | 0.07 | 0.06 | 0.03 | 1 | 1 | 1 | 0.83 | 0.01 | 1 | 0.02 | 0.07 | +| sipu/unbalance | 1 | 1 | 0.8 | 1 | 0.24 | 0.29 | 0.35 | 0.22 | 1 | 0.86 | 1 | 1 | +| uci/ecoli | 0.5 | 0.52 | 0.47 | 0.51 | 0.42 | 0.44 | 0.4 | 0.46 | 0.57 | 0.2 | 0.48 | 0.52 | +| uci/glass | 0.08 | 0.31 | 0.28 | 0.3 | 0.39 | 0.42 | 0.32 | 0.43 | 0.37 | 0.06 | 0.23 | 0.34 | +| uci/ionosphere | 0.01 | 0.44 | 0.02 | 0.65 | 0.41 | 0.41 | 0.15 | 0.36 | 0.44 | 0.01 | 0.01 | 0.44 | +| uci/sonar | 0.05 | 0.05 | 0.04 | 0.09 | 0 | 0 | 0.16 | 0.09 | 0.11 | 0.01 | 0.01 | 0.05 | +| uci/statlog | 0 | 0.45 | 0.17 | 0.53 | 0.74 | 0.59 | 0.51 | 0.6 | 0.43 | 0 | nan | 0.41 | +| uci/wdbc | 0.09 | 0.41 | 0.09 | 0.8 | 0.45 | 0.4 | 0.4 | 0.8 | 0.61 | 0 | 0 | 0.41 | +| uci/wine | 0.34 | 0.53 | 0.48 | 0.92 | 0.58 | 0.58 | 0.22 | 0.57 | 0.54 | 0.04 | 0.36 | 0.53 | +| uci/yeast | 0.19 | 0.37 | 0.3 | 0.32 | 0.34 | 0.33 | 0.28 | 0.33 | 0.38 | 0.14 | 0.16 | 0.35 | +| wut/circles | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/cross | 0.03 | 0.17 | 0.32 | 0.48 | 0.87 | 0.68 | 0.38 | 0.44 | 0.39 | 0 | 0.01 | 0.31 | +| wut/graph | 0.56 | 0.58 | 0.56 | 0.93 | 0.63 | 0.59 | 0.45 | 0.63 | 0.59 | 0.07 | 0.58 | 0.62 | +| wut/isolation | 0.03 | 0.04 | 0.02 | 0.01 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.01 | 0.05 | +| wut/labirynth | 0.59 | 0.8 | 0.7 | 0.66 | 0.74 | 0.62 | 0.71 | 0.7 | 0.56 | 0.6 | 
0.56 | 0.55 | +| wut/mk1 | 1 | 1 | 0.98 | 1 | 1 | 1 | 1 | 0.68 | 1 | 0.5 | 1 | 1 | +| wut/mk2 | 0.07 | 0.11 | 0.09 | 0.09 | 1 | 1 | 1 | 1 | 0.09 | 1 | 0.08 | 0.07 | +| wut/mk3 | 0.5 | 0.92 | 0.92 | 0.94 | 0.88 | 0.88 | 0.58 | 0.68 | 0.94 | 0 | 0.94 | 0.93 | +| wut/mk4 | 0.18 | 0.31 | 0.42 | 0.55 | 1 | 1 | 1 | 0.74 | 0.37 | 1 | 0.39 | 0.36 | +| wut/olympic | 0.27 | 0.26 | 0.21 | 0.2 | 0.32 | 0.29 | 0.25 | 0.36 | 0.22 | 0 | 0.26 | 0.21 | +| wut/smile | 0.99 | 0.86 | 0.63 | 0.5 | 0.55 | 1 | 1 | 0.7 | 0.86 | 1 | 1 | 0.88 | +| wut/stripes | 0.05 | 0.04 | 0.11 | 0.11 | 1 | 1 | 1 | 1 | 0.11 | 1 | 0.11 | 0.12 | +| wut/trajectories | 1 | 1 | 0.74 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/trapped_lovers | 0.58 | 0.61 | 0.72 | 0.61 | 0.76 | 1 | 1 | 0.76 | 0.64 | 1 | 0.93 | 0.65 | +| wut/twosplashes | 0.05 | 0.53 | 0.47 | 0.82 | 0.71 | 0.71 | 0.71 | 0.86 | 0.53 | 0.01 | 0.53 | 0.44 | +| wut/windows | 0.8 | 0.8 | 0.32 | 0.82 | 0.57 | 0.57 | 1 | 0.36 | 0.69 | 1 | 0.3 | 0.79 | +| wut/x1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/x2 | 0.65 | 0.98 | 1 | 0.87 | 0.87 | 0.87 | 0.59 | 0.87 | 0.98 | 0.01 | 0.34 | 0.98 | +| wut/x3 | 0.94 | 0.99 | 0.74 | 0.96 | 0.93 | 0.94 | 0.94 | 0.6 | 1 | 0.03 | 0.54 | 0.99 | +| wut/z1 | 0.32 | 0.27 | 0.36 | 0.11 | 0.5 | 0.5 | 0.3 | 0.5 | 0.31 | 0.06 | 0.33 | 0.27 | +| wut/z2 | 0.75 | 0.88 | 0.73 | 1 | 0.88 | 0.64 | 0.64 | 0.88 | 0.86 | 0.5 | 0.99 | 0.84 | +| wut/z3 | 1 | 0.99 | 0.95 | 0.99 | 0.7 | 0.49 | 0.94 | 0.83 | 1 | 0.66 | 0.98 | 1 | + + +### ar + + +Summary statistics for ``ar`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 73 | 0.51 | 0.38 | 0 | 0.11 | 0.51 | 0.93 | 1 | +| Birch_0.01 | 73 | 0.55 | 0.36 | 0 | 0.22 | 0.56 | 0.93 | 1 | +| Complete linkage | 73 | 0.48 | 0.34 | 0 | 0.21 | 0.4 | 0.79 | 1 | +| Gaussian mixtures | 73 | 0.65 | 0.37 | 0 | 0.4 | 0.82 | 0.98 | 1 | +| Genie_0.1 | 73 | 
0.74 | 0.31 | 0 | 0.53 | 0.88 | 1 | 1 | +| Genie_0.3 | 73 | 0.78 | 0.27 | 0 | 0.59 | 0.94 | 1 | 1 | +| Genie_0.5 | 73 | 0.77 | 0.3 | 0 | 0.66 | 0.92 | 1 | 1 | +| ITM | 73 | 0.68 | 0.27 | 0 | 0.53 | 0.69 | 0.99 | 1 | +| K-means | 73 | 0.55 | 0.35 | 0 | 0.2 | 0.51 | 0.95 | 1 | +| Single linkage | 73 | 0.44 | 0.45 | 0 | 0 | 0.32 | 1 | 1 | +| Spectral_RBF_5 | 72 | 0.63 | 0.37 | 0 | 0.33 | 0.73 | 0.99 | 1 | +| Ward linkage | 73 | 0.54 | 0.35 | 0 | 0.22 | 0.54 | 0.91 | 1 | + + +Ranks for ``ar`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 72 | 6.6 | 3.5 | 1 | 4.8 | 7 | 9.2 | 12 | +| Birch_0.01 | 72 | 5.8 | 2.9 | 1 | 4 | 6 | 8 | 12 | +| Complete linkage | 72 | 7.7 | 3.2 | 1 | 6 | 8 | 11 | 12 | +| Gaussian mixtures | 72 | 4.2 | 3.6 | 1 | 1 | 3 | 7 | 12 | +| Genie_0.1 | 72 | 3.8 | 3.3 | 1 | 1 | 3 | 6 | 12 | +| Genie_0.3 | 72 | 3.3 | 3 | 1 | 1 | 2 | 5 | 11 | +| Genie_0.5 | 72 | 4.2 | 3.9 | 1 | 1 | 2 | 8 | 11 | +| ITM | 72 | 5.4 | 3.9 | 1 | 1 | 5 | 9 | 12 | +| K-means | 72 | 5.6 | 3.8 | 1 | 1 | 6 | 9 | 12 | +| Single linkage | 72 | 7.4 | 5.1 | 1 | 1 | 11 | 12 | 12 | +| Spectral_RBF_5 | 72 | 5.2 | 3.5 | 1 | 1 | 6 | 8 | 11 | +| Ward linkage | 72 | 6 | 3 | 1 | 4 | 6 | 8 | 12 | + + +Raw results for ``ar`` (best=1.0): + + +| | Average linkage | Birch_0.01 | Complete linkage | Gaussian mixtures | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Spectral_RBF_5 | Ward linkage | +|------------------------|-------------------|--------------|--------------------|----------------------|-------------|-------------|-------------|-------|-----------|------------------|------------------|----------------| +| fcps/atom | 0.1 | 0.1 | 0.08 | 0.03 | 1 | 1 | 1 | 1 | 0.18 | 1 | 1 | 0.1 | +| fcps/chainlink | 0.27 | 0.28 | 0.31 | 0.91 | 1 | 1 | 1 | 1 | 0.09 | 1 | 1 | 0.28 | +| fcps/engytime | 0.05 | 0.78 | 0.04 | 0.98 | 0.84 | 0.84 | 0.84 | 0.83 
| 0.85 | 0 | 0.92 | 0.75 | +| fcps/hepta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0.9 | 1 | 1 | 1 | 1 | +| fcps/lsun | 0.36 | 0.4 | 0.4 | 1 | 1 | 1 | 1 | 1 | 0.44 | 1 | 0.8 | 0.37 | +| fcps/target | 0.15 | 0.64 | 0.21 | 0.65 | 1 | 1 | 1 | 1 | 0.63 | 1 | 1 | 0.64 | +| fcps/tetra | 0.99 | 0.97 | 0.99 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0.97 | +| fcps/twodiamonds | 0.99 | 0.99 | 0.99 | 1 | 0.98 | 0.98 | 0.98 | 0.99 | 1 | 0 | 1 | 1 | +| fcps/wingnut | 1 | 1 | 1 | 0.86 | 1 | 1 | 1 | 1 | 0.86 | 1 | 0.78 | 0.6 | +| graves/dense | 0.92 | 0.92 | 0.26 | 1 | 0.96 | 0.96 | 0.96 | 1 | 0.81 | 0 | 0.92 | 0.92 | +| graves/fuzzyx | 0.74 | 0.9 | 0.94 | 1 | 0.73 | 0.5 | 0.4 | 0.95 | 1 | 0 | 0.59 | 0.78 | +| graves/line | 0 | 0 | 0 | 1 | 0.02 | 1 | 1 | 0.12 | 0 | 1 | 1 | 0 | +| graves/parabolic | 0.6 | 0.57 | 0.59 | 0.54 | 0.81 | 0.81 | 0.81 | 0.64 | 0.59 | 0 | 0.66 | 0.62 | +| graves/ring | 0.11 | 0.12 | 0.21 | 0.03 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0.12 | +| graves/ring_noisy | 0 | 0.11 | 0.27 | 0.02 | 1 | 1 | 1 | 1 | 0.16 | 0 | 1 | 0.15 | +| graves/ring_outliers | 0.63 | 0.63 | 0.34 | 0.62 | 1 | 1 | 1 | 1 | 0.62 | 1 | 1 | 0.63 | +| graves/zigzag | 0.53 | 0.62 | 0.36 | 0.96 | 1 | 1 | 1 | 0.78 | 0.53 | 1 | 0.68 | 0.54 | +| graves/zigzag_noisy | 0.52 | 0.64 | 0.52 | 0.9 | 0.77 | 1 | 1 | 0.54 | 0.51 | 0.47 | 0.63 | 0.66 | +| graves/zigzag_outliers | 0.31 | 0.51 | 0.33 | 0.98 | 1 | 1 | 1 | 0.83 | 0.44 | 0.48 | 0.34 | 0.39 | +| other/chameleon_t4_8k | 0.64 | 0.62 | 0.55 | 0.56 | 0.83 | 1 | 0.93 | 0.84 | 0.6 | 0 | 0.63 | 0.61 | +| other/chameleon_t5_8k | 1 | 1 | 0.73 | 1 | 1 | 1 | 0.83 | 0.59 | 1 | 0 | 1 | 1 | +| other/chameleon_t7_10k | 0.45 | 0.44 | 0.37 | 0.4 | 0.53 | 0.7 | 1 | 0.53 | 0.42 | 0 | 0.38 | 0.43 | +| other/chameleon_t8_8k | 0.37 | 0.39 | 0.33 | 0.44 | 0.61 | 0.64 | 0.71 | 0.57 | 0.37 | 0 | 0.36 | 0.37 | +| other/hdbscan | 0.43 | 0.63 | 0.46 | 0.82 | 0.97 | 0.71 | 0.71 | 0.75 | 0.64 | 0 | 0.33 | 0.84 | +| other/iris | 0.76 | 0.73 | 0.64 | 0.9 | 0.89 | 0.89 | 0.56 | 0.89 | 0.73 | 0.56 | 0.75 
| 0.73 | +| other/iris5 | 0.56 | 0.51 | 0.34 | 0.82 | 0.59 | 0.59 | 0.79 | 0.52 | 0.51 | 0.15 | 0.53 | 0.51 | +| other/square | 0.15 | 0 | 0.17 | 0.04 | 1 | 1 | 1 | 1 | 0.03 | 1 | 0.17 | 0.25 | +| sipu/a1 | 0.93 | 0.93 | 0.92 | 0.96 | 0.94 | 0.9 | 0.83 | 0.77 | 0.97 | 0.44 | 0.94 | 0.91 | +| sipu/a2 | 0.93 | 0.94 | 0.91 | 0.96 | 0.95 | 0.92 | 0.83 | 0.77 | 0.97 | 0.35 | 0.91 | 0.92 | +| sipu/a3 | 0.94 | 0.94 | 0.92 | 0.95 | 0.96 | 0.94 | 0.82 | 0.77 | 0.95 | 0.32 | 0.93 | 0.94 | +| sipu/aggregation | 1 | 0.82 | 0.78 | 1 | 0.48 | 0.57 | 0.88 | 0.61 | 0.76 | 0.8 | 0.99 | 0.81 | +| sipu/compound | 0.91 | 0.88 | 0.91 | 0.91 | 0.78 | 0.78 | 0.88 | 0.62 | 0.76 | 0.93 | 0.87 | 0.88 | +| sipu/d31 | 0.91 | 0.93 | 0.92 | 0.95 | 0.94 | 0.9 | 0.71 | 0.8 | 0.95 | 0.17 | 0.94 | 0.92 | +| sipu/flame | 0.44 | 0.22 | 0 | 0.34 | 1 | 1 | 1 | 0.35 | 0.48 | 0.01 | 0.83 | 0.22 | +| sipu/jain | 0.78 | 0.51 | 0.78 | 0 | 0.04 | 1 | 1 | 0.32 | 0.32 | 0.26 | 0.51 | 0.51 | +| sipu/pathbased | 0.59 | 0.54 | 0.41 | 0.6 | 0.97 | 0.97 | 0.7 | 0.54 | 0.5 | 0 | 0.6 | 0.54 | +| sipu/r15 | 1 | 1 | 1 | 1 | 0.99 | 0.99 | 1 | 0.99 | 1 | 1 | 1 | 1 | +| sipu/s1 | 0.98 | 0.99 | 0.97 | 0.99 | 0.99 | 0.99 | 0.99 | 0.76 | 0.99 | 0.46 | 0.99 | 0.98 | +| sipu/s2 | 0.91 | 0.9 | 0.79 | 0.94 | 0.92 | 0.92 | 0.78 | 0.77 | 0.94 | 0 | 0.94 | 0.91 | +| sipu/s3 | 0.6 | 0.68 | 0.51 | 0.73 | 0.69 | 0.67 | 0.56 | 0.61 | 0.72 | 0 | 0.71 | 0.68 | +| sipu/s4 | 0.49 | 0.56 | 0.44 | 0.64 | 0.62 | 0.59 | 0.47 | 0.55 | 0.63 | 0 | 0.57 | 0.55 | +| sipu/spiral | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0.73 | 0 | 1 | 0 | 0 | +| sipu/unbalance | 1 | 1 | 0.61 | 1 | 0.57 | 0.62 | 0.78 | 0.53 | 1 | 1 | 1 | 1 | +| uci/ecoli | 0.74 | 0.49 | 0.62 | 0.61 | 0.36 | 0.46 | 0.66 | 0.33 | 0.46 | 0.04 | 0.35 | 0.49 | +| uci/glass | 0.02 | 0.25 | 0.23 | 0.24 | 0.12 | 0.25 | 0.22 | 0.23 | 0.27 | 0.01 | 0.22 | 0.26 | +| uci/ionosphere | 0 | 0.19 | 0.01 | 0.4 | 0.21 | 0.21 | 0 | 0.09 | 0.18 | 0 | 0 | 0.19 | +| uci/sonar | 0.01 | 0 | 0 | 0 | 0 | 0 | 0.01 | 0 | 
0.01 | 0 | 0 | 0 | +| uci/statlog | 0 | 0.33 | 0.1 | 0.47 | 0.62 | 0.52 | 0.47 | 0.53 | 0.36 | 0 | nan | 0.31 | +| uci/wdbc | 0.05 | 0.29 | 0.05 | 0.71 | 0.09 | 0.28 | 0.28 | 0.63 | 0.49 | 0 | 0 | 0.29 | +| uci/wine | 0.29 | 0.37 | 0.37 | 0.82 | 0.36 | 0.36 | 0.25 | 0.39 | 0.37 | 0.01 | 0.32 | 0.37 | +| uci/yeast | 0.01 | 0.12 | 0.09 | 0.05 | 0.11 | 0.18 | 0.08 | 0.08 | 0.14 | 0.01 | 0.01 | 0.13 | +| wut/circles | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/cross | 0 | 0.02 | 0.36 | 0.47 | 0.76 | 0.46 | 0.1 | 0.53 | 0.11 | 0 | 0 | 0.07 | +| wut/graph | 0.41 | 0.44 | 0.4 | 0.86 | 0.48 | 0.49 | 0.39 | 0.52 | 0.44 | 0.03 | 0.41 | 0.46 | +| wut/isolation | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | +| wut/labirynth | 0.36 | 0.49 | 0.3 | 0.62 | 0.5 | 0.59 | 0.72 | 0.72 | 0.29 | 0.76 | 0.39 | 0.34 | +| wut/mk1 | 0.99 | 0.99 | 0.97 | 0.99 | 0.99 | 0.99 | 0.99 | 0.53 | 0.99 | 0.56 | 0.99 | 0.99 | +| wut/mk2 | 0 | 0.01 | 0.01 | 0.01 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0 | 0 | +| wut/mk3 | 0.56 | 0.85 | 0.84 | 0.88 | 0.8 | 0.8 | 0.56 | 0.54 | 0.89 | 0 | 0.88 | 0.86 | +| wut/mk4 | 0.04 | 0.14 | 0.28 | 0.5 | 1 | 1 | 1 | 0.59 | 0.2 | 1 | 0.2 | 0.19 | +| wut/olympic | 0.14 | 0.15 | 0.12 | 0.14 | 0.17 | 0.15 | 0.09 | 0.21 | 0.11 | 0 | 0.13 | 0.13 | +| wut/smile | 0.99 | 0.61 | 0.77 | 0.61 | 0.64 | 1 | 1 | 0.62 | 0.61 | 1 | 1 | 0.65 | +| wut/stripes | 0 | 0 | 0.01 | 0.01 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.01 | 0.01 | +| wut/trajectories | 1 | 1 | 0.71 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/trapped_lovers | 0.13 | 0.14 | 0.25 | 0.14 | 0.39 | 1 | 1 | 0.39 | 0.15 | 1 | 0.75 | 0.16 | +| wut/twosplashes | 0 | 0.28 | 0.22 | 0.67 | 0.5 | 0.5 | 0.5 | 0.73 | 0.28 | 0 | 0.28 | 0.19 | +| wut/windows | 0.1 | 0.1 | 0.09 | 0.14 | 0.14 | 0.2 | 1 | 0.35 | 0.09 | 1 | 0.15 | 0.1 | +| wut/x1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/x2 | 0.51 | 0.97 | 1 | 0.69 | 0.81 | 0.81 | 0.5 | 0.69 | 0.97 | 0 | 0.27 | 0.97 | +| wut/x3 | 0.96 | 0.98 | 0.47 | 0.93 | 0.88 | 0.96 | 0.96 | 
0.65 | 1 | 0.02 | 0.51 | 0.98 | +| wut/z1 | 0.21 | 0.1 | 0.19 | 0.01 | 0.39 | 0.39 | 0.14 | 0.39 | 0.2 | 0 | 0.15 | 0.1 | +| wut/z2 | 0.51 | 0.5 | 0.44 | 1 | 0.5 | 0.63 | 0.82 | 0.49 | 0.47 | 0.73 | 0.99 | 0.43 | +| wut/z3 | 1 | 1 | 0.93 | 1 | 0.63 | 0.66 | 0.92 | 0.65 | 1 | 0.74 | 0.98 | 1 | + + +### fm + + +Summary statistics for ``fm`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 73 | 0.73 | 0.21 | 0.34 | 0.55 | 0.72 | 0.94 | 1 | +| Birch_0.01 | 73 | 0.73 | 0.21 | 0.28 | 0.58 | 0.7 | 0.94 | 1 | +| Complete linkage | 73 | 0.69 | 0.2 | 0.3 | 0.55 | 0.65 | 0.9 | 1 | +| Gaussian mixtures | 73 | 0.79 | 0.22 | 0.26 | 0.66 | 0.87 | 0.99 | 1 | +| Genie_0.1 | 73 | 0.82 | 0.2 | 0.25 | 0.66 | 0.92 | 1 | 1 | +| Genie_0.3 | 73 | 0.85 | 0.18 | 0.37 | 0.73 | 0.94 | 1 | 1 | +| Genie_0.5 | 73 | 0.86 | 0.18 | 0.36 | 0.75 | 0.94 | 1 | 1 | +| ITM | 73 | 0.78 | 0.18 | 0.23 | 0.65 | 0.78 | 0.99 | 1 | +| K-means | 73 | 0.72 | 0.22 | 0.29 | 0.51 | 0.7 | 0.95 | 1 | +| Single linkage | 73 | 0.73 | 0.24 | 0.26 | 0.53 | 0.71 | 1 | 1 | +| Spectral_RBF_5 | 72 | 0.78 | 0.21 | 0.33 | 0.61 | 0.83 | 0.99 | 1 | +| Ward linkage | 73 | 0.72 | 0.21 | 0.29 | 0.58 | 0.7 | 0.92 | 1 | + + +Ranks for ``fm`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 72 | 5.9 | 3.3 | 1 | 3 | 6 | 8.2 | 12 | +| Birch_0.01 | 72 | 6 | 3 | 1 | 4.8 | 6.5 | 8 | 12 | +| Complete linkage | 72 | 7.7 | 3.5 | 1 | 5.8 | 8 | 11 | 12 | +| Gaussian mixtures | 72 | 4.5 | 3.6 | 1 | 1 | 3 | 8 | 12 | +| Genie_0.1 | 72 | 4.1 | 3.6 | 1 | 1 | 3 | 7 | 12 | +| Genie_0.3 | 72 | 3.5 | 3.1 | 1 | 1 | 2 | 5 | 11 | +| Genie_0.5 | 72 | 3.9 | 3.6 | 1 | 1 | 2 | 6.2 | 12 | +| ITM | 72 | 5.9 | 4.2 | 1 | 1 | 5 | 10 | 12 | +| K-means | 72 | 6.3 | 4 | 1 | 1.8 | 8 | 9 | 12 | +| Single 
linkage | 72 | 6.1 | 4.9 | 1 | 1 | 6 | 12 | 12 | +| Spectral_RBF_5 | 72 | 4.8 | 3.4 | 1 | 1 | 5 | 8 | 11 | +| Ward linkage | 72 | 6.3 | 3.1 | 1 | 5 | 7 | 9 | 12 | + + +Raw results for ``fm`` (best=1.0): + + +| | Average linkage | Birch_0.01 | Complete linkage | Gaussian mixtures | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Spectral_RBF_5 | Ward linkage | +|------------------------|-------------------|--------------|--------------------|----------------------|-------------|-------------|-------------|-------|-----------|------------------|------------------|----------------| +| fcps/atom | 0.65 | 0.65 | 0.65 | 0.66 | 1 | 1 | 1 | 1 | 0.65 | 1 | 1 | 0.65 | +| fcps/chainlink | 0.68 | 0.68 | 0.69 | 0.95 | 1 | 1 | 1 | 1 | 0.55 | 1 | 1 | 0.68 | +| fcps/engytime | 0.65 | 0.89 | 0.65 | 0.99 | 0.92 | 0.92 | 0.92 | 0.92 | 0.92 | 0.71 | 0.96 | 0.87 | +| fcps/hepta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0.91 | 1 | 1 | 1 | 1 | +| fcps/lsun | 0.6 | 0.63 | 0.63 | 1 | 1 | 1 | 1 | 1 | 0.65 | 1 | 0.88 | 0.61 | +| fcps/target | 0.71 | 0.8 | 0.71 | 0.8 | 1 | 1 | 1 | 1 | 0.79 | 1 | 1 | 0.79 | +| fcps/tetra | 0.99 | 0.98 | 0.99 | 1 | 1 | 1 | 1 | 1 | 1 | 0.49 | 1 | 0.98 | +| fcps/twodiamonds | 1 | 1 | 0.99 | 1 | 0.99 | 0.99 | 0.99 | 0.99 | 1 | 0.71 | 1 | 1 | +| fcps/wingnut | 1 | 1 | 1 | 0.93 | 1 | 1 | 1 | 1 | 0.93 | 1 | 0.89 | 0.8 | +| graves/dense | 0.96 | 0.96 | 0.67 | 1 | 0.98 | 0.98 | 0.98 | 1 | 0.9 | 0.69 | 0.96 | 0.96 | +| graves/fuzzyx | 0.8 | 0.95 | 0.97 | 1 | 0.8 | 0.66 | 0.66 | 0.96 | 1 | 0.71 | 0.7 | 0.89 | +| graves/line | 0.6 | 0.6 | 0.61 | 1 | 0.6 | 1 | 1 | 0.63 | 0.6 | 1 | 1 | 0.64 | +| graves/parabolic | 0.8 | 0.79 | 0.8 | 0.77 | 0.91 | 0.91 | 0.91 | 0.82 | 0.79 | 0.71 | 0.83 | 0.81 | +| graves/ring | 0.65 | 0.65 | 0.66 | 0.66 | 1 | 1 | 1 | 1 | 0.5 | 1 | 1 | 0.65 | +| graves/ring_noisy | 0.71 | 0.65 | 0.68 | 0.66 | 1 | 1 | 1 | 1 | 0.65 | 0.71 | 1 | 0.65 | +| graves/ring_outliers | 0.78 | 0.78 | 0.65 | 0.78 | 1 | 1 | 1 | 1 | 0.78 | 1 | 1 | 0.78 | +| 
graves/zigzag | 0.63 | 0.7 | 0.53 | 0.97 | 1 | 1 | 1 | 0.82 | 0.64 | 1 | 0.75 | 0.64 | +| graves/zigzag_noisy | 0.62 | 0.71 | 0.63 | 0.92 | 0.82 | 1 | 1 | 0.63 | 0.62 | 0.67 | 0.72 | 0.73 | +| graves/zigzag_outliers | 0.55 | 0.61 | 0.52 | 0.98 | 1 | 1 | 1 | 0.86 | 0.59 | 0.67 | 0.66 | 0.58 | +| other/chameleon_t4_8k | 0.72 | 0.69 | 0.63 | 0.64 | 0.86 | 1 | 0.94 | 0.87 | 0.67 | 0.44 | 0.69 | 0.68 | +| other/chameleon_t5_8k | 1 | 1 | 0.78 | 1 | 1 | 1 | 0.87 | 0.67 | 1 | 0.41 | 1 | 1 | +| other/chameleon_t7_10k | 0.54 | 0.54 | 0.47 | 0.5 | 0.61 | 0.76 | 1 | 0.62 | 0.51 | 0.43 | 0.48 | 0.52 | +| other/chameleon_t8_8k | 0.47 | 0.49 | 0.43 | 0.53 | 0.68 | 0.7 | 0.77 | 0.64 | 0.46 | 0.41 | 0.46 | 0.47 | +| other/hdbscan | 0.6 | 0.7 | 0.58 | 0.85 | 0.97 | 0.79 | 0.79 | 0.8 | 0.7 | 0.42 | 0.53 | 0.87 | +| other/iris | 0.84 | 0.82 | 0.77 | 0.94 | 0.92 | 0.92 | 0.75 | 0.92 | 0.82 | 0.76 | 0.83 | 0.82 | +| other/iris5 | 0.77 | 0.74 | 0.67 | 0.9 | 0.76 | 0.76 | 0.89 | 0.72 | 0.73 | 0.69 | 0.75 | 0.74 | +| other/square | 0.65 | 0.5 | 0.65 | 0.52 | 1 | 1 | 1 | 1 | 0.52 | 1 | 0.65 | 0.67 | +| sipu/a1 | 0.93 | 0.94 | 0.92 | 0.96 | 0.94 | 0.9 | 0.85 | 0.78 | 0.97 | 0.56 | 0.94 | 0.92 | +| sipu/a2 | 0.94 | 0.94 | 0.91 | 0.96 | 0.95 | 0.92 | 0.84 | 0.78 | 0.97 | 0.48 | 0.91 | 0.92 | +| sipu/a3 | 0.94 | 0.94 | 0.92 | 0.95 | 0.96 | 0.94 | 0.84 | 0.77 | 0.95 | 0.45 | 0.93 | 0.94 | +| sipu/aggregation | 1 | 0.86 | 0.83 | 1 | 0.58 | 0.66 | 0.91 | 0.69 | 0.82 | 0.86 | 0.99 | 0.86 | +| sipu/compound | 0.94 | 0.92 | 0.94 | 0.94 | 0.85 | 0.85 | 0.92 | 0.74 | 0.83 | 0.95 | 0.91 | 0.92 | +| sipu/d31 | 0.91 | 0.93 | 0.93 | 0.95 | 0.94 | 0.9 | 0.74 | 0.81 | 0.96 | 0.35 | 0.94 | 0.92 | +| sipu/flame | 0.73 | 0.63 | 0.62 | 0.68 | 1 | 1 | 1 | 0.69 | 0.75 | 0.73 | 0.92 | 0.63 | +| sipu/jain | 0.92 | 0.79 | 0.92 | 0.59 | 0.59 | 1 | 1 | 0.7 | 0.7 | 0.8 | 0.79 | 0.79 | +| sipu/pathbased | 0.73 | 0.67 | 0.6 | 0.74 | 0.98 | 0.98 | 0.8 | 0.69 | 0.66 | 0.57 | 0.74 | 0.67 | +| sipu/r15 | 1 | 1 | 1 | 1 | 0.99 
| 0.99 | 1 | 0.99 | 1 | 1 | 1 | 1 | +| sipu/s1 | 0.98 | 0.99 | 0.97 | 0.99 | 0.99 | 0.99 | 0.99 | 0.77 | 0.99 | 0.59 | 0.99 | 0.98 | +| sipu/s2 | 0.92 | 0.91 | 0.81 | 0.95 | 0.92 | 0.92 | 0.8 | 0.78 | 0.94 | 0.26 | 0.94 | 0.91 | +| sipu/s3 | 0.64 | 0.7 | 0.55 | 0.75 | 0.71 | 0.69 | 0.61 | 0.64 | 0.74 | 0.26 | 0.73 | 0.7 | +| sipu/s4 | 0.55 | 0.59 | 0.49 | 0.67 | 0.64 | 0.62 | 0.53 | 0.58 | 0.66 | 0.26 | 0.61 | 0.58 | +| sipu/spiral | 0.36 | 0.34 | 0.34 | 0.33 | 1 | 1 | 1 | 0.82 | 0.33 | 1 | 0.33 | 0.34 | +| sipu/unbalance | 1 | 1 | 0.77 | 1 | 0.69 | 0.73 | 0.84 | 0.66 | 1 | 1 | 1 | 1 | +| uci/ecoli | 0.82 | 0.61 | 0.72 | 0.71 | 0.51 | 0.59 | 0.75 | 0.48 | 0.59 | 0.53 | 0.62 | 0.61 | +| uci/glass | 0.49 | 0.5 | 0.55 | 0.47 | 0.33 | 0.48 | 0.48 | 0.41 | 0.51 | 0.51 | 0.47 | 0.51 | +| uci/ionosphere | 0.73 | 0.61 | 0.73 | 0.71 | 0.64 | 0.64 | 0.64 | 0.56 | 0.61 | 0.73 | 0.73 | 0.61 | +| uci/sonar | 0.65 | 0.53 | 0.53 | 0.51 | 0.52 | 0.52 | 0.64 | 0.51 | 0.5 | 0.7 | 0.7 | 0.53 | +| uci/statlog | 0.37 | 0.47 | 0.43 | 0.57 | 0.68 | 0.6 | 0.58 | 0.6 | 0.48 | 0.38 | nan | 0.45 | +| uci/wdbc | 0.72 | 0.74 | 0.72 | 0.87 | 0.6 | 0.74 | 0.74 | 0.82 | 0.79 | 0.73 | 0.73 | 0.74 | +| uci/wine | 0.62 | 0.58 | 0.59 | 0.88 | 0.58 | 0.58 | 0.59 | 0.6 | 0.58 | 0.56 | 0.63 | 0.58 | +| uci/yeast | 0.46 | 0.28 | 0.42 | 0.26 | 0.25 | 0.39 | 0.39 | 0.23 | 0.3 | 0.47 | 0.47 | 0.29 | +| wut/circles | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/cross | 0.49 | 0.45 | 0.6 | 0.64 | 0.82 | 0.62 | 0.44 | 0.67 | 0.44 | 0.5 | 0.5 | 0.44 | +| wut/graph | 0.49 | 0.51 | 0.48 | 0.88 | 0.54 | 0.55 | 0.49 | 0.57 | 0.51 | 0.31 | 0.49 | 0.53 | +| wut/isolation | 0.34 | 0.35 | 0.34 | 0.33 | 1 | 1 | 1 | 1 | 0.33 | 1 | 0.33 | 0.34 | +| wut/labirynth | 0.51 | 0.61 | 0.46 | 0.72 | 0.62 | 0.69 | 0.79 | 0.79 | 0.45 | 0.85 | 0.53 | 0.49 | +| wut/mk1 | 0.99 | 0.99 | 0.98 | 0.99 | 0.99 | 0.99 | 0.99 | 0.7 | 0.99 | 0.77 | 0.99 | 0.99 | +| wut/mk2 | 0.5 | 0.51 | 0.5 | 0.5 | 1 | 1 | 1 | 1 | 0.5 | 1 | 0.5 | 
0.51 | +| wut/mk3 | 0.77 | 0.9 | 0.9 | 0.92 | 0.86 | 0.86 | 0.75 | 0.7 | 0.93 | 0.57 | 0.92 | 0.91 | +| wut/mk4 | 0.49 | 0.49 | 0.55 | 0.67 | 1 | 1 | 1 | 0.73 | 0.5 | 1 | 0.51 | 0.5 | +| wut/olympic | 0.35 | 0.33 | 0.3 | 0.32 | 0.37 | 0.37 | 0.36 | 0.38 | 0.29 | 0.45 | 0.33 | 0.31 | +| wut/smile | 0.99 | 0.73 | 0.86 | 0.73 | 0.75 | 1 | 1 | 0.72 | 0.73 | 1 | 1 | 0.76 | +| wut/stripes | 0.51 | 0.54 | 0.53 | 0.51 | 1 | 1 | 1 | 1 | 0.51 | 1 | 0.51 | 0.55 | +| wut/trajectories | 1 | 1 | 0.8 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/trapped_lovers | 0.5 | 0.5 | 0.54 | 0.5 | 0.64 | 1 | 1 | 0.64 | 0.5 | 1 | 0.86 | 0.5 | +| wut/twosplashes | 0.69 | 0.64 | 0.61 | 0.84 | 0.75 | 0.75 | 0.75 | 0.87 | 0.64 | 0.7 | 0.64 | 0.6 | +| wut/windows | 0.39 | 0.39 | 0.39 | 0.41 | 0.43 | 0.5 | 1 | 0.58 | 0.37 | 1 | 0.43 | 0.4 | +| wut/x1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/x2 | 0.73 | 0.98 | 1 | 0.8 | 0.87 | 0.87 | 0.72 | 0.79 | 0.98 | 0.57 | 0.6 | 0.98 | +| wut/x3 | 0.97 | 0.99 | 0.62 | 0.96 | 0.91 | 0.97 | 0.97 | 0.76 | 1 | 0.66 | 0.69 | 0.99 | +| wut/z1 | 0.51 | 0.42 | 0.46 | 0.36 | 0.6 | 0.6 | 0.5 | 0.6 | 0.47 | 0.55 | 0.43 | 0.42 | +| wut/z2 | 0.68 | 0.66 | 0.62 | 1 | 0.66 | 0.76 | 0.89 | 0.65 | 0.64 | 0.86 | 0.99 | 0.61 | +| wut/z3 | 1 | 1 | 0.95 | 1 | 0.74 | 0.77 | 0.94 | 0.74 | 1 | 0.84 | 0.99 | 1 | + + +### ami + + +Summary statistics for ``ami`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 73 | 0.57 | 0.36 | 0 | 0.27 | 0.65 | 0.95 | 1 | +| Birch_0.01 | 73 | 0.61 | 0.33 | 0 | 0.34 | 0.68 | 0.95 | 1 | +| Complete linkage | 73 | 0.56 | 0.32 | 0 | 0.35 | 0.56 | 0.88 | 1 | +| Gaussian mixtures | 73 | 0.68 | 0.34 | 0 | 0.43 | 0.8 | 0.98 | 1 | +| Genie_0.1 | 73 | 0.79 | 0.26 | 0 | 0.71 | 0.87 | 1 | 1 | +| Genie_0.3 | 73 | 0.82 | 0.24 | 0 | 0.74 | 0.95 | 1 | 1 | +| Genie_0.5 | 73 | 0.82 | 0.25 | 0.06 | 0.74 | 0.94 | 1 | 1 | +| 
ITM | 73 | 0.75 | 0.24 | 0 | 0.63 | 0.77 | 0.97 | 1 | +| K-means | 73 | 0.6 | 0.33 | 0 | 0.36 | 0.65 | 0.97 | 1 | +| Single linkage | 73 | 0.49 | 0.46 | 0 | 0 | 0.71 | 1 | 1 | +| Spectral_RBF_5 | 72 | 0.67 | 0.34 | 0 | 0.47 | 0.76 | 0.99 | 1 | +| Ward linkage | 73 | 0.61 | 0.32 | 0 | 0.35 | 0.65 | 0.93 | 1 | + + +Ranks for ``ami`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 72 | 6.5 | 3.6 | 1 | 3.8 | 6 | 9 | 12 | +| Birch_0.01 | 72 | 5.7 | 3 | 1 | 3.8 | 6 | 8 | 12 | +| Complete linkage | 72 | 7.6 | 3.3 | 1 | 6 | 8 | 11 | 12 | +| Gaussian mixtures | 72 | 4.4 | 3.7 | 1 | 1 | 3 | 7 | 12 | +| Genie_0.1 | 72 | 3.7 | 3.2 | 1 | 1 | 3 | 5 | 12 | +| Genie_0.3 | 72 | 3.1 | 2.9 | 1 | 1 | 1.5 | 4.2 | 11 | +| Genie_0.5 | 72 | 3.9 | 3.6 | 1 | 1 | 1.5 | 7.2 | 12 | +| ITM | 72 | 5.6 | 3.9 | 1 | 1.8 | 5 | 9 | 12 | +| K-means | 72 | 5.6 | 3.9 | 1 | 1 | 6 | 9 | 12 | +| Single linkage | 72 | 7.5 | 5 | 1 | 1 | 11 | 12 | 12 | +| Spectral_RBF_5 | 72 | 5 | 3.5 | 1 | 1 | 4.5 | 8 | 11 | +| Ward linkage | 72 | 5.9 | 3.1 | 1 | 3.8 | 6 | 8 | 12 | + + +Raw results for ``ami`` (best=1.0): + + +| | Average linkage | Birch_0.01 | Complete linkage | Gaussian mixtures | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Spectral_RBF_5 | Ward linkage | +|------------------------|-------------------|--------------|--------------------|----------------------|-------------|-------------|-------------|-------|-----------|------------------|------------------|----------------| +| fcps/atom | 0.22 | 0.22 | 0.2 | 0.13 | 1 | 1 | 1 | 1 | 0.29 | 1 | 1 | 0.22 | +| fcps/chainlink | 0.36 | 0.37 | 0.39 | 0.84 | 1 | 1 | 1 | 1 | 0.07 | 1 | 1 | 0.37 | +| fcps/engytime | 0.16 | 0.68 | 0.15 | 0.96 | 0.79 | 0.79 | 0.79 | 0.74 | 0.77 | 0 | 0.87 | 0.68 | +| fcps/hepta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0.94 | 1 | 1 | 1 | 1 | +| fcps/lsun | 0.5 | 0.53 | 0.53 | 1 | 1 | 1 | 1 | 
1 | 0.54 | 1 | 0.82 | 0.51 | +| fcps/target | 0.33 | 0.64 | 0.38 | 0.65 | 1 | 1 | 1 | 1 | 0.63 | 1 | 1 | 0.63 | +| fcps/tetra | 0.99 | 0.96 | 0.98 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0.96 | +| fcps/twodiamonds | 0.99 | 0.99 | 0.97 | 1 | 0.95 | 0.95 | 0.95 | 0.97 | 1 | 0 | 1 | 1 | +| fcps/wingnut | 1 | 1 | 1 | 0.78 | 1 | 1 | 1 | 1 | 0.77 | 1 | 0.68 | 0.49 | +| graves/dense | 0.88 | 0.88 | 0.35 | 1 | 0.93 | 0.93 | 0.93 | 1 | 0.76 | 0.02 | 0.88 | 0.88 | +| graves/fuzzyx | 0.8 | 0.84 | 0.9 | 0.99 | 0.79 | 0.64 | 0.59 | 0.94 | 0.99 | 0 | 0.69 | 0.75 | +| graves/line | 0.14 | 0.14 | 0.14 | 1 | 0.2 | 1 | 1 | 0.25 | 0.18 | 1 | 1 | 0.1 | +| graves/parabolic | 0.49 | 0.57 | 0.51 | 0.43 | 0.74 | 0.74 | 0.74 | 0.61 | 0.48 | 0 | 0.55 | 0.52 | +| graves/ring | 0.23 | 0.24 | 0.31 | 0.13 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0.24 | +| graves/ring_noisy | 0 | 0.23 | 0.36 | 0.12 | 1 | 1 | 1 | 1 | 0.28 | 0 | 1 | 0.26 | +| graves/ring_outliers | 0.65 | 0.65 | 0.41 | 0.65 | 1 | 1 | 1 | 1 | 0.65 | 1 | 1 | 0.65 | +| graves/zigzag | 0.67 | 0.76 | 0.56 | 0.96 | 1 | 1 | 1 | 0.85 | 0.71 | 1 | 0.8 | 0.71 | +| graves/zigzag_noisy | 0.66 | 0.75 | 0.67 | 0.89 | 0.85 | 1 | 1 | 0.68 | 0.66 | 0.74 | 0.78 | 0.76 | +| graves/zigzag_outliers | 0.49 | 0.65 | 0.53 | 0.97 | 1 | 1 | 1 | 0.9 | 0.61 | 0.74 | 0.53 | 0.57 | +| other/chameleon_t4_8k | 0.76 | 0.73 | 0.62 | 0.69 | 0.91 | 1 | 0.95 | 0.87 | 0.7 | 0 | 0.72 | 0.73 | +| other/chameleon_t5_8k | 1 | 1 | 0.83 | 1 | 1 | 1 | 0.93 | 0.75 | 1 | 0 | 1 | 1 | +| other/chameleon_t7_10k | 0.69 | 0.71 | 0.6 | 0.68 | 0.78 | 0.87 | 1 | 0.76 | 0.66 | 0 | 0.64 | 0.69 | +| other/chameleon_t8_8k | 0.59 | 0.58 | 0.55 | 0.64 | 0.79 | 0.79 | 0.86 | 0.76 | 0.59 | 0 | 0.58 | 0.59 | +| other/hdbscan | 0.62 | 0.75 | 0.61 | 0.82 | 0.97 | 0.87 | 0.87 | 0.85 | 0.73 | 0 | 0.55 | 0.86 | +| other/iris | 0.8 | 0.77 | 0.72 | 0.9 | 0.87 | 0.87 | 0.7 | 0.87 | 0.76 | 0.71 | 0.8 | 0.77 | +| other/iris5 | 0.63 | 0.56 | 0.46 | 0.81 | 0.58 | 0.58 | 0.76 | 0.54 | 0.54 | 0.34 | 0.61 | 0.56 | +| 
other/square | 0.27 | 0 | 0.28 | 0.03 | 1 | 1 | 1 | 1 | 0.02 | 1 | 0.28 | 0.35 | +| sipu/a1 | 0.95 | 0.96 | 0.95 | 0.97 | 0.96 | 0.95 | 0.94 | 0.89 | 0.97 | 0.78 | 0.96 | 0.95 | +| sipu/a2 | 0.96 | 0.96 | 0.95 | 0.98 | 0.97 | 0.96 | 0.94 | 0.9 | 0.98 | 0.76 | 0.96 | 0.96 | +| sipu/a3 | 0.97 | 0.97 | 0.96 | 0.97 | 0.97 | 0.97 | 0.95 | 0.91 | 0.97 | 0.76 | 0.96 | 0.97 | +| sipu/aggregation | 1 | 0.92 | 0.9 | 1 | 0.7 | 0.76 | 0.92 | 0.78 | 0.88 | 0.88 | 0.99 | 0.92 | +| sipu/compound | 0.93 | 0.88 | 0.93 | 0.93 | 0.85 | 0.85 | 0.88 | 0.74 | 0.83 | 0.93 | 0.86 | 0.88 | +| sipu/d31 | 0.95 | 0.95 | 0.95 | 0.96 | 0.96 | 0.95 | 0.9 | 0.91 | 0.97 | 0.63 | 0.96 | 0.95 | +| sipu/flame | 0.48 | 0.35 | 0.12 | 0.42 | 1 | 1 | 1 | 0.43 | 0.43 | 0.02 | 0.73 | 0.35 | +| sipu/jain | 0.7 | 0.5 | 0.7 | 0.2 | 0.23 | 1 | 1 | 0.39 | 0.37 | 0.24 | 0.5 | 0.5 | +| sipu/pathbased | 0.64 | 0.59 | 0.5 | 0.66 | 0.95 | 0.95 | 0.81 | 0.61 | 0.58 | 0 | 0.67 | 0.59 | +| sipu/r15 | 1 | 1 | 1 | 1 | 0.99 | 0.99 | 1 | 0.99 | 1 | 1 | 1 | 1 | +| sipu/s1 | 0.98 | 0.99 | 0.98 | 0.99 | 0.99 | 0.99 | 0.99 | 0.88 | 0.99 | 0.79 | 0.99 | 0.98 | +| sipu/s2 | 0.93 | 0.92 | 0.88 | 0.95 | 0.93 | 0.93 | 0.91 | 0.86 | 0.95 | 0 | 0.94 | 0.93 | +| sipu/s3 | 0.75 | 0.77 | 0.7 | 0.8 | 0.78 | 0.77 | 0.75 | 0.74 | 0.79 | 0 | 0.79 | 0.77 | +| sipu/s4 | 0.66 | 0.69 | 0.63 | 0.73 | 0.71 | 0.7 | 0.66 | 0.68 | 0.72 | 0 | 0.71 | 0.69 | +| sipu/spiral | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0.78 | 0 | 1 | 0 | 0 | +| sipu/unbalance | 1 | 1 | 0.82 | 1 | 0.75 | 0.77 | 0.82 | 0.75 | 1 | 0.99 | 1 | 1 | +| uci/ecoli | 0.71 | 0.62 | 0.64 | 0.57 | 0.49 | 0.54 | 0.57 | 0.49 | 0.58 | 0.11 | 0.48 | 0.62 | +| uci/glass | 0.07 | 0.34 | 0.35 | 0.33 | 0.25 | 0.38 | 0.34 | 0.33 | 0.4 | 0.03 | 0.31 | 0.37 | +| uci/ionosphere | 0 | 0.14 | 0.01 | 0.32 | 0.13 | 0.13 | 0.06 | 0.09 | 0.13 | 0 | 0 | 0.14 | +| uci/sonar | 0 | 0 | 0 | 0 | 0 | 0 | 0.07 | 0 | 0.01 | 0 | 0 | 0 | +| uci/statlog | 0.01 | 0.53 | 0.35 | 0.61 | 0.68 | 0.68 | 0.7 | 0.63 | 0.52 | 0 | nan | 
0.49 | +| uci/wdbc | 0.09 | 0.32 | 0.09 | 0.61 | 0.24 | 0.31 | 0.31 | 0.51 | 0.46 | 0 | 0 | 0.32 | +| uci/wine | 0.4 | 0.41 | 0.44 | 0.82 | 0.41 | 0.41 | 0.38 | 0.37 | 0.42 | 0.04 | 0.42 | 0.41 | +| uci/yeast | 0.05 | 0.22 | 0.18 | 0.14 | 0.22 | 0.25 | 0.19 | 0.19 | 0.26 | 0.05 | 0.06 | 0.23 | +| wut/circles | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/cross | 0.04 | 0.18 | 0.48 | 0.61 | 0.81 | 0.62 | 0.36 | 0.7 | 0.36 | 0 | 0.01 | 0.31 | +| wut/graph | 0.62 | 0.63 | 0.6 | 0.89 | 0.68 | 0.69 | 0.62 | 0.69 | 0.64 | 0.23 | 0.62 | 0.65 | +| wut/isolation | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | +| wut/labirynth | 0.59 | 0.71 | 0.54 | 0.76 | 0.71 | 0.78 | 0.88 | 0.87 | 0.5 | 0.85 | 0.6 | 0.58 | +| wut/mk1 | 0.98 | 0.98 | 0.95 | 0.98 | 0.98 | 0.98 | 0.98 | 0.61 | 0.98 | 0.72 | 0.98 | 0.98 | +| wut/mk2 | 0 | 0.01 | 0.01 | 0 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0 | 0 | +| wut/mk3 | 0.71 | 0.83 | 0.83 | 0.85 | 0.8 | 0.8 | 0.69 | 0.61 | 0.86 | 0 | 0.85 | 0.84 | +| wut/mk4 | 0.11 | 0.2 | 0.3 | 0.58 | 1 | 1 | 1 | 0.65 | 0.25 | 1 | 0.25 | 0.24 | +| wut/olympic | 0.31 | 0.27 | 0.21 | 0.25 | 0.33 | 0.33 | 0.31 | 0.31 | 0.2 | 0 | 0.31 | 0.23 | +| wut/smile | 0.98 | 0.79 | 0.83 | 0.79 | 0.8 | 1 | 1 | 0.85 | 0.79 | 1 | 1 | 0.8 | +| wut/stripes | 0 | 0 | 0.01 | 0.01 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.01 | 0.01 | +| wut/trajectories | 1 | 1 | 0.83 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/trapped_lovers | 0.35 | 0.36 | 0.45 | 0.36 | 0.62 | 1 | 1 | 0.62 | 0.38 | 1 | 0.74 | 0.39 | +| wut/twosplashes | 0.04 | 0.21 | 0.17 | 0.56 | 0.4 | 0.4 | 0.4 | 0.69 | 0.21 | 0.01 | 0.21 | 0.15 | +| wut/windows | 0.4 | 0.4 | 0.4 | 0.43 | 0.48 | 0.56 | 1 | 0.65 | 0.35 | 1 | 0.43 | 0.4 | +| wut/x1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/x2 | 0.72 | 0.97 | 1 | 0.75 | 0.84 | 0.84 | 0.72 | 0.77 | 0.97 | 0 | 0.41 | 0.97 | +| wut/x3 | 0.91 | 0.97 | 0.63 | 0.91 | 0.87 | 0.91 | 0.91 | 0.69 | 1 | 0.01 | 0.68 | 0.97 | +| wut/z1 | 0.32 | 0.13 | 0.26 | 0.03 | 0.47 | 0.47 | 0.24 | 0.46 
| 0.27 | 0.05 | 0.19 | 0.13 | +| wut/z2 | 0.72 | 0.72 | 0.64 | 1 | 0.72 | 0.74 | 0.86 | 0.71 | 0.68 | 0.81 | 0.98 | 0.69 | +| wut/z3 | 0.99 | 0.99 | 0.93 | 0.99 | 0.74 | 0.74 | 0.91 | 0.75 | 1 | 0.84 | 0.97 | 1 | + + +### nmi + + +Summary statistics for ``nmi`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 73 | 0.58 | 0.36 | 0 | 0.27 | 0.66 | 0.95 | 1 | +| Birch_0.01 | 73 | 0.61 | 0.33 | 0 | 0.35 | 0.68 | 0.96 | 1 | +| Complete linkage | 73 | 0.57 | 0.32 | 0 | 0.35 | 0.57 | 0.88 | 1 | +| Gaussian mixtures | 73 | 0.68 | 0.34 | 0 | 0.43 | 0.8 | 0.98 | 1 | +| Genie_0.1 | 73 | 0.79 | 0.26 | 0 | 0.71 | 0.87 | 1 | 1 | +| Genie_0.3 | 73 | 0.83 | 0.24 | 0 | 0.74 | 0.95 | 1 | 1 | +| Genie_0.5 | 73 | 0.82 | 0.25 | 0.07 | 0.74 | 0.94 | 1 | 1 | +| ITM | 73 | 0.75 | 0.24 | 0.01 | 0.63 | 0.78 | 0.97 | 1 | +| K-means | 73 | 0.61 | 0.33 | 0 | 0.37 | 0.65 | 0.97 | 1 | +| Single linkage | 73 | 0.5 | 0.45 | 0 | 0.01 | 0.72 | 1 | 1 | +| Spectral_RBF_5 | 72 | 0.67 | 0.34 | 0 | 0.49 | 0.76 | 0.99 | 1 | +| Ward linkage | 73 | 0.61 | 0.32 | 0 | 0.35 | 0.66 | 0.93 | 1 | + + +Ranks for ``nmi`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 72 | 6.6 | 3.6 | 1 | 4 | 6.5 | 9.2 | 12 | +| Birch_0.01 | 72 | 5.8 | 2.9 | 1 | 4 | 6 | 8 | 12 | +| Complete linkage | 72 | 7.7 | 3.3 | 1 | 6 | 8.5 | 11 | 12 | +| Gaussian mixtures | 72 | 4.4 | 3.7 | 1 | 1 | 3 | 7.2 | 12 | +| Genie_0.1 | 72 | 3.8 | 3.3 | 1 | 1 | 2.5 | 5.2 | 12 | +| Genie_0.3 | 72 | 3.1 | 2.9 | 1 | 1 | 1.5 | 4 | 11 | +| Genie_0.5 | 72 | 3.8 | 3.6 | 1 | 1 | 1.5 | 7.2 | 12 | +| ITM | 72 | 5.5 | 3.9 | 1 | 1 | 5.5 | 9 | 12 | +| K-means | 72 | 5.7 | 3.9 | 1 | 1 | 6 | 9 | 12 | +| Single linkage | 72 | 7.5 | 5 | 1 | 1 | 10.5 | 12 | 12 | +| Spectral_RBF_5 | 72 | 5 | 3.5 | 1 | 1 | 4 
| 8 | 11 | +| Ward linkage | 72 | 6 | 3.1 | 1 | 4 | 6 | 8 | 12 | + + +Raw results for ``nmi`` (best=1.0): + + +| | Average linkage | Birch_0.01 | Complete linkage | Gaussian mixtures | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Spectral_RBF_5 | Ward linkage | +|------------------------|-------------------|--------------|--------------------|----------------------|-------------|-------------|-------------|-------|-----------|------------------|------------------|----------------| +| fcps/atom | 0.22 | 0.22 | 0.2 | 0.13 | 1 | 1 | 1 | 1 | 0.29 | 1 | 1 | 0.22 | +| fcps/chainlink | 0.36 | 0.37 | 0.39 | 0.84 | 1 | 1 | 1 | 1 | 0.07 | 1 | 1 | 0.37 | +| fcps/engytime | 0.16 | 0.68 | 0.15 | 0.96 | 0.79 | 0.79 | 0.79 | 0.74 | 0.77 | 0 | 0.87 | 0.68 | +| fcps/hepta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0.95 | 1 | 1 | 1 | 1 | +| fcps/lsun | 0.5 | 0.53 | 0.53 | 1 | 1 | 1 | 1 | 1 | 0.54 | 1 | 0.82 | 0.51 | +| fcps/target | 0.34 | 0.64 | 0.38 | 0.65 | 1 | 1 | 1 | 1 | 0.64 | 1 | 1 | 0.64 | +| fcps/tetra | 0.99 | 0.96 | 0.98 | 1 | 1 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.96 | +| fcps/twodiamonds | 0.99 | 0.99 | 0.97 | 1 | 0.95 | 0.95 | 0.95 | 0.97 | 1 | 0 | 1 | 1 | +| fcps/wingnut | 1 | 1 | 1 | 0.78 | 1 | 1 | 1 | 1 | 0.77 | 1 | 0.68 | 0.49 | +| graves/dense | 0.88 | 0.88 | 0.35 | 1 | 0.93 | 0.93 | 0.93 | 1 | 0.76 | 0.03 | 0.88 | 0.88 | +| graves/fuzzyx | 0.8 | 0.84 | 0.9 | 0.99 | 0.79 | 0.64 | 0.59 | 0.94 | 0.99 | 0.01 | 0.69 | 0.75 | +| graves/line | 0.14 | 0.14 | 0.14 | 1 | 0.2 | 1 | 1 | 0.25 | 0.18 | 1 | 1 | 0.1 | +| graves/parabolic | 0.49 | 0.57 | 0.51 | 0.43 | 0.74 | 0.74 | 0.74 | 0.61 | 0.48 | 0.01 | 0.55 | 0.52 | +| graves/ring | 0.23 | 0.24 | 0.31 | 0.13 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0.24 | +| graves/ring_noisy | 0 | 0.24 | 0.36 | 0.12 | 1 | 1 | 1 | 1 | 0.28 | 0 | 1 | 0.27 | +| graves/ring_outliers | 0.66 | 0.66 | 0.41 | 0.65 | 1 | 1 | 1 | 1 | 0.65 | 1 | 1 | 0.66 | +| graves/zigzag | 0.67 | 0.76 | 0.57 | 0.96 | 1 | 1 | 1 | 0.85 | 0.71 | 1 | 0.81 | 0.71 | +| 
graves/zigzag_noisy | 0.67 | 0.76 | 0.68 | 0.89 | 0.85 | 1 | 1 | 0.68 | 0.67 | 0.74 | 0.79 | 0.77 | +| graves/zigzag_outliers | 0.5 | 0.66 | 0.54 | 0.98 | 1 | 1 | 1 | 0.9 | 0.62 | 0.74 | 0.53 | 0.57 | +| other/chameleon_t4_8k | 0.76 | 0.73 | 0.62 | 0.69 | 0.91 | 1 | 0.95 | 0.87 | 0.7 | 0 | 0.72 | 0.73 | +| other/chameleon_t5_8k | 1 | 1 | 0.83 | 1 | 1 | 1 | 0.93 | 0.75 | 1 | 0 | 1 | 1 | +| other/chameleon_t7_10k | 0.69 | 0.71 | 0.6 | 0.68 | 0.78 | 0.87 | 1 | 0.76 | 0.66 | 0 | 0.64 | 0.69 | +| other/chameleon_t8_8k | 0.59 | 0.58 | 0.55 | 0.64 | 0.79 | 0.79 | 0.86 | 0.76 | 0.59 | 0 | 0.58 | 0.59 | +| other/hdbscan | 0.62 | 0.75 | 0.61 | 0.82 | 0.97 | 0.87 | 0.88 | 0.85 | 0.73 | 0 | 0.55 | 0.86 | +| other/iris | 0.81 | 0.77 | 0.72 | 0.9 | 0.87 | 0.87 | 0.71 | 0.87 | 0.76 | 0.72 | 0.8 | 0.77 | +| other/iris5 | 0.64 | 0.57 | 0.47 | 0.81 | 0.59 | 0.59 | 0.76 | 0.55 | 0.55 | 0.36 | 0.62 | 0.57 | +| other/square | 0.27 | 0 | 0.28 | 0.03 | 1 | 1 | 1 | 1 | 0.02 | 1 | 0.28 | 0.35 | +| sipu/a1 | 0.95 | 0.96 | 0.95 | 0.97 | 0.96 | 0.95 | 0.94 | 0.89 | 0.97 | 0.79 | 0.96 | 0.95 | +| sipu/a2 | 0.96 | 0.96 | 0.95 | 0.98 | 0.97 | 0.96 | 0.95 | 0.91 | 0.98 | 0.77 | 0.96 | 0.96 | +| sipu/a3 | 0.97 | 0.97 | 0.96 | 0.98 | 0.98 | 0.97 | 0.95 | 0.91 | 0.98 | 0.76 | 0.97 | 0.97 | +| sipu/aggregation | 1 | 0.92 | 0.9 | 1 | 0.71 | 0.76 | 0.92 | 0.78 | 0.88 | 0.88 | 0.99 | 0.92 | +| sipu/compound | 0.93 | 0.88 | 0.93 | 0.93 | 0.85 | 0.85 | 0.89 | 0.75 | 0.83 | 0.93 | 0.86 | 0.88 | +| sipu/d31 | 0.95 | 0.96 | 0.95 | 0.96 | 0.96 | 0.95 | 0.91 | 0.91 | 0.97 | 0.64 | 0.96 | 0.95 | +| sipu/flame | 0.48 | 0.35 | 0.13 | 0.42 | 1 | 1 | 1 | 0.43 | 0.43 | 0.02 | 0.73 | 0.35 | +| sipu/jain | 0.7 | 0.51 | 0.7 | 0.2 | 0.23 | 1 | 1 | 0.39 | 0.37 | 0.25 | 0.51 | 0.51 | +| sipu/pathbased | 0.64 | 0.6 | 0.51 | 0.66 | 0.95 | 0.95 | 0.81 | 0.61 | 0.59 | 0.02 | 0.67 | 0.6 | +| sipu/r15 | 1 | 1 | 1 | 1 | 0.99 | 0.99 | 1 | 0.99 | 1 | 1 | 1 | 1 | +| sipu/s1 | 0.98 | 0.99 | 0.98 | 0.99 | 0.99 | 0.99 | 0.99 | 0.88 | 
0.99 | 0.79 | 0.99 | 0.98 | +| sipu/s2 | 0.93 | 0.92 | 0.88 | 0.95 | 0.93 | 0.93 | 0.91 | 0.86 | 0.95 | 0.01 | 0.94 | 0.93 | +| sipu/s3 | 0.75 | 0.77 | 0.71 | 0.8 | 0.78 | 0.78 | 0.75 | 0.75 | 0.79 | 0.01 | 0.79 | 0.77 | +| sipu/s4 | 0.67 | 0.69 | 0.63 | 0.73 | 0.72 | 0.71 | 0.67 | 0.68 | 0.72 | 0.01 | 0.71 | 0.69 | +| sipu/spiral | 0 | 0.01 | 0.01 | 0 | 1 | 1 | 1 | 0.79 | 0 | 1 | 0 | 0.01 | +| sipu/unbalance | 1 | 1 | 0.82 | 1 | 0.75 | 0.77 | 0.82 | 0.75 | 1 | 0.99 | 1 | 1 | +| uci/ecoli | 0.72 | 0.63 | 0.65 | 0.59 | 0.51 | 0.56 | 0.59 | 0.51 | 0.6 | 0.15 | 0.51 | 0.63 | +| uci/glass | 0.11 | 0.37 | 0.38 | 0.36 | 0.28 | 0.41 | 0.37 | 0.35 | 0.43 | 0.07 | 0.34 | 0.39 | +| uci/ionosphere | 0.01 | 0.14 | 0.02 | 0.32 | 0.13 | 0.13 | 0.07 | 0.09 | 0.13 | 0.01 | 0.01 | 0.14 | +| uci/sonar | 0.01 | 0 | 0 | 0.01 | 0 | 0 | 0.08 | 0.01 | 0.01 | 0.01 | 0.01 | 0 | +| uci/statlog | 0.02 | 0.53 | 0.35 | 0.62 | 0.68 | 0.68 | 0.7 | 0.63 | 0.52 | 0.01 | nan | 0.49 | +| uci/wdbc | 0.09 | 0.32 | 0.09 | 0.61 | 0.24 | 0.32 | 0.32 | 0.51 | 0.46 | 0.01 | 0.01 | 0.32 | +| uci/wine | 0.4 | 0.42 | 0.44 | 0.82 | 0.42 | 0.42 | 0.39 | 0.38 | 0.43 | 0.06 | 0.43 | 0.42 | +| uci/yeast | 0.07 | 0.23 | 0.19 | 0.15 | 0.23 | 0.27 | 0.2 | 0.2 | 0.27 | 0.07 | 0.07 | 0.24 | +| wut/circles | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/cross | 0.04 | 0.19 | 0.48 | 0.61 | 0.81 | 0.62 | 0.36 | 0.7 | 0.37 | 0 | 0.01 | 0.31 | +| wut/graph | 0.62 | 0.63 | 0.61 | 0.89 | 0.68 | 0.69 | 0.62 | 0.7 | 0.64 | 0.24 | 0.63 | 0.66 | +| wut/isolation | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | +| wut/labirynth | 0.59 | 0.71 | 0.54 | 0.76 | 0.71 | 0.78 | 0.88 | 0.87 | 0.5 | 0.85 | 0.6 | 0.58 | +| wut/mk1 | 0.98 | 0.98 | 0.95 | 0.98 | 0.98 | 0.98 | 0.98 | 0.62 | 0.98 | 0.73 | 0.98 | 0.98 | +| wut/mk2 | 0 | 0.01 | 0.01 | 0.01 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0 | 0 | +| wut/mk3 | 0.71 | 0.83 | 0.83 | 0.85 | 0.8 | 0.8 | 0.69 | 0.61 | 0.86 | 0.01 | 0.85 | 0.84 | +| wut/mk4 | 0.12 | 0.2 | 0.3 | 0.58 | 1 | 1 | 1 | 
0.65 | 0.25 | 1 | 0.25 | 0.24 | +| wut/olympic | 0.31 | 0.27 | 0.21 | 0.25 | 0.33 | 0.34 | 0.31 | 0.31 | 0.2 | 0 | 0.31 | 0.23 | +| wut/smile | 0.98 | 0.79 | 0.83 | 0.79 | 0.8 | 1 | 1 | 0.85 | 0.79 | 1 | 1 | 0.81 | +| wut/stripes | 0 | 0 | 0.01 | 0.01 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.01 | 0.01 | +| wut/trajectories | 1 | 1 | 0.83 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/trapped_lovers | 0.35 | 0.36 | 0.45 | 0.36 | 0.63 | 1 | 1 | 0.62 | 0.38 | 1 | 0.74 | 0.39 | +| wut/twosplashes | 0.04 | 0.21 | 0.17 | 0.56 | 0.4 | 0.4 | 0.4 | 0.69 | 0.21 | 0.01 | 0.21 | 0.15 | +| wut/windows | 0.41 | 0.41 | 0.4 | 0.43 | 0.48 | 0.56 | 1 | 0.65 | 0.35 | 1 | 0.43 | 0.4 | +| wut/x1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/x2 | 0.73 | 0.97 | 1 | 0.75 | 0.85 | 0.85 | 0.73 | 0.78 | 0.97 | 0.03 | 0.43 | 0.97 | +| wut/x3 | 0.91 | 0.97 | 0.64 | 0.91 | 0.87 | 0.91 | 0.91 | 0.7 | 1 | 0.04 | 0.69 | 0.97 | +| wut/z1 | 0.33 | 0.14 | 0.27 | 0.04 | 0.47 | 0.47 | 0.24 | 0.47 | 0.27 | 0.07 | 0.19 | 0.14 | +| wut/z2 | 0.72 | 0.72 | 0.64 | 1 | 0.72 | 0.74 | 0.86 | 0.72 | 0.68 | 0.81 | 0.98 | 0.69 | +| wut/z3 | 0.99 | 0.99 | 0.93 | 0.99 | 0.74 | 0.75 | 0.91 | 0.75 | 1 | 0.84 | 0.97 | 1 | + + +### npa + + +Summary statistics for ``npa`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 73 | 0.59 | 0.34 | 0 | 0.32 | 0.6 | 0.96 | 1 | +| Birch_0.01 | 73 | 0.64 | 0.31 | 0.01 | 0.44 | 0.67 | 0.96 | 1 | +| Complete linkage | 73 | 0.59 | 0.3 | 0.02 | 0.39 | 0.56 | 0.89 | 1 | +| Gaussian mixtures | 73 | 0.71 | 0.33 | 0.01 | 0.51 | 0.86 | 0.99 | 1 | +| Genie_0.1 | 73 | 0.79 | 0.26 | 0.02 | 0.63 | 0.94 | 1 | 1 | +| Genie_0.3 | 73 | 0.83 | 0.22 | 0.02 | 0.68 | 0.96 | 1 | 1 | +| Genie_0.5 | 73 | 0.81 | 0.25 | 0.08 | 0.71 | 0.92 | 1 | 1 | +| ITM | 73 | 0.75 | 0.23 | 0.08 | 0.6 | 0.8 | 0.99 | 1 | +| K-means | 73 | 0.64 | 0.31 | 0.01 | 0.42 | 0.65 | 0.97 | 1 | +| Single 
linkage | 73 | 0.49 | 0.42 | 0 | 0.07 | 0.37 | 1 | 1 | +| Spectral_RBF_5 | 72 | 0.7 | 0.32 | 0.01 | 0.42 | 0.83 | 1 | 1 | +| Ward linkage | 73 | 0.64 | 0.29 | 0.02 | 0.41 | 0.63 | 0.95 | 1 | + + +Ranks for ``npa`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 72 | 6.3 | 3.8 | 1 | 2.8 | 7 | 9.2 | 12 | +| Birch_0.01 | 72 | 5.9 | 3.1 | 1 | 4 | 6 | 8 | 12 | +| Complete linkage | 72 | 7.6 | 3.1 | 1 | 6.8 | 8 | 10 | 12 | +| Gaussian mixtures | 72 | 4.3 | 3.9 | 1 | 1 | 3 | 7 | 12 | +| Genie_0.1 | 72 | 4 | 3.6 | 1 | 1 | 2 | 7 | 12 | +| Genie_0.3 | 72 | 3.5 | 3.2 | 1 | 1 | 2 | 6 | 11 | +| Genie_0.5 | 72 | 4.1 | 3.8 | 1 | 1 | 1.5 | 7.2 | 12 | +| ITM | 72 | 5.4 | 3.9 | 1 | 1 | 5 | 9 | 12 | +| K-means | 72 | 5.7 | 3.8 | 1 | 1 | 6.5 | 9 | 12 | +| Single linkage | 72 | 7.5 | 5.1 | 1 | 1 | 11 | 12 | 12 | +| Spectral_RBF_5 | 72 | 5.1 | 3.6 | 1 | 1 | 5 | 8 | 12 | +| Ward linkage | 72 | 6.1 | 3.2 | 1 | 4 | 6 | 8 | 12 | + + +Raw results for ``npa`` (best=1.0): + + +| | Average linkage | Birch_0.01 | Complete linkage | Gaussian mixtures | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Spectral_RBF_5 | Ward linkage | +|------------------------|-------------------|--------------|--------------------|----------------------|-------------|-------------|-------------|-------|-----------|------------------|------------------|----------------| +| fcps/atom | 0.31 | 0.31 | 0.29 | 0.17 | 1 | 1 | 1 | 1 | 0.43 | 1 | 1 | 0.31 | +| fcps/chainlink | 0.52 | 0.53 | 0.56 | 0.95 | 1 | 1 | 1 | 1 | 0.31 | 1 | 1 | 0.53 | +| fcps/engytime | 0.23 | 0.88 | 0.2 | 0.99 | 0.92 | 0.92 | 0.92 | 0.91 | 0.92 | 0 | 0.96 | 0.86 | +| fcps/hepta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0.94 | 1 | 1 | 1 | 1 | +| fcps/lsun | 0.58 | 0.62 | 0.62 | 1 | 1 | 1 | 1 | 1 | 0.65 | 1 | 0.9 | 0.58 | +| fcps/target | 0.6 | 0.6 | 0.66 | 0.58 | 1 | 1 | 1 | 1 | 0.55 | 1 | 1 | 0.59 | +| 
fcps/tetra | 1 | 0.98 | 0.99 | 1 | 1 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.98 | +| fcps/twodiamonds | 1 | 1 | 0.99 | 1 | 0.99 | 0.99 | 0.99 | 0.99 | 1 | 0 | 1 | 1 | +| fcps/wingnut | 1 | 1 | 1 | 0.93 | 1 | 1 | 1 | 1 | 0.93 | 1 | 0.88 | 0.78 | +| graves/dense | 0.96 | 0.96 | 0.51 | 1 | 0.98 | 0.98 | 0.98 | 1 | 0.9 | 0.03 | 0.96 | 0.96 | +| graves/fuzzyx | 0.86 | 0.95 | 0.97 | 1 | 0.84 | 0.72 | 0.63 | 0.97 | 1 | 0.03 | 0.78 | 0.88 | +| graves/line | 0.01 | 0.01 | 0.02 | 1 | 0.19 | 1 | 1 | 0.35 | 0.14 | 1 | 1 | 0.19 | +| graves/parabolic | 0.77 | 0.76 | 0.77 | 0.73 | 0.9 | 0.9 | 0.9 | 0.8 | 0.77 | 0.02 | 0.81 | 0.79 | +| graves/ring | 0.34 | 0.34 | 0.46 | 0.17 | 1 | 1 | 1 | 1 | 0.01 | 1 | 1 | 0.34 | +| graves/ring_noisy | 0 | 0.34 | 0.52 | 0.16 | 1 | 1 | 1 | 1 | 0.4 | 0 | 1 | 0.38 | +| graves/ring_outliers | 0.58 | 0.57 | 0.53 | 0.55 | 1 | 1 | 1 | 1 | 0.55 | 1 | 1 | 0.58 | +| graves/zigzag | 0.69 | 0.73 | 0.56 | 0.98 | 1 | 1 | 1 | 0.86 | 0.7 | 1 | 0.82 | 0.63 | +| graves/zigzag_noisy | 0.65 | 0.77 | 0.58 | 0.95 | 0.88 | 1 | 1 | 0.67 | 0.64 | 0.5 | 0.78 | 0.79 | +| graves/zigzag_outliers | 0.34 | 0.53 | 0.46 | 0.99 | 1 | 1 | 1 | 0.9 | 0.51 | 0.53 | 0.4 | 0.42 | +| other/chameleon_t4_8k | 0.72 | 0.63 | 0.6 | 0.63 | 0.77 | 1 | 0.89 | 0.83 | 0.63 | 0.09 | 0.64 | 0.62 | +| other/chameleon_t5_8k | 1 | 1 | 0.79 | 1 | 1 | 1 | 0.82 | 0.69 | 1 | 0.01 | 1 | 1 | +| other/chameleon_t7_10k | 0.49 | 0.45 | 0.43 | 0.4 | 0.55 | 0.77 | 1 | 0.61 | 0.44 | 0.21 | 0.41 | 0.51 | +| other/chameleon_t8_8k | 0.41 | 0.46 | 0.39 | 0.51 | 0.6 | 0.6 | 0.79 | 0.58 | 0.4 | 0.09 | 0.4 | 0.4 | +| other/hdbscan | 0.4 | 0.72 | 0.54 | 0.86 | 0.98 | 0.74 | 0.74 | 0.86 | 0.77 | 0.07 | 0.47 | 0.91 | +| other/iris | 0.86 | 0.84 | 0.76 | 0.95 | 0.94 | 0.94 | 0.55 | 0.94 | 0.84 | 0.52 | 0.85 | 0.84 | +| other/iris5 | 0.8 | 0.77 | 0.66 | 0.93 | 0.66 | 0.66 | 0.91 | 0.54 | 0.77 | 0.31 | 0.79 | 0.77 | +| other/square | 0.39 | 0.01 | 0.41 | 0.19 | 1 | 1 | 1 | 1 | 0.17 | 1 | 0.41 | 0.5 | +| sipu/a1 | 0.96 | 0.96 | 0.96 | 
0.98 | 0.97 | 0.91 | 0.84 | 0.82 | 0.98 | 0.37 | 0.97 | 0.95 | +| sipu/a2 | 0.97 | 0.97 | 0.95 | 0.98 | 0.97 | 0.94 | 0.83 | 0.83 | 0.98 | 0.3 | 0.93 | 0.96 | +| sipu/a3 | 0.97 | 0.97 | 0.96 | 0.96 | 0.98 | 0.95 | 0.84 | 0.83 | 0.96 | 0.25 | 0.95 | 0.97 | +| sipu/aggregation | 1 | 0.82 | 0.75 | 1 | 0.55 | 0.63 | 0.86 | 0.63 | 0.75 | 0.79 | 1 | 0.81 | +| sipu/compound | 0.94 | 0.81 | 0.94 | 0.94 | 0.75 | 0.76 | 0.89 | 0.66 | 0.73 | 0.94 | 0.82 | 0.81 | +| sipu/d31 | 0.94 | 0.96 | 0.96 | 0.97 | 0.97 | 0.93 | 0.76 | 0.85 | 0.98 | 0.24 | 0.97 | 0.96 | +| sipu/flame | 0.67 | 0.47 | 0.03 | 0.59 | 1 | 1 | 1 | 0.6 | 0.69 | 0.29 | 0.91 | 0.47 | +| sipu/jain | 0.89 | 0.72 | 0.89 | 0.16 | 0.25 | 1 | 1 | 0.57 | 0.57 | 0.62 | 0.72 | 0.72 | +| sipu/pathbased | 0.66 | 0.64 | 0.52 | 0.66 | 0.98 | 0.98 | 0.74 | 0.68 | 0.62 | 0.09 | 0.68 | 0.64 | +| sipu/r15 | 1 | 1 | 1 | 1 | 0.99 | 0.99 | 1 | 0.99 | 1 | 1 | 1 | 1 | +| sipu/s1 | 0.99 | 0.99 | 0.98 | 0.99 | 0.99 | 0.99 | 0.99 | 0.8 | 0.99 | 0.44 | 0.99 | 0.99 | +| sipu/s2 | 0.95 | 0.95 | 0.84 | 0.97 | 0.96 | 0.96 | 0.84 | 0.86 | 0.97 | 0.01 | 0.97 | 0.95 | +| sipu/s3 | 0.67 | 0.8 | 0.57 | 0.85 | 0.82 | 0.77 | 0.63 | 0.75 | 0.84 | 0.01 | 0.84 | 0.81 | +| sipu/s4 | 0.59 | 0.68 | 0.53 | 0.79 | 0.77 | 0.73 | 0.54 | 0.66 | 0.78 | 0.01 | 0.7 | 0.68 | +| sipu/spiral | 0.04 | 0.07 | 0.06 | 0.03 | 1 | 1 | 1 | 0.83 | 0.01 | 1 | 0.02 | 0.07 | +| sipu/unbalance | 1 | 1 | 0.64 | 1 | 0.47 | 0.59 | 0.75 | 0.41 | 1 | 0.98 | 1 | 1 | +| uci/ecoli | 0.73 | 0.59 | 0.68 | 0.66 | 0.46 | 0.51 | 0.65 | 0.46 | 0.57 | 0.37 | 0.58 | 0.59 | +| uci/glass | 0.25 | 0.38 | 0.38 | 0.41 | 0.27 | 0.38 | 0.38 | 0.39 | 0.45 | 0.24 | 0.39 | 0.4 | +| uci/ionosphere | 0.29 | 0.44 | 0.29 | 0.64 | 0.46 | 0.46 | 0.08 | 0.3 | 0.42 | 0.29 | 0.29 | 0.44 | +| uci/sonar | 0.11 | 0.02 | 0.02 | 0.08 | 0.02 | 0.02 | 0.11 | 0.08 | 0.11 | 0.08 | 0.08 | 0.02 | +| uci/statlog | 0 | 0.45 | 0.17 | 0.53 | 0.74 | 0.59 | 0.51 | 0.6 | 0.43 | 0 | nan | 0.41 | +| uci/wdbc | 0.33 | 0.56 | 0.33 | 
0.84 | 0.31 | 0.55 | 0.55 | 0.79 | 0.71 | 0.26 | 0.26 | 0.56 | +| uci/wine | 0.42 | 0.54 | 0.51 | 0.91 | 0.57 | 0.57 | 0.3 | 0.58 | 0.55 | 0.14 | 0.44 | 0.54 | +| uci/yeast | 0.25 | 0.32 | 0.28 | 0.29 | 0.24 | 0.36 | 0.3 | 0.21 | 0.32 | 0.24 | 0.25 | 0.29 | +| wut/circles | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/cross | 0.03 | 0.17 | 0.32 | 0.48 | 0.87 | 0.68 | 0.38 | 0.44 | 0.39 | 0 | 0.01 | 0.31 | +| wut/graph | 0.56 | 0.58 | 0.56 | 0.93 | 0.63 | 0.59 | 0.45 | 0.63 | 0.59 | 0.07 | 0.58 | 0.62 | +| wut/isolation | 0.03 | 0.04 | 0.02 | 0.01 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.01 | 0.05 | +| wut/labirynth | 0.5 | 0.59 | 0.46 | 0.65 | 0.55 | 0.61 | 0.72 | 0.72 | 0.44 | 0.72 | 0.51 | 0.46 | +| wut/mk1 | 1 | 1 | 0.98 | 1 | 1 | 1 | 1 | 0.68 | 1 | 0.5 | 1 | 1 | +| wut/mk2 | 0.07 | 0.11 | 0.09 | 0.09 | 1 | 1 | 1 | 1 | 0.09 | 1 | 0.08 | 0.07 | +| wut/mk3 | 0.5 | 0.92 | 0.92 | 0.94 | 0.88 | 0.88 | 0.58 | 0.68 | 0.94 | 0 | 0.94 | 0.93 | +| wut/mk4 | 0.18 | 0.31 | 0.42 | 0.55 | 1 | 1 | 1 | 0.74 | 0.37 | 1 | 0.39 | 0.36 | +| wut/olympic | 0.27 | 0.26 | 0.21 | 0.2 | 0.32 | 0.29 | 0.25 | 0.36 | 0.22 | 0 | 0.26 | 0.21 | +| wut/smile | 0.99 | 0.71 | 0.83 | 0.54 | 0.63 | 1 | 1 | 0.58 | 0.72 | 1 | 1 | 0.77 | +| wut/stripes | 0.05 | 0.04 | 0.11 | 0.11 | 1 | 1 | 1 | 1 | 0.11 | 1 | 0.11 | 0.12 | +| wut/trajectories | 1 | 1 | 0.74 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/trapped_lovers | 0.25 | 0.29 | 0.5 | 0.3 | 0.57 | 1 | 1 | 0.56 | 0.36 | 1 | 0.88 | 0.37 | +| wut/twosplashes | 0.05 | 0.53 | 0.47 | 0.82 | 0.71 | 0.71 | 0.71 | 0.86 | 0.53 | 0.01 | 0.53 | 0.44 | +| wut/windows | 0.35 | 0.34 | 0.19 | 0.42 | 0.31 | 0.35 | 1 | 0.33 | 0.3 | 1 | 0.18 | 0.31 | +| wut/x1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/x2 | 0.61 | 0.99 | 1 | 0.84 | 0.84 | 0.84 | 0.56 | 0.84 | 0.99 | 0.22 | 0.42 | 0.99 | +| wut/x3 | 0.97 | 0.99 | 0.7 | 0.97 | 0.94 | 0.97 | 0.97 | 0.69 | 1 | 0.39 | 0.58 | 0.99 | +| wut/z1 | 0.32 | 0.27 | 0.36 | 0.11 | 0.5 | 0.5 | 0.3 | 0.5 | 0.31 | 0.06 | 
0.33 | 0.27 | +| wut/z2 | 0.59 | 0.67 | 0.59 | 1 | 0.66 | 0.7 | 0.81 | 0.65 | 0.64 | 0.86 | 0.99 | 0.57 | +| wut/z3 | 1 | 1 | 0.96 | 1 | 0.67 | 0.68 | 0.95 | 0.74 | 1 | 0.73 | 0.99 | 1 | + + +### psi + + +Summary statistics for ``psi`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 73 | 0.51 | 0.37 | 0 | 0.19 | 0.45 | 0.94 | 1 | +| Birch_0.01 | 73 | 0.56 | 0.34 | 0 | 0.24 | 0.53 | 0.94 | 1 | +| Complete linkage | 73 | 0.49 | 0.33 | 0 | 0.26 | 0.4 | 0.75 | 1 | +| Gaussian mixtures | 73 | 0.64 | 0.37 | 0 | 0.3 | 0.82 | 0.98 | 1 | +| Genie_0.1 | 73 | 0.73 | 0.32 | 0 | 0.47 | 0.89 | 1 | 1 | +| Genie_0.3 | 73 | 0.77 | 0.28 | 0 | 0.52 | 0.94 | 1 | 1 | +| Genie_0.5 | 73 | 0.75 | 0.31 | 0 | 0.51 | 0.9 | 1 | 1 | +| ITM | 73 | 0.67 | 0.28 | 0.01 | 0.49 | 0.72 | 0.99 | 1 | +| K-means | 73 | 0.57 | 0.34 | 0 | 0.28 | 0.53 | 0.96 | 1 | +| Single linkage | 73 | 0.41 | 0.44 | 0 | 0 | 0.2 | 1 | 1 | +| Spectral_RBF_5 | 72 | 0.64 | 0.36 | 0 | 0.3 | 0.76 | 0.99 | 1 | +| Ward linkage | 73 | 0.56 | 0.33 | 0 | 0.25 | 0.51 | 0.93 | 1 | + + +Ranks for ``psi`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|-------------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Average linkage | 72 | 6.7 | 3.6 | 1 | 4 | 7.5 | 10 | 12 | +| Birch_0.01 | 72 | 5.6 | 3.2 | 1 | 3 | 6 | 8 | 12 | +| Complete linkage | 72 | 7.9 | 2.9 | 1 | 7 | 8 | 10 | 12 | +| Gaussian mixtures | 72 | 4.6 | 4 | 1 | 1 | 3.5 | 9 | 12 | +| Genie_0.1 | 72 | 4 | 3.5 | 1 | 1 | 3 | 7 | 12 | +| Genie_0.3 | 72 | 3.6 | 3.3 | 1 | 1 | 2 | 5.2 | 12 | +| Genie_0.5 | 72 | 4.3 | 4.1 | 1 | 1 | 1 | 9.2 | 12 | +| ITM | 72 | 5.3 | 3.9 | 1 | 1 | 5 | 9 | 12 | +| K-means | 72 | 5 | 3.5 | 1 | 1 | 5 | 8 | 12 | +| Single linkage | 72 | 7.8 | 5 | 1 | 1 | 11 | 12 | 12 | +| Spectral_RBF_5 | 72 | 5.4 | 3.6 | 1 | 1 | 6 | 8.2 | 11 | +| Ward linkage | 72 | 5.9 | 3.3 | 1 | 
3 | 6 | 8 | 12 | + + +Raw results for ``psi`` (best=1.0): + + +| | Average linkage | Birch_0.01 | Complete linkage | Gaussian mixtures | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Spectral_RBF_5 | Ward linkage | +|------------------------|-------------------|--------------|--------------------|----------------------|-------------|-------------|-------------|-------|-----------|------------------|------------------|----------------| +| fcps/atom | 0.19 | 0.19 | 0.17 | 0.09 | 1 | 1 | 1 | 1 | 0.27 | 1 | 1 | 0.19 | +| fcps/chainlink | 0.35 | 0.36 | 0.39 | 0.95 | 1 | 1 | 1 | 1 | 0.31 | 1 | 1 | 0.36 | +| fcps/engytime | 0.13 | 0.88 | 0.11 | 0.98 | 0.85 | 0.85 | 0.85 | 0.91 | 0.88 | 0 | 0.92 | 0.77 | +| fcps/hepta | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0.9 | 1 | 1 | 1 | 1 | +| fcps/lsun | 0.45 | 0.51 | 0.51 | 1 | 1 | 1 | 1 | 1 | 0.54 | 1 | 0.81 | 0.46 | +| fcps/target | 0.79 | 0.25 | 0.82 | 0.17 | 1 | 1 | 1 | 1 | 0.28 | 1 | 1 | 0.33 | +| fcps/tetra | 0.99 | 0.97 | 0.99 | 1 | 1 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.97 | +| fcps/twodiamonds | 1 | 1 | 0.99 | 1 | 0.98 | 0.98 | 0.98 | 0.99 | 1 | 0 | 1 | 1 | +| fcps/wingnut | 1 | 1 | 1 | 0.93 | 1 | 1 | 1 | 1 | 0.93 | 1 | 0.88 | 0.78 | +| graves/dense | 0.92 | 0.92 | 0.34 | 1 | 0.96 | 0.96 | 0.96 | 1 | 0.82 | 0.02 | 0.92 | 0.92 | +| graves/fuzzyx | 0.76 | 0.9 | 0.94 | 1 | 0.74 | 0.59 | 0.5 | 0.95 | 1 | 0 | 0.67 | 0.79 | +| graves/line | 0 | 0 | 0 | 1 | 0.02 | 1 | 1 | 0.2 | 0 | 1 | 1 | 0 | +| graves/parabolic | 0.75 | 0.6 | 0.67 | 0.7 | 0.84 | 0.84 | 0.84 | 0.78 | 0.76 | 0 | 0.79 | 0.72 | +| graves/ring | 0.2 | 0.21 | 0.3 | 0.09 | 1 | 1 | 1 | 1 | 0.01 | 1 | 1 | 0.21 | +| graves/ring_noisy | 0 | 0.2 | 0.35 | 0.08 | 1 | 1 | 1 | 1 | 0.25 | 0 | 1 | 0.24 | +| graves/ring_outliers | 0.19 | 0.17 | 0.15 | 0.14 | 1 | 1 | 1 | 1 | 0.24 | 1 | 1 | 0.17 | +| graves/zigzag | 0.54 | 0.64 | 0.38 | 0.96 | 1 | 1 | 1 | 0.79 | 0.6 | 1 | 0.71 | 0.53 | +| graves/zigzag_noisy | 0.51 | 0.68 | 0.42 | 0.93 | 0.8 | 1 | 1 | 0.62 | 0.52 | 0.4 | 0.68 
| 0.71 | +| graves/zigzag_outliers | 0.17 | 0.4 | 0.29 | 0.98 | 1 | 1 | 1 | 0.83 | 0.33 | 0.4 | 0.22 | 0.25 | +| other/chameleon_t4_8k | 0.54 | 0.51 | 0.48 | 0.47 | 0.64 | 1 | 0.72 | 0.69 | 0.52 | 0 | 0.52 | 0.48 | +| other/chameleon_t5_8k | 1 | 1 | 0.73 | 1 | 1 | 1 | 0.71 | 0.57 | 1 | 0 | 1 | 1 | +| other/chameleon_t7_10k | 0.33 | 0.28 | 0.28 | 0.27 | 0.42 | 0.71 | 1 | 0.46 | 0.31 | 0 | 0.31 | 0.37 | +| other/chameleon_t8_8k | 0.28 | 0.34 | 0.26 | 0.34 | 0.41 | 0.34 | 0.68 | 0.4 | 0.28 | 0 | 0.27 | 0.28 | +| other/hdbscan | 0.19 | 0.56 | 0.39 | 0.74 | 0.97 | 0.69 | 0.69 | 0.78 | 0.72 | 0 | 0.34 | 0.86 | +| other/iris | 0.76 | 0.74 | 0.64 | 0.91 | 0.9 | 0.9 | 0.4 | 0.9 | 0.76 | 0.38 | 0.75 | 0.74 | +| other/iris5 | 0.77 | 0.75 | 0.65 | 0.91 | 0.34 | 0.34 | 0.91 | 0.28 | 0.76 | 0.41 | 0.75 | 0.75 | +| other/square | 0.24 | 0.01 | 0.26 | 0.17 | 1 | 1 | 1 | 1 | 0.15 | 1 | 0.26 | 0.34 | +| sipu/a1 | 0.94 | 0.94 | 0.93 | 0.97 | 0.95 | 0.88 | 0.76 | 0.74 | 0.98 | 0.2 | 0.95 | 0.93 | +| sipu/a2 | 0.95 | 0.95 | 0.92 | 0.97 | 0.96 | 0.92 | 0.75 | 0.77 | 0.98 | 0.14 | 0.9 | 0.93 | +| sipu/a3 | 0.95 | 0.95 | 0.93 | 0.94 | 0.97 | 0.94 | 0.77 | 0.76 | 0.94 | 0.1 | 0.92 | 0.95 | +| sipu/aggregation | 1 | 0.7 | 0.67 | 1 | 0.31 | 0.45 | 0.8 | 0.47 | 0.65 | 0.58 | 0.99 | 0.7 | +| sipu/compound | 0.64 | 0.67 | 0.64 | 0.67 | 0.59 | 0.67 | 0.7 | 0.54 | 0.68 | 0.64 | 0.65 | 0.67 | +| sipu/d31 | 0.91 | 0.95 | 0.95 | 0.96 | 0.95 | 0.9 | 0.69 | 0.81 | 0.97 | 0.15 | 0.95 | 0.94 | +| sipu/flame | 0.48 | 0.17 | 0 | 0.36 | 1 | 1 | 1 | 0.37 | 0.56 | 0.01 | 0.91 | 0.17 | +| sipu/jain | 0.74 | 0.53 | 0.74 | 0 | 0.01 | 1 | 1 | 0.39 | 0.39 | 0.21 | 0.53 | 0.53 | +| sipu/pathbased | 0.4 | 0.49 | 0.35 | 0.4 | 0.97 | 0.97 | 0.6 | 0.57 | 0.42 | 0 | 0.43 | 0.49 | +| sipu/r15 | 1 | 1 | 1 | 1 | 0.99 | 0.99 | 1 | 0.99 | 1 | 1 | 1 | 1 | +| sipu/s1 | 0.99 | 0.99 | 0.97 | 0.99 | 0.99 | 0.99 | 0.99 | 0.73 | 0.99 | 0.28 | 0.99 | 0.99 | +| sipu/s2 | 0.94 | 0.92 | 0.75 | 0.96 | 0.94 | 0.94 | 0.78 | 0.8 | 0.96 | 
0 | 0.96 | 0.93 | +| sipu/s3 | 0.55 | 0.77 | 0.46 | 0.82 | 0.76 | 0.7 | 0.51 | 0.69 | 0.82 | 0 | 0.8 | 0.76 | +| sipu/s4 | 0.44 | 0.62 | 0.4 | 0.75 | 0.72 | 0.67 | 0.41 | 0.59 | 0.75 | 0 | 0.59 | 0.63 | +| sipu/spiral | 0.03 | 0.06 | 0.05 | 0.03 | 1 | 1 | 1 | 0.72 | 0.01 | 1 | 0.02 | 0.06 | +| sipu/unbalance | 1 | 1 | 0.74 | 1 | 0.17 | 0.21 | 0.26 | 0.15 | 1 | 0.78 | 1 | 1 | +| uci/ecoli | 0.39 | 0.41 | 0.34 | 0.39 | 0.29 | 0.3 | 0.27 | 0.33 | 0.4 | 0.18 | 0.34 | 0.41 | +| uci/glass | 0.06 | 0.21 | 0.21 | 0.22 | 0.19 | 0.27 | 0.21 | 0.22 | 0.27 | 0.05 | 0.16 | 0.22 | +| uci/ionosphere | 0.01 | 0.35 | 0.01 | 0.53 | 0.4 | 0.4 | 0 | 0.17 | 0.34 | 0.01 | 0.01 | 0.35 | +| uci/sonar | 0.03 | 0 | 0 | 0.01 | 0 | 0 | 0.04 | 0.01 | 0.05 | 0.01 | 0.01 | 0 | +| uci/statlog | 0 | 0.35 | 0.13 | 0.42 | 0.71 | 0.52 | 0.42 | 0.52 | 0.32 | 0 | nan | 0.32 | +| uci/wdbc | 0.06 | 0.3 | 0.06 | 0.73 | 0.06 | 0.3 | 0.3 | 0.73 | 0.5 | 0 | 0 | 0.3 | +| uci/wine | 0.23 | 0.48 | 0.42 | 0.83 | 0.46 | 0.46 | 0.13 | 0.46 | 0.48 | 0.03 | 0.25 | 0.48 | +| uci/yeast | 0.17 | 0.23 | 0.17 | 0.18 | 0.16 | 0.17 | 0.13 | 0.13 | 0.24 | 0.13 | 0.14 | 0.21 | +| wut/circles | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/cross | 0.02 | 0.11 | 0.17 | 0.3 | 0.78 | 0.55 | 0.27 | 0.33 | 0.28 | 0 | 0.01 | 0.22 | +| wut/graph | 0.38 | 0.42 | 0.38 | 0.88 | 0.53 | 0.46 | 0.26 | 0.55 | 0.43 | 0.05 | 0.4 | 0.46 | +| wut/isolation | 0.03 | 0.03 | 0.02 | 0.01 | 1 | 1 | 1 | 1 | 0.01 | 1 | 0.01 | 0.04 | +| wut/labirynth | 0.31 | 0.42 | 0.31 | 0.57 | 0.35 | 0.46 | 0.69 | 0.69 | 0.31 | 0.52 | 0.34 | 0.29 | +| wut/mk1 | 0.99 | 0.99 | 0.97 | 0.99 | 0.99 | 0.99 | 0.99 | 0.55 | 0.99 | 0.35 | 0.99 | 0.99 | +| wut/mk2 | 0.07 | 0.09 | 0.09 | 0.08 | 1 | 1 | 1 | 1 | 0.09 | 1 | 0.07 | 0.06 | +| wut/mk3 | 0.35 | 0.89 | 0.87 | 0.92 | 0.82 | 0.82 | 0.43 | 0.56 | 0.93 | 0 | 0.92 | 0.91 | +| wut/mk4 | 0.1 | 0.2 | 0.3 | 0.55 | 1 | 1 | 1 | 0.63 | 0.25 | 1 | 0.27 | 0.24 | +| wut/olympic | 0.22 | 0.24 | 0.2 | 0.18 | 0.25 | 0.26 | 
0.21 | 0.31 | 0.21 | 0 | 0.24 | 0.19 | +| wut/smile | 0.98 | 0.65 | 0.58 | 0.43 | 0.47 | 1 | 1 | 0.62 | 0.66 | 1 | 1 | 0.69 | +| wut/stripes | 0.04 | 0.03 | 0.09 | 0.11 | 1 | 1 | 1 | 1 | 0.1 | 1 | 0.11 | 0.08 | +| wut/trajectories | 1 | 1 | 0.62 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/trapped_lovers | 0.13 | 0.15 | 0.34 | 0.17 | 0.5 | 1 | 1 | 0.49 | 0.21 | 1 | 0.77 | 0.21 | +| wut/twosplashes | 0.03 | 0.53 | 0.44 | 0.82 | 0.71 | 0.71 | 0.71 | 0.75 | 0.53 | 0.01 | 0.53 | 0.37 | +| wut/windows | 0.28 | 0.28 | 0.06 | 0.34 | 0.29 | 0.39 | 1 | 0.2 | 0.24 | 1 | 0.08 | 0.26 | +| wut/x1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| wut/x2 | 0.56 | 0.98 | 1 | 0.74 | 0.74 | 0.74 | 0.5 | 0.75 | 0.98 | 0 | 0.19 | 0.98 | +| wut/x3 | 0.92 | 0.98 | 0.59 | 0.94 | 0.89 | 0.92 | 0.92 | 0.49 | 1 | 0.02 | 0.45 | 0.98 | +| wut/z1 | 0.25 | 0.2 | 0.33 | 0.09 | 0.38 | 0.38 | 0.19 | 0.38 | 0.31 | 0.04 | 0.31 | 0.2 | +| wut/z2 | 0.45 | 0.56 | 0.39 | 1 | 0.56 | 0.47 | 0.63 | 0.56 | 0.51 | 0.47 | 0.99 | 0.51 | +| wut/z3 | 1 | 0.99 | 0.92 | 0.99 | 0.49 | 0.44 | 0.9 | 0.6 | 1 | 0.55 | 0.97 | 1 | + + +### Summary + +Medians and means of the partition similarity scores +(read row-wise, in groups of 2 columns): + +(fig:indices_small)= +```{figure} benchmarks_details-figures/indices_small-1.* +Heat map of median and mean similarity scores +``` + + + + +## Large Datasets + +### Results + + + + +### nca + + +Summary statistics for ``nca`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 0.75 | 0.22 | 0.43 | 0.6 | 0.76 | 0.92 | 1 | +| Genie_0.3 | 6 | 0.61 | 0.33 | 0.16 | 0.38 | 0.6 | 0.88 | 1 | +| Genie_0.5 | 6 | 0.39 | 0.43 | 0 | 0.06 | 0.25 | 0.71 | 1 | +| ITM | 6 | 0.71 | 0.15 | 0.52 | 0.6 | 0.75 | 0.79 | 0.9 | +| K-means | 6 | 0.68 | 0.26 | 0.42 | 0.47 | 0.62 | 0.91 | 0.99 | +| Single linkage | 6 | 0.1 | 0.24 | 0 | 0 | 0 | 0 | 0.59 | +| Ward linkage | 6 | 0.69 
| 0.23 | 0.44 | 0.5 | 0.65 | 0.86 | 1 | + + +Ranks for ``nca`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 1.7 | 0.8 | 1 | 1 | 1.5 | 2 | 3 | +| Genie_0.3 | 6 | 3.5 | 1.8 | 1 | 2.2 | 4 | 5 | 5 | +| Genie_0.5 | 6 | 5 | 2 | 1 | 5.2 | 6 | 6 | 6 | +| ITM | 6 | 3 | 2.1 | 1 | 1.2 | 2.5 | 4.5 | 6 | +| K-means | 6 | 3.5 | 1.6 | 1 | 2.5 | 4 | 4.8 | 5 | +| Single linkage | 6 | 6.8 | 0.4 | 6 | 7 | 7 | 7 | 7 | +| Ward linkage | 6 | 3 | 1.3 | 1 | 2.2 | 3.5 | 4 | 4 | + + +Raw results for ``nca`` (best=1.0): + + +| | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Ward linkage | +|---------------|-------------|-------------|-------------|-------|-----------|------------------|----------------| +| mnist/digits | 0.69 | 0.34 | 0.06 | 0.9 | 0.48 | 0 | 0.58 | +| mnist/fashion | 0.43 | 0.16 | 0.06 | 0.56 | 0.42 | 0 | 0.44 | +| sipu/birch1 | 0.94 | 0.94 | 0.8 | 0.8 | 0.96 | 0 | 0.91 | +| sipu/birch2 | 1 | 1 | 1 | 0.77 | 0.99 | 0.59 | 1 | +| sipu/worms_2 | 0.57 | 0.49 | 0.44 | 0.52 | 0.46 | 0 | 0.47 | +| sipu/worms_64 | 0.84 | 0.7 | 0 | 0.74 | 0.75 | 0 | 0.73 | + + +### ar + + +Summary statistics for ``ar`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 0.66 | 0.26 | 0.31 | 0.48 | 0.65 | 0.85 | 1 | +| Genie_0.3 | 6 | 0.52 | 0.37 | 0.07 | 0.25 | 0.46 | 0.81 | 1 | +| Genie_0.5 | 6 | 0.35 | 0.43 | 0 | 0.02 | 0.17 | 0.63 | 1 | +| ITM | 6 | 0.6 | 0.18 | 0.37 | 0.46 | 0.65 | 0.71 | 0.8 | +| K-means | 6 | 0.61 | 0.31 | 0.32 | 0.35 | 0.52 | 0.88 | 0.99 | +| Single linkage | 6 | 0.07 | 0.18 | 0 | 0 | 0 | 0 | 0.44 | +| Ward linkage | 6 | 0.58 | 0.27 | 0.33 | 0.38 | 0.47 | 0.75 | 1 | + + +Ranks for ``ar`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | 
+|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 1.8 | 1.2 | 1 | 1 | 1.5 | 2 | 4 | +| Genie_0.3 | 6 | 3.2 | 1.7 | 1 | 2 | 3 | 4.8 | 5 | +| Genie_0.5 | 6 | 4.8 | 1.9 | 1 | 5 | 5.5 | 6 | 6 | +| ITM | 6 | 3.3 | 2.3 | 1 | 1.5 | 3 | 5.2 | 6 | +| K-means | 6 | 3.3 | 1.6 | 1 | 2.2 | 3.5 | 4.8 | 5 | +| Single linkage | 6 | 6.8 | 0.4 | 6 | 7 | 7 | 7 | 7 | +| Ward linkage | 6 | 3.2 | 1.5 | 1 | 2.2 | 3.5 | 4 | 5 | + + +Raw results for ``ar`` (best=1.0): + + +| | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Ward linkage | +|---------------|-------------|-------------|-------------|-------|-----------|------------------|----------------| +| mnist/digits | 0.6 | 0.21 | 0.02 | 0.8 | 0.37 | 0 | 0.53 | +| mnist/fashion | 0.31 | 0.07 | 0.02 | 0.41 | 0.35 | 0 | 0.37 | +| sipu/birch1 | 0.89 | 0.89 | 0.73 | 0.72 | 0.94 | 0 | 0.83 | +| sipu/birch2 | 1 | 1 | 1 | 0.71 | 0.99 | 0.44 | 1 | +| sipu/worms_2 | 0.45 | 0.38 | 0.32 | 0.37 | 0.32 | 0 | 0.33 | +| sipu/worms_64 | 0.7 | 0.54 | 0 | 0.59 | 0.67 | 0 | 0.42 | + + +### fm + + +Summary statistics for ``fm`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 0.69 | 0.23 | 0.42 | 0.51 | 0.68 | 0.85 | 1 | +| Genie_0.3 | 6 | 0.6 | 0.29 | 0.31 | 0.4 | 0.49 | 0.82 | 1 | +| Genie_0.5 | 6 | 0.49 | 0.31 | 0.2 | 0.31 | 0.34 | 0.65 | 1 | +| ITM | 6 | 0.62 | 0.16 | 0.39 | 0.51 | 0.66 | 0.72 | 0.82 | +| K-means | 6 | 0.64 | 0.28 | 0.35 | 0.42 | 0.56 | 0.88 | 0.99 | +| Single linkage | 6 | 0.28 | 0.15 | 0.1 | 0.19 | 0.26 | 0.32 | 0.53 | +| Ward linkage | 6 | 0.61 | 0.25 | 0.36 | 0.45 | 0.52 | 0.77 | 1 | + + +Ranks for ``fm`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 1.7 | 0.8 | 1 | 1 | 1.5 | 2 | 3 | +| Genie_0.3 | 6 | 
3.3 | 2 | 1 | 2 | 3 | 4.8 | 6 | +| Genie_0.5 | 6 | 4.8 | 2.1 | 1 | 4.2 | 5.5 | 6 | 7 | +| ITM | 6 | 3.3 | 2.3 | 1 | 1.5 | 3 | 5.2 | 6 | +| K-means | 6 | 3.5 | 1.9 | 1 | 2.2 | 3.5 | 4.8 | 6 | +| Single linkage | 6 | 6.3 | 0.8 | 5 | 6 | 6.5 | 7 | 7 | +| Ward linkage | 6 | 3.2 | 1.5 | 1 | 2.2 | 3.5 | 4 | 5 | + + +Raw results for ``fm`` (best=1.0): + + +| | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Ward linkage | +|---------------|-------------|-------------|-------------|-------|-----------|------------------|----------------| +| mnist/digits | 0.65 | 0.41 | 0.31 | 0.82 | 0.43 | 0.32 | 0.58 | +| mnist/fashion | 0.42 | 0.31 | 0.31 | 0.47 | 0.42 | 0.32 | 0.44 | +| sipu/birch1 | 0.89 | 0.89 | 0.75 | 0.72 | 0.94 | 0.1 | 0.83 | +| sipu/birch2 | 1 | 1 | 1 | 0.72 | 0.99 | 0.53 | 1 | +| sipu/worms_2 | 0.47 | 0.4 | 0.36 | 0.39 | 0.35 | 0.19 | 0.36 | +| sipu/worms_64 | 0.72 | 0.58 | 0.2 | 0.61 | 0.69 | 0.2 | 0.47 | + + +### ami + + +Summary statistics for ``ami`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 0.77 | 0.17 | 0.57 | 0.67 | 0.74 | 0.9 | 1 | +| Genie_0.3 | 6 | 0.7 | 0.24 | 0.37 | 0.59 | 0.66 | 0.88 | 1 | +| Genie_0.5 | 6 | 0.49 | 0.42 | 0 | 0.19 | 0.41 | 0.84 | 1 | +| ITM | 6 | 0.75 | 0.15 | 0.56 | 0.63 | 0.74 | 0.87 | 0.91 | +| K-means | 6 | 0.72 | 0.22 | 0.5 | 0.53 | 0.66 | 0.91 | 1 | +| Single linkage | 6 | 0.15 | 0.36 | 0 | 0 | 0 | 0 | 0.89 | +| Ward linkage | 6 | 0.75 | 0.17 | 0.56 | 0.62 | 0.7 | 0.87 | 1 | + + +Ranks for ``ami`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 1.3 | 0.5 | 1 | 1 | 1 | 1.8 | 2 | +| Genie_0.3 | 6 | 3 | 1.5 | 1 | 2 | 3 | 4 | 5 | +| Genie_0.5 | 6 | 4.5 | 2 | 1 | 4 | 5 | 6 | 6 | +| ITM | 6 | 3.8 | 2.1 | 1 | 2.2 | 4 | 5.8 | 6 | +| K-means | 6 | 2.8 | 2 | 
1 | 1 | 2.5 | 4.8 | 5 | +| Single linkage | 6 | 6.8 | 0.4 | 6 | 7 | 7 | 7 | 7 | +| Ward linkage | 6 | 2.7 | 1.6 | 1 | 1.2 | 2.5 | 3.8 | 5 | + + +Raw results for ``ami`` (best=1.0): + + +| | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Ward linkage | +|---------------|-------------|-------------|-------------|-------|-----------|------------------|----------------| +| mnist/digits | 0.75 | 0.57 | 0.18 | 0.83 | 0.5 | 0 | 0.68 | +| mnist/fashion | 0.57 | 0.37 | 0.21 | 0.56 | 0.51 | 0 | 0.56 | +| sipu/birch1 | 0.94 | 0.94 | 0.92 | 0.89 | 0.98 | 0 | 0.92 | +| sipu/birch2 | 1 | 1 | 1 | 0.91 | 1 | 0.89 | 1 | +| sipu/worms_2 | 0.65 | 0.63 | 0.61 | 0.62 | 0.6 | 0 | 0.6 | +| sipu/worms_64 | 0.72 | 0.68 | 0 | 0.66 | 0.72 | 0 | 0.72 | + + +### nmi + + +Summary statistics for ``nmi`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 0.77 | 0.17 | 0.57 | 0.67 | 0.74 | 0.9 | 1 | +| Genie_0.3 | 6 | 0.7 | 0.24 | 0.37 | 0.59 | 0.66 | 0.88 | 1 | +| Genie_0.5 | 6 | 0.49 | 0.42 | 0 | 0.19 | 0.41 | 0.84 | 1 | +| ITM | 6 | 0.75 | 0.15 | 0.56 | 0.63 | 0.75 | 0.87 | 0.91 | +| K-means | 6 | 0.72 | 0.22 | 0.5 | 0.53 | 0.66 | 0.91 | 1 | +| Single linkage | 6 | 0.15 | 0.36 | 0 | 0 | 0 | 0 | 0.89 | +| Ward linkage | 6 | 0.75 | 0.17 | 0.56 | 0.62 | 0.7 | 0.87 | 1 | + + +Ranks for ``nmi`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 1.5 | 0.5 | 1 | 1 | 1.5 | 2 | 2 | +| Genie_0.3 | 6 | 3 | 1.5 | 1 | 2 | 3 | 4 | 5 | +| Genie_0.5 | 6 | 4.5 | 2 | 1 | 4 | 5 | 6 | 6 | +| ITM | 6 | 3.8 | 2.1 | 1 | 2.2 | 4 | 5.8 | 6 | +| K-means | 6 | 3 | 1.9 | 1 | 1.2 | 3 | 4.8 | 5 | +| Single linkage | 6 | 6.8 | 0.4 | 6 | 7 | 7 | 7 | 7 | +| Ward linkage | 6 | 2.7 | 1.6 | 1 | 1.2 | 2.5 | 3.8 | 5 | + + +Raw results for ``nmi`` (best=1.0): + + 
+| | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Ward linkage | +|---------------|-------------|-------------|-------------|-------|-----------|------------------|----------------| +| mnist/digits | 0.75 | 0.57 | 0.18 | 0.83 | 0.5 | 0 | 0.68 | +| mnist/fashion | 0.57 | 0.37 | 0.21 | 0.56 | 0.51 | 0 | 0.56 | +| sipu/birch1 | 0.94 | 0.94 | 0.92 | 0.89 | 0.98 | 0 | 0.92 | +| sipu/birch2 | 1 | 1 | 1 | 0.91 | 1 | 0.89 | 1 | +| sipu/worms_2 | 0.65 | 0.63 | 0.61 | 0.62 | 0.6 | 0 | 0.6 | +| sipu/worms_64 | 0.72 | 0.68 | 0 | 0.66 | 0.72 | 0 | 0.73 | + + +### npa + + +Summary statistics for ``npa`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 0.75 | 0.22 | 0.43 | 0.61 | 0.77 | 0.92 | 1 | +| Genie_0.3 | 6 | 0.61 | 0.33 | 0.16 | 0.39 | 0.61 | 0.88 | 1 | +| Genie_0.5 | 6 | 0.4 | 0.43 | 0 | 0.06 | 0.26 | 0.71 | 1 | +| ITM | 6 | 0.71 | 0.15 | 0.49 | 0.6 | 0.75 | 0.79 | 0.89 | +| K-means | 6 | 0.67 | 0.26 | 0.42 | 0.44 | 0.62 | 0.91 | 0.99 | +| Single linkage | 6 | 0.1 | 0.24 | 0 | 0 | 0.01 | 0.02 | 0.59 | +| Ward linkage | 6 | 0.68 | 0.23 | 0.44 | 0.48 | 0.65 | 0.86 | 1 | + + +Ranks for ``npa`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 1.7 | 0.8 | 1 | 1 | 1.5 | 2 | 3 | +| Genie_0.3 | 6 | 3.3 | 1.9 | 1 | 2 | 3.5 | 5 | 5 | +| Genie_0.5 | 6 | 4.8 | 1.9 | 1 | 5 | 5.5 | 6 | 6 | +| ITM | 6 | 3.2 | 2 | 1 | 1.5 | 3 | 4.5 | 6 | +| K-means | 6 | 3.7 | 1.9 | 1 | 2.5 | 4 | 4.8 | 6 | +| Single linkage | 6 | 6.8 | 0.4 | 6 | 7 | 7 | 7 | 7 | +| Ward linkage | 6 | 3 | 1.3 | 1 | 2.2 | 3.5 | 4 | 4 | + + +Raw results for ``npa`` (best=1.0): + + +| | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Ward linkage | 
+|---------------|-------------|-------------|-------------|-------|-----------|------------------|----------------| +| mnist/digits | 0.7 | 0.35 | 0.07 | 0.89 | 0.48 | 0.01 | 0.58 | +| mnist/fashion | 0.43 | 0.16 | 0.06 | 0.56 | 0.42 | 0 | 0.44 | +| sipu/birch1 | 0.94 | 0.94 | 0.8 | 0.8 | 0.96 | 0 | 0.91 | +| sipu/birch2 | 1 | 1 | 1 | 0.77 | 0.99 | 0.59 | 1 | +| sipu/worms_2 | 0.58 | 0.51 | 0.44 | 0.49 | 0.43 | 0.03 | 0.45 | +| sipu/worms_64 | 0.84 | 0.7 | 0 | 0.74 | 0.75 | 0 | 0.73 | + + +### psi + + +Summary statistics for ``psi`` (best=1.0): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 0.7 | 0.25 | 0.37 | 0.51 | 0.72 | 0.9 | 1 | +| Genie_0.3 | 6 | 0.57 | 0.35 | 0.13 | 0.31 | 0.53 | 0.86 | 1 | +| Genie_0.5 | 6 | 0.36 | 0.42 | 0 | 0.04 | 0.18 | 0.63 | 1 | +| ITM | 6 | 0.65 | 0.16 | 0.39 | 0.57 | 0.69 | 0.73 | 0.85 | +| K-means | 6 | 0.62 | 0.3 | 0.31 | 0.36 | 0.57 | 0.88 | 0.98 | +| Single linkage | 6 | 0.09 | 0.21 | 0 | 0 | 0 | 0 | 0.52 | +| Ward linkage | 6 | 0.62 | 0.28 | 0.31 | 0.39 | 0.6 | 0.83 | 1 | + + +Ranks for ``psi`` (best=1): + + +| | count | mean | std | min | 25% | 50% | 75% | max | +|----------------|---------|--------|-------|-------|-------|-------|-------|-------| +| Genie_0.1 | 6 | 1.5 | 0.5 | 1 | 1 | 1.5 | 2 | 2 | +| Genie_0.3 | 6 | 3.3 | 1.9 | 1 | 2 | 3.5 | 5 | 5 | +| Genie_0.5 | 6 | 4.8 | 2 | 1 | 4.5 | 6 | 6 | 6 | +| ITM | 6 | 3.2 | 2 | 1 | 1.5 | 3 | 4.5 | 6 | +| K-means | 6 | 3.7 | 1.5 | 1 | 3.2 | 4 | 4.8 | 5 | +| Single linkage | 6 | 6.8 | 0.4 | 6 | 7 | 7 | 7 | 7 | +| Ward linkage | 6 | 3 | 1.4 | 1 | 2.2 | 3 | 3.8 | 5 | + + +Raw results for ``psi`` (best=1.0): + + +| | Genie_0.1 | Genie_0.3 | Genie_0.5 | ITM | K-means | Single linkage | Ward linkage | +|---------------|-------------|-------------|-------------|-------|-----------|------------------|----------------| +| mnist/digits | 0.6 | 0.28 | 0.05 | 0.85 | 0.43 | 0 | 
0.5 | +| mnist/fashion | 0.37 | 0.13 | 0.04 | 0.53 | 0.33 | 0 | 0.35 | +| sipu/birch1 | 0.92 | 0.92 | 0.73 | 0.75 | 0.94 | 0 | 0.87 | +| sipu/birch2 | 1 | 1 | 1 | 0.69 | 0.98 | 0.52 | 1 | +| sipu/worms_2 | 0.48 | 0.4 | 0.32 | 0.39 | 0.31 | 0 | 0.31 | +| sipu/worms_64 | 0.84 | 0.66 | 0 | 0.7 | 0.7 | 0 | 0.71 | + + + +### Summary + +Medians and means of the partition similarity scores: + +(fig:indices_large)= +```{figure} benchmarks_details-figures/indices_large-3.* +Heat map of median and mean similarity scores +``` diff --git a/.devel/sphinx/weave/benchmarks_details.rst b/.devel/sphinx/weave/benchmarks_details.rst deleted file mode 100644 index 66efc87b..00000000 --- a/.devel/sphinx/weave/benchmarks_details.rst +++ /dev/null @@ -1,1326 +0,0 @@ -Benchmarks — Detailed Results -============================= - -In one of the :any:`above sections ` -we have summarised the AR indices based on the datasets from -the `Benchmark Suite for Clustering Algorithms – Version 1 `_ -:cite:`clustering_benchmarks_v1`. -In this section we present more detailed results for -some other partition similarity measures implemented in the `genieclust` -package — Fowlkes–Mallows :cite:`fm`, adjusted Rand :cite:`comparing_partitions`, -adjusted and normalised mutual information :cite:`nmi`, -normalised pivoted accuracy (which is based on set-matching classification rate), -normalised clustering accuracy :cite:`nca`, -and pair sets index :cite:`psi`, -see the API documentation of :mod:`genieclust.compare_partitions` for more details. -In each case, a score of 1.0 denotes perfect agreement between the clustering -results and the reference partitions. - -At the preprocessing stage, features with variance of 0 were removed. -Every dataset has been centred at **0** and scaled so that is has total -variance of 1. Then, a tiny bit of Gaussian noise has been added to each -item. Clustering is performed with respect to the Euclidean distance -(wherever applicable). 
- -All raw results can be found `here `_. - - - - - -Small Datasets --------------- - - - - - - - - - - -nca -^^^ - -Summary statistics for ``nca`` (best=1.0): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 73 0.6 0.34 0 0.34 0.64 0.96 1 -Birch_0.01 73 0.67 0.3 0.01 0.44 0.76 0.96 1 -Complete linkage 73 0.6 0.3 0.02 0.41 0.57 0.83 1 -Gaussian mixtures 73 0.72 0.32 0.01 0.53 0.85 0.99 1 -Genie_0.1 73 0.81 0.24 0 0.7 0.93 1 1 -Genie_0.3 73 0.82 0.24 0 0.64 0.95 1 1 -Genie_0.5 73 0.79 0.26 0.15 0.63 0.94 1 1 -ITM 73 0.77 0.22 0.09 0.67 0.8 0.99 1 -K-means 73 0.67 0.3 0.01 0.46 0.7 0.97 1 -Single linkage 73 0.44 0.43 0 0.01 0.3 1 1 -Spectral_RBF_5 72 0.69 0.34 0 0.44 0.84 1 1 -Ward linkage 73 0.67 0.29 0.05 0.44 0.78 0.95 1 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``nca`` (best=1): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 72 6.7 3.7 1 4 8 10 12 -Birch_0.01 72 5.5 3.2 1 2 6 8 12 -Complete linkage 72 7.8 2.8 1 7 8 10 12 -Gaussian mixtures 72 4.6 3.9 1 1 3.5 7.2 12 -Genie_0.1 72 3.5 3.3 1 1 2 4.2 11 -Genie_0.3 72 3.8 3.5 1 1 2 7 12 -Genie_0.5 72 4.4 4 1 1 2 9 11 -ITM 72 5.1 3.7 1 1 5 8.2 12 -K-means 72 5.2 3.6 1 1 5.5 8 12 -Single linkage 72 8.1 5 1 1 12 12 12 -Spectral_RBF_5 72 5.4 3.8 1 1 6 9 12 -Ward linkage 72 5.8 3.4 1 2.8 6 8 12 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``nca`` (best=1.0): - - -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -.. 
Average linkage Birch_0.01 Complete linkage Gaussian mixtures Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Spectral_RBF_5 Ward linkage -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -fcps/atom 0.31 0.31 0.29 0.17 1 1 1 1 0.43 1 1 0.31 -fcps/chainlink 0.52 0.53 0.56 0.95 1 1 1 1 0.31 1 1 0.53 -fcps/engytime 0.23 0.88 0.21 0.99 0.92 0.92 0.92 0.91 0.92 0 0.96 0.86 -fcps/hepta 1 1 1 1 1 1 1 0.94 1 1 1 1 -fcps/lsun 0.61 0.66 0.66 1 1 1 1 1 0.7 1 0.86 0.64 -fcps/target 0.86 0.84 0.88 0.84 1 1 1 1 0.84 1 1 0.83 -fcps/tetra 1 0.98 0.99 1 1 1 1 1 1 0.01 1 0.98 -fcps/twodiamonds 1 1 0.99 1 0.99 0.99 0.99 0.99 1 0 1 1 -fcps/wingnut 1 1 1 0.93 1 1 1 1 0.93 1 0.88 0.78 -graves/dense 0.96 0.96 0.51 1 0.98 0.98 0.98 1 0.9 0.03 0.96 0.96 -graves/fuzzyx 0.86 0.95 0.97 1 0.84 0.72 0.63 0.97 1 0 0.78 0.88 -graves/line 0.37 0.37 0.36 1 0.5 1 1 0.6 0.46 1 1 0.25 -graves/parabolic 0.77 0.76 0.77 0.73 0.9 0.9 0.9 0.8 0.77 0 0.81 0.79 -graves/ring 0.34 0.34 0.46 0.17 1 1 1 1 0.01 1 1 0.34 -graves/ring_noisy 0 0.34 0.52 0.16 1 1 1 1 0.41 0 1 0.39 -graves/ring_outliers 0.58 0.33 0.56 0.31 1 1 1 1 0.57 1 1 0.34 -graves/zigzag 0.69 0.74 0.57 0.98 1 1 1 0.87 0.7 1 0.8 0.64 -graves/zigzag_noisy 0.65 0.77 0.58 0.95 0.88 1 1 0.75 0.64 0.5 0.78 0.79 -graves/zigzag_outliers 0.5 0.68 0.57 0.99 1 1 1 0.91 0.62 0.5 0.4 0.62 -other/chameleon_t4_8k 0.64 0.62 0.53 0.59 0.72 1 0.8 0.78 0.61 0 0.61 0.58 -other/chameleon_t5_8k 1 1 0.8 1 1 1 0.8 0.69 1 0 1 1 -other/chameleon_t7_10k 0.59 0.62 0.56 0.59 0.73 0.87 1 0.75 0.54 0 0.55 0.64 -other/chameleon_t8_8k 0.45 0.49 0.48 0.59 0.67 0.48 0.73 0.62 0.52 0 0.5 0.48 -other/hdbscan 0.38 0.69 0.52 0.83 0.98 0.78 0.79 0.89 0.77 0 0.45 0.9 -other/iris 0.86 0.84 0.76 0.95 0.94 0.94 0.55 0.94 0.84 0.52 0.85 0.84 -other/iris5 0.86 0.84 0.76 0.95 0.41 0.41 0.94 0.28 0.84 0.52 0.85 0.84 -other/square 0.39 0.01 
0.41 0.19 1 1 1 1 0.17 1 0.41 0.5 -sipu/a1 0.96 0.96 0.96 0.98 0.97 0.91 0.84 0.82 0.98 0.37 0.97 0.95 -sipu/a2 0.97 0.97 0.95 0.98 0.97 0.94 0.83 0.83 0.98 0.3 0.93 0.96 -sipu/a3 0.97 0.97 0.96 0.96 0.98 0.95 0.84 0.83 0.96 0.25 0.95 0.97 -sipu/aggregation 1 0.78 0.75 1 0.58 0.58 0.88 0.66 0.75 0.66 1 0.78 -sipu/compound 0.67 0.87 0.67 0.84 0.84 0.87 0.76 0.79 0.81 0.67 0.77 0.87 -sipu/d31 0.94 0.96 0.96 0.97 0.97 0.93 0.76 0.85 0.98 0.24 0.97 0.96 -sipu/flame 0.74 0.59 0.21 0.68 1 1 1 0.68 0.74 0.02 0.91 0.59 -sipu/jain 0.79 0.81 0.79 0.43 0.49 1 1 0.71 0.7 0.27 0.81 0.81 -sipu/pathbased 0.63 0.67 0.56 0.61 0.98 0.98 0.76 0.68 0.65 0.01 0.65 0.67 -sipu/r15 1 1 1 1 0.99 0.99 1 0.99 1 1 1 1 -sipu/s1 0.99 0.99 0.98 0.99 0.99 0.99 0.99 0.8 0.99 0.43 0.99 0.99 -sipu/s2 0.95 0.95 0.83 0.97 0.95 0.95 0.83 0.86 0.97 0 0.97 0.95 -sipu/s3 0.66 0.8 0.57 0.85 0.82 0.76 0.63 0.76 0.84 0 0.84 0.81 -sipu/s4 0.59 0.68 0.52 0.79 0.77 0.73 0.54 0.67 0.78 0 0.69 0.68 -sipu/spiral 0.04 0.07 0.06 0.03 1 1 1 0.83 0.01 1 0.02 0.07 -sipu/unbalance 1 1 0.8 1 0.24 0.29 0.35 0.22 1 0.86 1 1 -uci/ecoli 0.5 0.52 0.47 0.51 0.42 0.44 0.4 0.46 0.57 0.2 0.48 0.52 -uci/glass 0.08 0.31 0.28 0.3 0.39 0.42 0.32 0.43 0.37 0.06 0.23 0.34 -uci/ionosphere 0.01 0.44 0.02 0.65 0.41 0.41 0.15 0.36 0.44 0.01 0.01 0.44 -uci/sonar 0.05 0.05 0.04 0.09 0 0 0.16 0.09 0.11 0.01 0.01 0.05 -uci/statlog 0 0.45 0.17 0.53 0.74 0.59 0.51 0.6 0.43 0 nan 0.41 -uci/wdbc 0.09 0.41 0.09 0.8 0.45 0.4 0.4 0.8 0.61 0 0 0.41 -uci/wine 0.34 0.53 0.48 0.92 0.58 0.58 0.22 0.57 0.54 0.04 0.36 0.53 -uci/yeast 0.19 0.37 0.3 0.32 0.34 0.33 0.28 0.33 0.38 0.14 0.16 0.35 -wut/circles 1 1 1 1 1 1 1 1 1 1 1 1 -wut/cross 0.03 0.17 0.32 0.48 0.87 0.68 0.38 0.44 0.39 0 0.01 0.31 -wut/graph 0.56 0.58 0.56 0.93 0.63 0.59 0.45 0.63 0.59 0.07 0.58 0.62 -wut/isolation 0.03 0.04 0.02 0.01 1 1 1 1 0.01 1 0.01 0.05 -wut/labirynth 0.59 0.8 0.7 0.66 0.74 0.62 0.71 0.7 0.56 0.6 0.56 0.55 -wut/mk1 1 1 0.98 1 1 1 1 0.68 1 0.5 1 1 -wut/mk2 0.07 0.11 0.09 
0.09 1 1 1 1 0.09 1 0.08 0.07 -wut/mk3 0.5 0.92 0.92 0.94 0.88 0.88 0.58 0.68 0.94 0 0.94 0.93 -wut/mk4 0.18 0.31 0.42 0.55 1 1 1 0.74 0.37 1 0.39 0.36 -wut/olympic 0.27 0.26 0.21 0.2 0.32 0.29 0.25 0.36 0.22 0 0.26 0.21 -wut/smile 0.99 0.86 0.63 0.5 0.55 1 1 0.7 0.86 1 1 0.88 -wut/stripes 0.05 0.04 0.11 0.11 1 1 1 1 0.11 1 0.11 0.12 -wut/trajectories 1 1 0.74 1 1 1 1 1 1 1 1 1 -wut/trapped_lovers 0.58 0.61 0.72 0.61 0.76 1 1 0.76 0.64 1 0.93 0.65 -wut/twosplashes 0.05 0.53 0.47 0.82 0.71 0.71 0.71 0.86 0.53 0.01 0.53 0.44 -wut/windows 0.8 0.8 0.32 0.82 0.57 0.57 1 0.36 0.69 1 0.3 0.79 -wut/x1 1 1 1 1 1 1 1 1 1 1 1 1 -wut/x2 0.65 0.98 1 0.87 0.87 0.87 0.59 0.87 0.98 0.01 0.34 0.98 -wut/x3 0.94 0.99 0.74 0.96 0.93 0.94 0.94 0.6 1 0.03 0.54 0.99 -wut/z1 0.32 0.27 0.36 0.11 0.5 0.5 0.3 0.5 0.31 0.06 0.33 0.27 -wut/z2 0.75 0.88 0.73 1 0.88 0.64 0.64 0.88 0.86 0.5 0.99 0.84 -wut/z3 1 0.99 0.95 0.99 0.7 0.49 0.94 0.83 1 0.66 0.98 1 -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== - - -ar -^^ - -Summary statistics for ``ar`` (best=1.0): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 73 0.51 0.38 0 0.11 0.51 0.93 1 -Birch_0.01 73 0.55 0.36 0 0.22 0.56 0.93 1 -Complete linkage 73 0.48 0.34 0 0.21 0.4 0.79 1 -Gaussian mixtures 73 0.65 0.37 0 0.4 0.82 0.98 1 -Genie_0.1 73 0.74 0.31 0 0.53 0.88 1 1 -Genie_0.3 73 0.78 0.27 0 0.59 0.94 1 1 -Genie_0.5 73 0.77 0.3 0 0.66 0.92 1 1 -ITM 73 0.68 0.27 0 0.53 0.69 0.99 1 -K-means 73 0.55 0.35 0 0.2 0.51 0.95 1 -Single linkage 73 0.44 0.45 0 0 0.32 1 1 -Spectral_RBF_5 72 0.63 0.37 0 0.33 0.73 0.99 1 -Ward linkage 73 0.54 0.35 0 0.22 0.54 0.91 1 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``ar`` (best=1): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 72 6.6 3.5 1 4.8 7 9.2 12 -Birch_0.01 72 5.8 2.9 1 4 6 8 12 -Complete linkage 72 7.7 3.2 1 6 8 11 12 -Gaussian mixtures 72 4.2 3.6 1 1 3 7 12 -Genie_0.1 72 3.8 3.3 1 1 3 6 12 -Genie_0.3 72 3.3 3 1 1 2 5 11 -Genie_0.5 72 4.2 3.9 1 1 2 8 11 -ITM 72 5.4 3.9 1 1 5 9 12 -K-means 72 5.6 3.8 1 1 6 9 12 -Single linkage 72 7.4 5.1 1 1 11 12 12 -Spectral_RBF_5 72 5.2 3.5 1 1 6 8 11 -Ward linkage 72 6 3 1 4 6 8 12 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``ar`` (best=1.0): - - -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -.. 
Average linkage Birch_0.01 Complete linkage Gaussian mixtures Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Spectral_RBF_5 Ward linkage -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -fcps/atom 0.1 0.1 0.08 0.03 1 1 1 1 0.18 1 1 0.1 -fcps/chainlink 0.27 0.28 0.31 0.91 1 1 1 1 0.09 1 1 0.28 -fcps/engytime 0.05 0.78 0.04 0.98 0.84 0.84 0.84 0.83 0.85 0 0.92 0.75 -fcps/hepta 1 1 1 1 1 1 1 0.9 1 1 1 1 -fcps/lsun 0.36 0.4 0.4 1 1 1 1 1 0.44 1 0.8 0.37 -fcps/target 0.15 0.64 0.21 0.65 1 1 1 1 0.63 1 1 0.64 -fcps/tetra 0.99 0.97 0.99 1 1 1 1 1 1 0 1 0.97 -fcps/twodiamonds 0.99 0.99 0.99 1 0.98 0.98 0.98 0.99 1 0 1 1 -fcps/wingnut 1 1 1 0.86 1 1 1 1 0.86 1 0.78 0.6 -graves/dense 0.92 0.92 0.26 1 0.96 0.96 0.96 1 0.81 0 0.92 0.92 -graves/fuzzyx 0.74 0.9 0.94 1 0.73 0.5 0.4 0.95 1 0 0.59 0.78 -graves/line 0 0 0 1 0.02 1 1 0.12 0 1 1 0 -graves/parabolic 0.6 0.57 0.59 0.54 0.81 0.81 0.81 0.64 0.59 0 0.66 0.62 -graves/ring 0.11 0.12 0.21 0.03 1 1 1 1 0 1 1 0.12 -graves/ring_noisy 0 0.11 0.27 0.02 1 1 1 1 0.16 0 1 0.15 -graves/ring_outliers 0.63 0.63 0.34 0.62 1 1 1 1 0.62 1 1 0.63 -graves/zigzag 0.53 0.62 0.36 0.96 1 1 1 0.78 0.53 1 0.68 0.54 -graves/zigzag_noisy 0.52 0.64 0.52 0.9 0.77 1 1 0.54 0.51 0.47 0.63 0.66 -graves/zigzag_outliers 0.31 0.51 0.33 0.98 1 1 1 0.83 0.44 0.48 0.34 0.39 -other/chameleon_t4_8k 0.64 0.62 0.55 0.56 0.83 1 0.93 0.84 0.6 0 0.63 0.61 -other/chameleon_t5_8k 1 1 0.73 1 1 1 0.83 0.59 1 0 1 1 -other/chameleon_t7_10k 0.45 0.44 0.37 0.4 0.53 0.7 1 0.53 0.42 0 0.38 0.43 -other/chameleon_t8_8k 0.37 0.39 0.33 0.44 0.61 0.64 0.71 0.57 0.37 0 0.36 0.37 -other/hdbscan 0.43 0.63 0.46 0.82 0.97 0.71 0.71 0.75 0.64 0 0.33 0.84 -other/iris 0.76 0.73 0.64 0.9 0.89 0.89 0.56 0.89 0.73 0.56 0.75 0.73 -other/iris5 0.56 0.51 0.34 0.82 0.59 0.59 0.79 0.52 0.51 0.15 0.53 0.51 -other/square 0.15 0 0.17 0.04 1 1 1 1 
0.03 1 0.17 0.25 -sipu/a1 0.93 0.93 0.92 0.96 0.94 0.9 0.83 0.77 0.97 0.44 0.94 0.91 -sipu/a2 0.93 0.94 0.91 0.96 0.95 0.92 0.83 0.77 0.97 0.35 0.91 0.92 -sipu/a3 0.94 0.94 0.92 0.95 0.96 0.94 0.82 0.77 0.95 0.32 0.93 0.94 -sipu/aggregation 1 0.82 0.78 1 0.48 0.57 0.88 0.61 0.76 0.8 0.99 0.81 -sipu/compound 0.91 0.88 0.91 0.91 0.78 0.78 0.88 0.62 0.76 0.93 0.87 0.88 -sipu/d31 0.91 0.93 0.92 0.95 0.94 0.9 0.71 0.8 0.95 0.17 0.94 0.92 -sipu/flame 0.44 0.22 0 0.34 1 1 1 0.35 0.48 0.01 0.83 0.22 -sipu/jain 0.78 0.51 0.78 0 0.04 1 1 0.32 0.32 0.26 0.51 0.51 -sipu/pathbased 0.59 0.54 0.41 0.6 0.97 0.97 0.7 0.54 0.5 0 0.6 0.54 -sipu/r15 1 1 1 1 0.99 0.99 1 0.99 1 1 1 1 -sipu/s1 0.98 0.99 0.97 0.99 0.99 0.99 0.99 0.76 0.99 0.46 0.99 0.98 -sipu/s2 0.91 0.9 0.79 0.94 0.92 0.92 0.78 0.77 0.94 0 0.94 0.91 -sipu/s3 0.6 0.68 0.51 0.73 0.69 0.67 0.56 0.61 0.72 0 0.71 0.68 -sipu/s4 0.49 0.56 0.44 0.64 0.62 0.59 0.47 0.55 0.63 0 0.57 0.55 -sipu/spiral 0 0 0 0 1 1 1 0.73 0 1 0 0 -sipu/unbalance 1 1 0.61 1 0.57 0.62 0.78 0.53 1 1 1 1 -uci/ecoli 0.74 0.49 0.62 0.61 0.36 0.46 0.66 0.33 0.46 0.04 0.35 0.49 -uci/glass 0.02 0.25 0.23 0.24 0.12 0.25 0.22 0.23 0.27 0.01 0.22 0.26 -uci/ionosphere 0 0.19 0.01 0.4 0.21 0.21 0 0.09 0.18 0 0 0.19 -uci/sonar 0.01 0 0 0 0 0 0.01 0 0.01 0 0 0 -uci/statlog 0 0.33 0.1 0.47 0.62 0.52 0.47 0.53 0.36 0 nan 0.31 -uci/wdbc 0.05 0.29 0.05 0.71 0.09 0.28 0.28 0.63 0.49 0 0 0.29 -uci/wine 0.29 0.37 0.37 0.82 0.36 0.36 0.25 0.39 0.37 0.01 0.32 0.37 -uci/yeast 0.01 0.12 0.09 0.05 0.11 0.18 0.08 0.08 0.14 0.01 0.01 0.13 -wut/circles 1 1 1 1 1 1 1 1 1 1 1 1 -wut/cross 0 0.02 0.36 0.47 0.76 0.46 0.1 0.53 0.11 0 0 0.07 -wut/graph 0.41 0.44 0.4 0.86 0.48 0.49 0.39 0.52 0.44 0.03 0.41 0.46 -wut/isolation 0 0 0 0 1 1 1 1 0 1 0 0 -wut/labirynth 0.36 0.49 0.3 0.62 0.5 0.59 0.72 0.72 0.29 0.76 0.39 0.34 -wut/mk1 0.99 0.99 0.97 0.99 0.99 0.99 0.99 0.53 0.99 0.56 0.99 0.99 -wut/mk2 0 0.01 0.01 0.01 1 1 1 1 0.01 1 0 0 -wut/mk3 0.56 0.85 0.84 0.88 0.8 0.8 0.56 0.54 0.89 0 
0.88 0.86 -wut/mk4 0.04 0.14 0.28 0.5 1 1 1 0.59 0.2 1 0.2 0.19 -wut/olympic 0.14 0.15 0.12 0.14 0.17 0.15 0.09 0.21 0.11 0 0.13 0.13 -wut/smile 0.99 0.61 0.77 0.61 0.64 1 1 0.62 0.61 1 1 0.65 -wut/stripes 0 0 0.01 0.01 1 1 1 1 0.01 1 0.01 0.01 -wut/trajectories 1 1 0.71 1 1 1 1 1 1 1 1 1 -wut/trapped_lovers 0.13 0.14 0.25 0.14 0.39 1 1 0.39 0.15 1 0.75 0.16 -wut/twosplashes 0 0.28 0.22 0.67 0.5 0.5 0.5 0.73 0.28 0 0.28 0.19 -wut/windows 0.1 0.1 0.09 0.14 0.14 0.2 1 0.35 0.09 1 0.15 0.1 -wut/x1 1 1 1 1 1 1 1 1 1 1 1 1 -wut/x2 0.51 0.97 1 0.69 0.81 0.81 0.5 0.69 0.97 0 0.27 0.97 -wut/x3 0.96 0.98 0.47 0.93 0.88 0.96 0.96 0.65 1 0.02 0.51 0.98 -wut/z1 0.21 0.1 0.19 0.01 0.39 0.39 0.14 0.39 0.2 0 0.15 0.1 -wut/z2 0.51 0.5 0.44 1 0.5 0.63 0.82 0.49 0.47 0.73 0.99 0.43 -wut/z3 1 1 0.93 1 0.63 0.66 0.92 0.65 1 0.74 0.98 1 -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== - - -fm -^^ - -Summary statistics for ``fm`` (best=1.0): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 73 0.73 0.21 0.34 0.55 0.72 0.94 1 -Birch_0.01 73 0.73 0.21 0.28 0.58 0.7 0.94 1 -Complete linkage 73 0.69 0.2 0.3 0.55 0.65 0.9 1 -Gaussian mixtures 73 0.79 0.22 0.26 0.66 0.87 0.99 1 -Genie_0.1 73 0.82 0.2 0.25 0.66 0.92 1 1 -Genie_0.3 73 0.85 0.18 0.37 0.73 0.94 1 1 -Genie_0.5 73 0.86 0.18 0.36 0.75 0.94 1 1 -ITM 73 0.78 0.18 0.23 0.65 0.78 0.99 1 -K-means 73 0.72 0.22 0.29 0.51 0.7 0.95 1 -Single linkage 73 0.73 0.24 0.26 0.53 0.71 1 1 -Spectral_RBF_5 72 0.78 0.21 0.33 0.61 0.83 0.99 1 -Ward linkage 73 0.72 0.21 0.29 0.58 0.7 0.92 1 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``fm`` (best=1): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 72 5.9 3.3 1 3 6 8.2 12 -Birch_0.01 72 6 3 1 4.8 6.5 8 12 -Complete linkage 72 7.7 3.5 1 5.8 8 11 12 -Gaussian mixtures 72 4.5 3.6 1 1 3 8 12 -Genie_0.1 72 4.1 3.6 1 1 3 7 12 -Genie_0.3 72 3.5 3.1 1 1 2 5 11 -Genie_0.5 72 3.9 3.6 1 1 2 6.2 12 -ITM 72 5.9 4.2 1 1 5 10 12 -K-means 72 6.3 4 1 1.8 8 9 12 -Single linkage 72 6.1 4.9 1 1 6 12 12 -Spectral_RBF_5 72 4.8 3.4 1 1 5 8 11 -Ward linkage 72 6.3 3.1 1 5 7 9 12 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``fm`` (best=1.0): - - -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -.. 
Average linkage Birch_0.01 Complete linkage Gaussian mixtures Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Spectral_RBF_5 Ward linkage -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -fcps/atom 0.65 0.65 0.65 0.66 1 1 1 1 0.65 1 1 0.65 -fcps/chainlink 0.68 0.68 0.69 0.95 1 1 1 1 0.55 1 1 0.68 -fcps/engytime 0.65 0.89 0.65 0.99 0.92 0.92 0.92 0.92 0.92 0.71 0.96 0.87 -fcps/hepta 1 1 1 1 1 1 1 0.91 1 1 1 1 -fcps/lsun 0.6 0.63 0.63 1 1 1 1 1 0.65 1 0.88 0.61 -fcps/target 0.71 0.8 0.71 0.8 1 1 1 1 0.79 1 1 0.79 -fcps/tetra 0.99 0.98 0.99 1 1 1 1 1 1 0.49 1 0.98 -fcps/twodiamonds 1 1 0.99 1 0.99 0.99 0.99 0.99 1 0.71 1 1 -fcps/wingnut 1 1 1 0.93 1 1 1 1 0.93 1 0.89 0.8 -graves/dense 0.96 0.96 0.67 1 0.98 0.98 0.98 1 0.9 0.69 0.96 0.96 -graves/fuzzyx 0.8 0.95 0.97 1 0.8 0.66 0.66 0.96 1 0.71 0.7 0.89 -graves/line 0.6 0.6 0.61 1 0.6 1 1 0.63 0.6 1 1 0.64 -graves/parabolic 0.8 0.79 0.8 0.77 0.91 0.91 0.91 0.82 0.79 0.71 0.83 0.81 -graves/ring 0.65 0.65 0.66 0.66 1 1 1 1 0.5 1 1 0.65 -graves/ring_noisy 0.71 0.65 0.68 0.66 1 1 1 1 0.65 0.71 1 0.65 -graves/ring_outliers 0.78 0.78 0.65 0.78 1 1 1 1 0.78 1 1 0.78 -graves/zigzag 0.63 0.7 0.53 0.97 1 1 1 0.82 0.64 1 0.75 0.64 -graves/zigzag_noisy 0.62 0.71 0.63 0.92 0.82 1 1 0.63 0.62 0.67 0.72 0.73 -graves/zigzag_outliers 0.55 0.61 0.52 0.98 1 1 1 0.86 0.59 0.67 0.66 0.58 -other/chameleon_t4_8k 0.72 0.69 0.63 0.64 0.86 1 0.94 0.87 0.67 0.44 0.69 0.68 -other/chameleon_t5_8k 1 1 0.78 1 1 1 0.87 0.67 1 0.41 1 1 -other/chameleon_t7_10k 0.54 0.54 0.47 0.5 0.61 0.76 1 0.62 0.51 0.43 0.48 0.52 -other/chameleon_t8_8k 0.47 0.49 0.43 0.53 0.68 0.7 0.77 0.64 0.46 0.41 0.46 0.47 -other/hdbscan 0.6 0.7 0.58 0.85 0.97 0.79 0.79 0.8 0.7 0.42 0.53 0.87 -other/iris 0.84 0.82 0.77 0.94 0.92 0.92 0.75 0.92 0.82 0.76 0.83 0.82 -other/iris5 0.77 0.74 0.67 0.9 0.76 0.76 0.89 0.72 0.73 0.69 
0.75 0.74 -other/square 0.65 0.5 0.65 0.52 1 1 1 1 0.52 1 0.65 0.67 -sipu/a1 0.93 0.94 0.92 0.96 0.94 0.9 0.85 0.78 0.97 0.56 0.94 0.92 -sipu/a2 0.94 0.94 0.91 0.96 0.95 0.92 0.84 0.78 0.97 0.48 0.91 0.92 -sipu/a3 0.94 0.94 0.92 0.95 0.96 0.94 0.84 0.77 0.95 0.45 0.93 0.94 -sipu/aggregation 1 0.86 0.83 1 0.58 0.66 0.91 0.69 0.82 0.86 0.99 0.86 -sipu/compound 0.94 0.92 0.94 0.94 0.85 0.85 0.92 0.74 0.83 0.95 0.91 0.92 -sipu/d31 0.91 0.93 0.93 0.95 0.94 0.9 0.74 0.81 0.96 0.35 0.94 0.92 -sipu/flame 0.73 0.63 0.62 0.68 1 1 1 0.69 0.75 0.73 0.92 0.63 -sipu/jain 0.92 0.79 0.92 0.59 0.59 1 1 0.7 0.7 0.8 0.79 0.79 -sipu/pathbased 0.73 0.67 0.6 0.74 0.98 0.98 0.8 0.69 0.66 0.57 0.74 0.67 -sipu/r15 1 1 1 1 0.99 0.99 1 0.99 1 1 1 1 -sipu/s1 0.98 0.99 0.97 0.99 0.99 0.99 0.99 0.77 0.99 0.59 0.99 0.98 -sipu/s2 0.92 0.91 0.81 0.95 0.92 0.92 0.8 0.78 0.94 0.26 0.94 0.91 -sipu/s3 0.64 0.7 0.55 0.75 0.71 0.69 0.61 0.64 0.74 0.26 0.73 0.7 -sipu/s4 0.55 0.59 0.49 0.67 0.64 0.62 0.53 0.58 0.66 0.26 0.61 0.58 -sipu/spiral 0.36 0.34 0.34 0.33 1 1 1 0.82 0.33 1 0.33 0.34 -sipu/unbalance 1 1 0.77 1 0.69 0.73 0.84 0.66 1 1 1 1 -uci/ecoli 0.82 0.61 0.72 0.71 0.51 0.59 0.75 0.48 0.59 0.53 0.62 0.61 -uci/glass 0.49 0.5 0.55 0.47 0.33 0.48 0.48 0.41 0.51 0.51 0.47 0.51 -uci/ionosphere 0.73 0.61 0.73 0.71 0.64 0.64 0.64 0.56 0.61 0.73 0.73 0.61 -uci/sonar 0.65 0.53 0.53 0.51 0.52 0.52 0.64 0.51 0.5 0.7 0.7 0.53 -uci/statlog 0.37 0.47 0.43 0.57 0.68 0.6 0.58 0.6 0.48 0.38 nan 0.45 -uci/wdbc 0.72 0.74 0.72 0.87 0.6 0.74 0.74 0.82 0.79 0.73 0.73 0.74 -uci/wine 0.62 0.58 0.59 0.88 0.58 0.58 0.59 0.6 0.58 0.56 0.63 0.58 -uci/yeast 0.46 0.28 0.42 0.26 0.25 0.39 0.39 0.23 0.3 0.47 0.47 0.29 -wut/circles 1 1 1 1 1 1 1 1 1 1 1 1 -wut/cross 0.49 0.45 0.6 0.64 0.82 0.62 0.44 0.67 0.44 0.5 0.5 0.44 -wut/graph 0.49 0.51 0.48 0.88 0.54 0.55 0.49 0.57 0.51 0.31 0.49 0.53 -wut/isolation 0.34 0.35 0.34 0.33 1 1 1 1 0.33 1 0.33 0.34 -wut/labirynth 0.51 0.61 0.46 0.72 0.62 0.69 0.79 0.79 0.45 0.85 0.53 0.49 
-wut/mk1 0.99 0.99 0.98 0.99 0.99 0.99 0.99 0.7 0.99 0.77 0.99 0.99 -wut/mk2 0.5 0.51 0.5 0.5 1 1 1 1 0.5 1 0.5 0.51 -wut/mk3 0.77 0.9 0.9 0.92 0.86 0.86 0.75 0.7 0.93 0.57 0.92 0.91 -wut/mk4 0.49 0.49 0.55 0.67 1 1 1 0.73 0.5 1 0.51 0.5 -wut/olympic 0.35 0.33 0.3 0.32 0.37 0.37 0.36 0.38 0.29 0.45 0.33 0.31 -wut/smile 0.99 0.73 0.86 0.73 0.75 1 1 0.72 0.73 1 1 0.76 -wut/stripes 0.51 0.54 0.53 0.51 1 1 1 1 0.51 1 0.51 0.55 -wut/trajectories 1 1 0.8 1 1 1 1 1 1 1 1 1 -wut/trapped_lovers 0.5 0.5 0.54 0.5 0.64 1 1 0.64 0.5 1 0.86 0.5 -wut/twosplashes 0.69 0.64 0.61 0.84 0.75 0.75 0.75 0.87 0.64 0.7 0.64 0.6 -wut/windows 0.39 0.39 0.39 0.41 0.43 0.5 1 0.58 0.37 1 0.43 0.4 -wut/x1 1 1 1 1 1 1 1 1 1 1 1 1 -wut/x2 0.73 0.98 1 0.8 0.87 0.87 0.72 0.79 0.98 0.57 0.6 0.98 -wut/x3 0.97 0.99 0.62 0.96 0.91 0.97 0.97 0.76 1 0.66 0.69 0.99 -wut/z1 0.51 0.42 0.46 0.36 0.6 0.6 0.5 0.6 0.47 0.55 0.43 0.42 -wut/z2 0.68 0.66 0.62 1 0.66 0.76 0.89 0.65 0.64 0.86 0.99 0.61 -wut/z3 1 1 0.95 1 0.74 0.77 0.94 0.74 1 0.84 0.99 1 -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== - - -ami -^^^ - -Summary statistics for ``ami`` (best=1.0): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 73 0.57 0.36 0 0.27 0.65 0.95 1 -Birch_0.01 73 0.61 0.33 0 0.34 0.68 0.95 1 -Complete linkage 73 0.56 0.32 0 0.35 0.56 0.88 1 -Gaussian mixtures 73 0.68 0.34 0 0.43 0.8 0.98 1 -Genie_0.1 73 0.79 0.26 0 0.71 0.87 1 1 -Genie_0.3 73 0.82 0.24 0 0.74 0.95 1 1 -Genie_0.5 73 0.82 0.25 0.06 0.74 0.94 1 1 -ITM 73 0.75 0.24 0 0.63 0.77 0.97 1 -K-means 73 0.6 0.33 0 0.36 0.65 0.97 1 -Single linkage 73 0.49 0.46 0 0 0.71 1 1 -Spectral_RBF_5 72 0.67 0.34 0 0.47 0.76 0.99 1 -Ward linkage 73 0.61 0.32 0 0.35 0.65 0.93 1 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``ami`` (best=1): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 72 6.5 3.6 1 3.8 6 9 12 -Birch_0.01 72 5.7 3 1 3.8 6 8 12 -Complete linkage 72 7.6 3.3 1 6 8 11 12 -Gaussian mixtures 72 4.4 3.7 1 1 3 7 12 -Genie_0.1 72 3.7 3.2 1 1 3 5 12 -Genie_0.3 72 3.1 2.9 1 1 1.5 4.2 11 -Genie_0.5 72 3.9 3.6 1 1 1.5 7.2 12 -ITM 72 5.6 3.9 1 1.8 5 9 12 -K-means 72 5.6 3.9 1 1 6 9 12 -Single linkage 72 7.5 5 1 1 11 12 12 -Spectral_RBF_5 72 5 3.5 1 1 4.5 8 11 -Ward linkage 72 5.9 3.1 1 3.8 6 8 12 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``ami`` (best=1.0): - - -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -.. 
Average linkage Birch_0.01 Complete linkage Gaussian mixtures Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Spectral_RBF_5 Ward linkage -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -fcps/atom 0.22 0.22 0.2 0.13 1 1 1 1 0.29 1 1 0.22 -fcps/chainlink 0.36 0.37 0.39 0.84 1 1 1 1 0.07 1 1 0.37 -fcps/engytime 0.16 0.68 0.15 0.96 0.79 0.79 0.79 0.74 0.77 0 0.87 0.68 -fcps/hepta 1 1 1 1 1 1 1 0.94 1 1 1 1 -fcps/lsun 0.5 0.53 0.53 1 1 1 1 1 0.54 1 0.82 0.51 -fcps/target 0.33 0.64 0.38 0.65 1 1 1 1 0.63 1 1 0.63 -fcps/tetra 0.99 0.96 0.98 1 1 1 1 1 1 0 1 0.96 -fcps/twodiamonds 0.99 0.99 0.97 1 0.95 0.95 0.95 0.97 1 0 1 1 -fcps/wingnut 1 1 1 0.78 1 1 1 1 0.77 1 0.68 0.49 -graves/dense 0.88 0.88 0.35 1 0.93 0.93 0.93 1 0.76 0.02 0.88 0.88 -graves/fuzzyx 0.8 0.84 0.9 0.99 0.79 0.64 0.59 0.94 0.99 0 0.69 0.75 -graves/line 0.14 0.14 0.14 1 0.2 1 1 0.25 0.18 1 1 0.1 -graves/parabolic 0.49 0.57 0.51 0.43 0.74 0.74 0.74 0.61 0.48 0 0.55 0.52 -graves/ring 0.23 0.24 0.31 0.13 1 1 1 1 0 1 1 0.24 -graves/ring_noisy 0 0.23 0.36 0.12 1 1 1 1 0.28 0 1 0.26 -graves/ring_outliers 0.65 0.65 0.41 0.65 1 1 1 1 0.65 1 1 0.65 -graves/zigzag 0.67 0.76 0.56 0.96 1 1 1 0.85 0.71 1 0.8 0.71 -graves/zigzag_noisy 0.66 0.75 0.67 0.89 0.85 1 1 0.68 0.66 0.74 0.78 0.76 -graves/zigzag_outliers 0.49 0.65 0.53 0.97 1 1 1 0.9 0.61 0.74 0.53 0.57 -other/chameleon_t4_8k 0.76 0.73 0.62 0.69 0.91 1 0.95 0.87 0.7 0 0.72 0.73 -other/chameleon_t5_8k 1 1 0.83 1 1 1 0.93 0.75 1 0 1 1 -other/chameleon_t7_10k 0.69 0.71 0.6 0.68 0.78 0.87 1 0.76 0.66 0 0.64 0.69 -other/chameleon_t8_8k 0.59 0.58 0.55 0.64 0.79 0.79 0.86 0.76 0.59 0 0.58 0.59 -other/hdbscan 0.62 0.75 0.61 0.82 0.97 0.87 0.87 0.85 0.73 0 0.55 0.86 -other/iris 0.8 0.77 0.72 0.9 0.87 0.87 0.7 0.87 0.76 0.71 0.8 0.77 -other/iris5 0.63 0.56 0.46 0.81 0.58 0.58 0.76 0.54 0.54 0.34 0.61 0.56 
-other/square 0.27 0 0.28 0.03 1 1 1 1 0.02 1 0.28 0.35 -sipu/a1 0.95 0.96 0.95 0.97 0.96 0.95 0.94 0.89 0.97 0.78 0.96 0.95 -sipu/a2 0.96 0.96 0.95 0.98 0.97 0.96 0.94 0.9 0.98 0.76 0.96 0.96 -sipu/a3 0.97 0.97 0.96 0.97 0.97 0.97 0.95 0.91 0.97 0.76 0.96 0.97 -sipu/aggregation 1 0.92 0.9 1 0.7 0.76 0.92 0.78 0.88 0.88 0.99 0.92 -sipu/compound 0.93 0.88 0.93 0.93 0.85 0.85 0.88 0.74 0.83 0.93 0.86 0.88 -sipu/d31 0.95 0.95 0.95 0.96 0.96 0.95 0.9 0.91 0.97 0.63 0.96 0.95 -sipu/flame 0.48 0.35 0.12 0.42 1 1 1 0.43 0.43 0.02 0.73 0.35 -sipu/jain 0.7 0.5 0.7 0.2 0.23 1 1 0.39 0.37 0.24 0.5 0.5 -sipu/pathbased 0.64 0.59 0.5 0.66 0.95 0.95 0.81 0.61 0.58 0 0.67 0.59 -sipu/r15 1 1 1 1 0.99 0.99 1 0.99 1 1 1 1 -sipu/s1 0.98 0.99 0.98 0.99 0.99 0.99 0.99 0.88 0.99 0.79 0.99 0.98 -sipu/s2 0.93 0.92 0.88 0.95 0.93 0.93 0.91 0.86 0.95 0 0.94 0.93 -sipu/s3 0.75 0.77 0.7 0.8 0.78 0.77 0.75 0.74 0.79 0 0.79 0.77 -sipu/s4 0.66 0.69 0.63 0.73 0.71 0.7 0.66 0.68 0.72 0 0.71 0.69 -sipu/spiral 0 0 0 0 1 1 1 0.78 0 1 0 0 -sipu/unbalance 1 1 0.82 1 0.75 0.77 0.82 0.75 1 0.99 1 1 -uci/ecoli 0.71 0.62 0.64 0.57 0.49 0.54 0.57 0.49 0.58 0.11 0.48 0.62 -uci/glass 0.07 0.34 0.35 0.33 0.25 0.38 0.34 0.33 0.4 0.03 0.31 0.37 -uci/ionosphere 0 0.14 0.01 0.32 0.13 0.13 0.06 0.09 0.13 0 0 0.14 -uci/sonar 0 0 0 0 0 0 0.07 0 0.01 0 0 0 -uci/statlog 0.01 0.53 0.35 0.61 0.68 0.68 0.7 0.63 0.52 0 nan 0.49 -uci/wdbc 0.09 0.32 0.09 0.61 0.24 0.31 0.31 0.51 0.46 0 0 0.32 -uci/wine 0.4 0.41 0.44 0.82 0.41 0.41 0.38 0.37 0.42 0.04 0.42 0.41 -uci/yeast 0.05 0.22 0.18 0.14 0.22 0.25 0.19 0.19 0.26 0.05 0.06 0.23 -wut/circles 1 1 1 1 1 1 1 1 1 1 1 1 -wut/cross 0.04 0.18 0.48 0.61 0.81 0.62 0.36 0.7 0.36 0 0.01 0.31 -wut/graph 0.62 0.63 0.6 0.89 0.68 0.69 0.62 0.69 0.64 0.23 0.62 0.65 -wut/isolation 0 0 0 0 1 1 1 1 0 1 0 0 -wut/labirynth 0.59 0.71 0.54 0.76 0.71 0.78 0.88 0.87 0.5 0.85 0.6 0.58 -wut/mk1 0.98 0.98 0.95 0.98 0.98 0.98 0.98 0.61 0.98 0.72 0.98 0.98 -wut/mk2 0 0.01 0.01 0 1 1 1 1 0.01 1 0 0 
-wut/mk3 0.71 0.83 0.83 0.85 0.8 0.8 0.69 0.61 0.86 0 0.85 0.84 -wut/mk4 0.11 0.2 0.3 0.58 1 1 1 0.65 0.25 1 0.25 0.24 -wut/olympic 0.31 0.27 0.21 0.25 0.33 0.33 0.31 0.31 0.2 0 0.31 0.23 -wut/smile 0.98 0.79 0.83 0.79 0.8 1 1 0.85 0.79 1 1 0.8 -wut/stripes 0 0 0.01 0.01 1 1 1 1 0.01 1 0.01 0.01 -wut/trajectories 1 1 0.83 1 1 1 1 1 1 1 1 1 -wut/trapped_lovers 0.35 0.36 0.45 0.36 0.62 1 1 0.62 0.38 1 0.74 0.39 -wut/twosplashes 0.04 0.21 0.17 0.56 0.4 0.4 0.4 0.69 0.21 0.01 0.21 0.15 -wut/windows 0.4 0.4 0.4 0.43 0.48 0.56 1 0.65 0.35 1 0.43 0.4 -wut/x1 1 1 1 1 1 1 1 1 1 1 1 1 -wut/x2 0.72 0.97 1 0.75 0.84 0.84 0.72 0.77 0.97 0 0.41 0.97 -wut/x3 0.91 0.97 0.63 0.91 0.87 0.91 0.91 0.69 1 0.01 0.68 0.97 -wut/z1 0.32 0.13 0.26 0.03 0.47 0.47 0.24 0.46 0.27 0.05 0.19 0.13 -wut/z2 0.72 0.72 0.64 1 0.72 0.74 0.86 0.71 0.68 0.81 0.98 0.69 -wut/z3 0.99 0.99 0.93 0.99 0.74 0.74 0.91 0.75 1 0.84 0.97 1 -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== - - -nmi -^^^ - -Summary statistics for ``nmi`` (best=1.0): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 73 0.58 0.36 0 0.27 0.66 0.95 1 -Birch_0.01 73 0.61 0.33 0 0.35 0.68 0.96 1 -Complete linkage 73 0.57 0.32 0 0.35 0.57 0.88 1 -Gaussian mixtures 73 0.68 0.34 0 0.43 0.8 0.98 1 -Genie_0.1 73 0.79 0.26 0 0.71 0.87 1 1 -Genie_0.3 73 0.83 0.24 0 0.74 0.95 1 1 -Genie_0.5 73 0.82 0.25 0.07 0.74 0.94 1 1 -ITM 73 0.75 0.24 0.01 0.63 0.78 0.97 1 -K-means 73 0.61 0.33 0 0.37 0.65 0.97 1 -Single linkage 73 0.5 0.45 0 0.01 0.72 1 1 -Spectral_RBF_5 72 0.67 0.34 0 0.49 0.76 0.99 1 -Ward linkage 73 0.61 0.32 0 0.35 0.66 0.93 1 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``nmi`` (best=1): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 72 6.6 3.6 1 4 6.5 9.2 12 -Birch_0.01 72 5.8 2.9 1 4 6 8 12 -Complete linkage 72 7.7 3.3 1 6 8.5 11 12 -Gaussian mixtures 72 4.4 3.7 1 1 3 7.2 12 -Genie_0.1 72 3.8 3.3 1 1 2.5 5.2 12 -Genie_0.3 72 3.1 2.9 1 1 1.5 4 11 -Genie_0.5 72 3.8 3.6 1 1 1.5 7.2 12 -ITM 72 5.5 3.9 1 1 5.5 9 12 -K-means 72 5.7 3.9 1 1 6 9 12 -Single linkage 72 7.5 5 1 1 10.5 12 12 -Spectral_RBF_5 72 5 3.5 1 1 4 8 11 -Ward linkage 72 6 3.1 1 4 6 8 12 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``nmi`` (best=1.0): - - -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -.. 
Average linkage Birch_0.01 Complete linkage Gaussian mixtures Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Spectral_RBF_5 Ward linkage -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -fcps/atom 0.22 0.22 0.2 0.13 1 1 1 1 0.29 1 1 0.22 -fcps/chainlink 0.36 0.37 0.39 0.84 1 1 1 1 0.07 1 1 0.37 -fcps/engytime 0.16 0.68 0.15 0.96 0.79 0.79 0.79 0.74 0.77 0 0.87 0.68 -fcps/hepta 1 1 1 1 1 1 1 0.95 1 1 1 1 -fcps/lsun 0.5 0.53 0.53 1 1 1 1 1 0.54 1 0.82 0.51 -fcps/target 0.34 0.64 0.38 0.65 1 1 1 1 0.64 1 1 0.64 -fcps/tetra 0.99 0.96 0.98 1 1 1 1 1 1 0.01 1 0.96 -fcps/twodiamonds 0.99 0.99 0.97 1 0.95 0.95 0.95 0.97 1 0 1 1 -fcps/wingnut 1 1 1 0.78 1 1 1 1 0.77 1 0.68 0.49 -graves/dense 0.88 0.88 0.35 1 0.93 0.93 0.93 1 0.76 0.03 0.88 0.88 -graves/fuzzyx 0.8 0.84 0.9 0.99 0.79 0.64 0.59 0.94 0.99 0.01 0.69 0.75 -graves/line 0.14 0.14 0.14 1 0.2 1 1 0.25 0.18 1 1 0.1 -graves/parabolic 0.49 0.57 0.51 0.43 0.74 0.74 0.74 0.61 0.48 0.01 0.55 0.52 -graves/ring 0.23 0.24 0.31 0.13 1 1 1 1 0 1 1 0.24 -graves/ring_noisy 0 0.24 0.36 0.12 1 1 1 1 0.28 0 1 0.27 -graves/ring_outliers 0.66 0.66 0.41 0.65 1 1 1 1 0.65 1 1 0.66 -graves/zigzag 0.67 0.76 0.57 0.96 1 1 1 0.85 0.71 1 0.81 0.71 -graves/zigzag_noisy 0.67 0.76 0.68 0.89 0.85 1 1 0.68 0.67 0.74 0.79 0.77 -graves/zigzag_outliers 0.5 0.66 0.54 0.98 1 1 1 0.9 0.62 0.74 0.53 0.57 -other/chameleon_t4_8k 0.76 0.73 0.62 0.69 0.91 1 0.95 0.87 0.7 0 0.72 0.73 -other/chameleon_t5_8k 1 1 0.83 1 1 1 0.93 0.75 1 0 1 1 -other/chameleon_t7_10k 0.69 0.71 0.6 0.68 0.78 0.87 1 0.76 0.66 0 0.64 0.69 -other/chameleon_t8_8k 0.59 0.58 0.55 0.64 0.79 0.79 0.86 0.76 0.59 0 0.58 0.59 -other/hdbscan 0.62 0.75 0.61 0.82 0.97 0.87 0.88 0.85 0.73 0 0.55 0.86 -other/iris 0.81 0.77 0.72 0.9 0.87 0.87 0.71 0.87 0.76 0.72 0.8 0.77 -other/iris5 0.64 0.57 0.47 0.81 0.59 0.59 0.76 0.55 0.55 0.36 0.62 0.57 
-other/square 0.27 0 0.28 0.03 1 1 1 1 0.02 1 0.28 0.35 -sipu/a1 0.95 0.96 0.95 0.97 0.96 0.95 0.94 0.89 0.97 0.79 0.96 0.95 -sipu/a2 0.96 0.96 0.95 0.98 0.97 0.96 0.95 0.91 0.98 0.77 0.96 0.96 -sipu/a3 0.97 0.97 0.96 0.98 0.98 0.97 0.95 0.91 0.98 0.76 0.97 0.97 -sipu/aggregation 1 0.92 0.9 1 0.71 0.76 0.92 0.78 0.88 0.88 0.99 0.92 -sipu/compound 0.93 0.88 0.93 0.93 0.85 0.85 0.89 0.75 0.83 0.93 0.86 0.88 -sipu/d31 0.95 0.96 0.95 0.96 0.96 0.95 0.91 0.91 0.97 0.64 0.96 0.95 -sipu/flame 0.48 0.35 0.13 0.42 1 1 1 0.43 0.43 0.02 0.73 0.35 -sipu/jain 0.7 0.51 0.7 0.2 0.23 1 1 0.39 0.37 0.25 0.51 0.51 -sipu/pathbased 0.64 0.6 0.51 0.66 0.95 0.95 0.81 0.61 0.59 0.02 0.67 0.6 -sipu/r15 1 1 1 1 0.99 0.99 1 0.99 1 1 1 1 -sipu/s1 0.98 0.99 0.98 0.99 0.99 0.99 0.99 0.88 0.99 0.79 0.99 0.98 -sipu/s2 0.93 0.92 0.88 0.95 0.93 0.93 0.91 0.86 0.95 0.01 0.94 0.93 -sipu/s3 0.75 0.77 0.71 0.8 0.78 0.78 0.75 0.75 0.79 0.01 0.79 0.77 -sipu/s4 0.67 0.69 0.63 0.73 0.72 0.71 0.67 0.68 0.72 0.01 0.71 0.69 -sipu/spiral 0 0.01 0.01 0 1 1 1 0.79 0 1 0 0.01 -sipu/unbalance 1 1 0.82 1 0.75 0.77 0.82 0.75 1 0.99 1 1 -uci/ecoli 0.72 0.63 0.65 0.59 0.51 0.56 0.59 0.51 0.6 0.15 0.51 0.63 -uci/glass 0.11 0.37 0.38 0.36 0.28 0.41 0.37 0.35 0.43 0.07 0.34 0.39 -uci/ionosphere 0.01 0.14 0.02 0.32 0.13 0.13 0.07 0.09 0.13 0.01 0.01 0.14 -uci/sonar 0.01 0 0 0.01 0 0 0.08 0.01 0.01 0.01 0.01 0 -uci/statlog 0.02 0.53 0.35 0.62 0.68 0.68 0.7 0.63 0.52 0.01 nan 0.49 -uci/wdbc 0.09 0.32 0.09 0.61 0.24 0.32 0.32 0.51 0.46 0.01 0.01 0.32 -uci/wine 0.4 0.42 0.44 0.82 0.42 0.42 0.39 0.38 0.43 0.06 0.43 0.42 -uci/yeast 0.07 0.23 0.19 0.15 0.23 0.27 0.2 0.2 0.27 0.07 0.07 0.24 -wut/circles 1 1 1 1 1 1 1 1 1 1 1 1 -wut/cross 0.04 0.19 0.48 0.61 0.81 0.62 0.36 0.7 0.37 0 0.01 0.31 -wut/graph 0.62 0.63 0.61 0.89 0.68 0.69 0.62 0.7 0.64 0.24 0.63 0.66 -wut/isolation 0 0 0 0 1 1 1 1 0 1 0 0 -wut/labirynth 0.59 0.71 0.54 0.76 0.71 0.78 0.88 0.87 0.5 0.85 0.6 0.58 -wut/mk1 0.98 0.98 0.95 0.98 0.98 0.98 0.98 0.62 0.98 0.73 
0.98 0.98 -wut/mk2 0 0.01 0.01 0.01 1 1 1 1 0.01 1 0 0 -wut/mk3 0.71 0.83 0.83 0.85 0.8 0.8 0.69 0.61 0.86 0.01 0.85 0.84 -wut/mk4 0.12 0.2 0.3 0.58 1 1 1 0.65 0.25 1 0.25 0.24 -wut/olympic 0.31 0.27 0.21 0.25 0.33 0.34 0.31 0.31 0.2 0 0.31 0.23 -wut/smile 0.98 0.79 0.83 0.79 0.8 1 1 0.85 0.79 1 1 0.81 -wut/stripes 0 0 0.01 0.01 1 1 1 1 0.01 1 0.01 0.01 -wut/trajectories 1 1 0.83 1 1 1 1 1 1 1 1 1 -wut/trapped_lovers 0.35 0.36 0.45 0.36 0.63 1 1 0.62 0.38 1 0.74 0.39 -wut/twosplashes 0.04 0.21 0.17 0.56 0.4 0.4 0.4 0.69 0.21 0.01 0.21 0.15 -wut/windows 0.41 0.41 0.4 0.43 0.48 0.56 1 0.65 0.35 1 0.43 0.4 -wut/x1 1 1 1 1 1 1 1 1 1 1 1 1 -wut/x2 0.73 0.97 1 0.75 0.85 0.85 0.73 0.78 0.97 0.03 0.43 0.97 -wut/x3 0.91 0.97 0.64 0.91 0.87 0.91 0.91 0.7 1 0.04 0.69 0.97 -wut/z1 0.33 0.14 0.27 0.04 0.47 0.47 0.24 0.47 0.27 0.07 0.19 0.14 -wut/z2 0.72 0.72 0.64 1 0.72 0.74 0.86 0.72 0.68 0.81 0.98 0.69 -wut/z3 0.99 0.99 0.93 0.99 0.74 0.75 0.91 0.75 1 0.84 0.97 1 -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== - - -npa -^^^ - -Summary statistics for ``npa`` (best=1.0): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 73 0.59 0.34 0 0.32 0.6 0.96 1 -Birch_0.01 73 0.64 0.31 0.01 0.44 0.67 0.96 1 -Complete linkage 73 0.59 0.3 0.02 0.39 0.56 0.89 1 -Gaussian mixtures 73 0.71 0.33 0.01 0.51 0.86 0.99 1 -Genie_0.1 73 0.79 0.26 0.02 0.63 0.94 1 1 -Genie_0.3 73 0.83 0.22 0.02 0.68 0.96 1 1 -Genie_0.5 73 0.81 0.25 0.08 0.71 0.92 1 1 -ITM 73 0.75 0.23 0.08 0.6 0.8 0.99 1 -K-means 73 0.64 0.31 0.01 0.42 0.65 0.97 1 -Single linkage 73 0.49 0.42 0 0.07 0.37 1 1 -Spectral_RBF_5 72 0.7 0.32 0.01 0.42 0.83 1 1 -Ward linkage 73 0.64 0.29 0.02 0.41 0.63 0.95 1 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``npa`` (best=1): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 72 6.3 3.8 1 2.8 7 9.2 12 -Birch_0.01 72 5.9 3.1 1 4 6 8 12 -Complete linkage 72 7.6 3.1 1 6.8 8 10 12 -Gaussian mixtures 72 4.3 3.9 1 1 3 7 12 -Genie_0.1 72 4 3.6 1 1 2 7 12 -Genie_0.3 72 3.5 3.2 1 1 2 6 11 -Genie_0.5 72 4.1 3.8 1 1 1.5 7.2 12 -ITM 72 5.4 3.9 1 1 5 9 12 -K-means 72 5.7 3.8 1 1 6.5 9 12 -Single linkage 72 7.5 5.1 1 1 11 12 12 -Spectral_RBF_5 72 5.1 3.6 1 1 5 8 12 -Ward linkage 72 6.1 3.2 1 4 6 8 12 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``npa`` (best=1.0): - - -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -.. 
Average linkage Birch_0.01 Complete linkage Gaussian mixtures Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Spectral_RBF_5 Ward linkage -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -fcps/atom 0.31 0.31 0.29 0.17 1 1 1 1 0.43 1 1 0.31 -fcps/chainlink 0.52 0.53 0.56 0.95 1 1 1 1 0.31 1 1 0.53 -fcps/engytime 0.23 0.88 0.2 0.99 0.92 0.92 0.92 0.91 0.92 0 0.96 0.86 -fcps/hepta 1 1 1 1 1 1 1 0.94 1 1 1 1 -fcps/lsun 0.58 0.62 0.62 1 1 1 1 1 0.65 1 0.9 0.58 -fcps/target 0.6 0.6 0.66 0.58 1 1 1 1 0.55 1 1 0.59 -fcps/tetra 1 0.98 0.99 1 1 1 1 1 1 0.01 1 0.98 -fcps/twodiamonds 1 1 0.99 1 0.99 0.99 0.99 0.99 1 0 1 1 -fcps/wingnut 1 1 1 0.93 1 1 1 1 0.93 1 0.88 0.78 -graves/dense 0.96 0.96 0.51 1 0.98 0.98 0.98 1 0.9 0.03 0.96 0.96 -graves/fuzzyx 0.86 0.95 0.97 1 0.84 0.72 0.63 0.97 1 0.03 0.78 0.88 -graves/line 0.01 0.01 0.02 1 0.19 1 1 0.35 0.14 1 1 0.19 -graves/parabolic 0.77 0.76 0.77 0.73 0.9 0.9 0.9 0.8 0.77 0.02 0.81 0.79 -graves/ring 0.34 0.34 0.46 0.17 1 1 1 1 0.01 1 1 0.34 -graves/ring_noisy 0 0.34 0.52 0.16 1 1 1 1 0.4 0 1 0.38 -graves/ring_outliers 0.58 0.57 0.53 0.55 1 1 1 1 0.55 1 1 0.58 -graves/zigzag 0.69 0.73 0.56 0.98 1 1 1 0.86 0.7 1 0.82 0.63 -graves/zigzag_noisy 0.65 0.77 0.58 0.95 0.88 1 1 0.67 0.64 0.5 0.78 0.79 -graves/zigzag_outliers 0.34 0.53 0.46 0.99 1 1 1 0.9 0.51 0.53 0.4 0.42 -other/chameleon_t4_8k 0.72 0.63 0.6 0.63 0.77 1 0.89 0.83 0.63 0.09 0.64 0.62 -other/chameleon_t5_8k 1 1 0.79 1 1 1 0.82 0.69 1 0.01 1 1 -other/chameleon_t7_10k 0.49 0.45 0.43 0.4 0.55 0.77 1 0.61 0.44 0.21 0.41 0.51 -other/chameleon_t8_8k 0.41 0.46 0.39 0.51 0.6 0.6 0.79 0.58 0.4 0.09 0.4 0.4 -other/hdbscan 0.4 0.72 0.54 0.86 0.98 0.74 0.74 0.86 0.77 0.07 0.47 0.91 -other/iris 0.86 0.84 0.76 0.95 0.94 0.94 0.55 0.94 0.84 0.52 0.85 0.84 -other/iris5 0.8 0.77 0.66 0.93 0.66 0.66 0.91 0.54 0.77 0.31 0.79 0.77 
-other/square 0.39 0.01 0.41 0.19 1 1 1 1 0.17 1 0.41 0.5 -sipu/a1 0.96 0.96 0.96 0.98 0.97 0.91 0.84 0.82 0.98 0.37 0.97 0.95 -sipu/a2 0.97 0.97 0.95 0.98 0.97 0.94 0.83 0.83 0.98 0.3 0.93 0.96 -sipu/a3 0.97 0.97 0.96 0.96 0.98 0.95 0.84 0.83 0.96 0.25 0.95 0.97 -sipu/aggregation 1 0.82 0.75 1 0.55 0.63 0.86 0.63 0.75 0.79 1 0.81 -sipu/compound 0.94 0.81 0.94 0.94 0.75 0.76 0.89 0.66 0.73 0.94 0.82 0.81 -sipu/d31 0.94 0.96 0.96 0.97 0.97 0.93 0.76 0.85 0.98 0.24 0.97 0.96 -sipu/flame 0.67 0.47 0.03 0.59 1 1 1 0.6 0.69 0.29 0.91 0.47 -sipu/jain 0.89 0.72 0.89 0.16 0.25 1 1 0.57 0.57 0.62 0.72 0.72 -sipu/pathbased 0.66 0.64 0.52 0.66 0.98 0.98 0.74 0.68 0.62 0.09 0.68 0.64 -sipu/r15 1 1 1 1 0.99 0.99 1 0.99 1 1 1 1 -sipu/s1 0.99 0.99 0.98 0.99 0.99 0.99 0.99 0.8 0.99 0.44 0.99 0.99 -sipu/s2 0.95 0.95 0.84 0.97 0.96 0.96 0.84 0.86 0.97 0.01 0.97 0.95 -sipu/s3 0.67 0.8 0.57 0.85 0.82 0.77 0.63 0.75 0.84 0.01 0.84 0.81 -sipu/s4 0.59 0.68 0.53 0.79 0.77 0.73 0.54 0.66 0.78 0.01 0.7 0.68 -sipu/spiral 0.04 0.07 0.06 0.03 1 1 1 0.83 0.01 1 0.02 0.07 -sipu/unbalance 1 1 0.64 1 0.47 0.59 0.75 0.41 1 0.98 1 1 -uci/ecoli 0.73 0.59 0.68 0.66 0.46 0.51 0.65 0.46 0.57 0.37 0.58 0.59 -uci/glass 0.25 0.38 0.38 0.41 0.27 0.38 0.38 0.39 0.45 0.24 0.39 0.4 -uci/ionosphere 0.29 0.44 0.29 0.64 0.46 0.46 0.08 0.3 0.42 0.29 0.29 0.44 -uci/sonar 0.11 0.02 0.02 0.08 0.02 0.02 0.11 0.08 0.11 0.08 0.08 0.02 -uci/statlog 0 0.45 0.17 0.53 0.74 0.59 0.51 0.6 0.43 0 nan 0.41 -uci/wdbc 0.33 0.56 0.33 0.84 0.31 0.55 0.55 0.79 0.71 0.26 0.26 0.56 -uci/wine 0.42 0.54 0.51 0.91 0.57 0.57 0.3 0.58 0.55 0.14 0.44 0.54 -uci/yeast 0.25 0.32 0.28 0.29 0.24 0.36 0.3 0.21 0.32 0.24 0.25 0.29 -wut/circles 1 1 1 1 1 1 1 1 1 1 1 1 -wut/cross 0.03 0.17 0.32 0.48 0.87 0.68 0.38 0.44 0.39 0 0.01 0.31 -wut/graph 0.56 0.58 0.56 0.93 0.63 0.59 0.45 0.63 0.59 0.07 0.58 0.62 -wut/isolation 0.03 0.04 0.02 0.01 1 1 1 1 0.01 1 0.01 0.05 -wut/labirynth 0.5 0.59 0.46 0.65 0.55 0.61 0.72 0.72 0.44 0.72 0.51 0.46 -wut/mk1 1 1 
0.98 1 1 1 1 0.68 1 0.5 1 1 -wut/mk2 0.07 0.11 0.09 0.09 1 1 1 1 0.09 1 0.08 0.07 -wut/mk3 0.5 0.92 0.92 0.94 0.88 0.88 0.58 0.68 0.94 0 0.94 0.93 -wut/mk4 0.18 0.31 0.42 0.55 1 1 1 0.74 0.37 1 0.39 0.36 -wut/olympic 0.27 0.26 0.21 0.2 0.32 0.29 0.25 0.36 0.22 0 0.26 0.21 -wut/smile 0.99 0.71 0.83 0.54 0.63 1 1 0.58 0.72 1 1 0.77 -wut/stripes 0.05 0.04 0.11 0.11 1 1 1 1 0.11 1 0.11 0.12 -wut/trajectories 1 1 0.74 1 1 1 1 1 1 1 1 1 -wut/trapped_lovers 0.25 0.29 0.5 0.3 0.57 1 1 0.56 0.36 1 0.88 0.37 -wut/twosplashes 0.05 0.53 0.47 0.82 0.71 0.71 0.71 0.86 0.53 0.01 0.53 0.44 -wut/windows 0.35 0.34 0.19 0.42 0.31 0.35 1 0.33 0.3 1 0.18 0.31 -wut/x1 1 1 1 1 1 1 1 1 1 1 1 1 -wut/x2 0.61 0.99 1 0.84 0.84 0.84 0.56 0.84 0.99 0.22 0.42 0.99 -wut/x3 0.97 0.99 0.7 0.97 0.94 0.97 0.97 0.69 1 0.39 0.58 0.99 -wut/z1 0.32 0.27 0.36 0.11 0.5 0.5 0.3 0.5 0.31 0.06 0.33 0.27 -wut/z2 0.59 0.67 0.59 1 0.66 0.7 0.81 0.65 0.64 0.86 0.99 0.57 -wut/z3 1 1 0.96 1 0.67 0.68 0.95 0.74 1 0.73 0.99 1 -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== - - -psi -^^^ - -Summary statistics for ``psi`` (best=1.0): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 73 0.51 0.37 0 0.19 0.45 0.94 1 -Birch_0.01 73 0.56 0.34 0 0.24 0.53 0.94 1 -Complete linkage 73 0.49 0.33 0 0.26 0.4 0.75 1 -Gaussian mixtures 73 0.64 0.37 0 0.3 0.82 0.98 1 -Genie_0.1 73 0.73 0.32 0 0.47 0.89 1 1 -Genie_0.3 73 0.77 0.28 0 0.52 0.94 1 1 -Genie_0.5 73 0.75 0.31 0 0.51 0.9 1 1 -ITM 73 0.67 0.28 0.01 0.49 0.72 0.99 1 -K-means 73 0.57 0.34 0 0.28 0.53 0.96 1 -Single linkage 73 0.41 0.44 0 0 0.2 1 1 -Spectral_RBF_5 72 0.64 0.36 0 0.3 0.76 0.99 1 -Ward linkage 73 0.56 0.33 0 0.25 0.51 0.93 1 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``psi`` (best=1): - - -================= ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -================= ======= ====== ===== ===== ===== ===== ===== ===== -Average linkage 72 6.7 3.6 1 4 7.5 10 12 -Birch_0.01 72 5.6 3.2 1 3 6 8 12 -Complete linkage 72 7.9 2.9 1 7 8 10 12 -Gaussian mixtures 72 4.6 4 1 1 3.5 9 12 -Genie_0.1 72 4 3.5 1 1 3 7 12 -Genie_0.3 72 3.6 3.3 1 1 2 5.2 12 -Genie_0.5 72 4.3 4.1 1 1 1 9.2 12 -ITM 72 5.3 3.9 1 1 5 9 12 -K-means 72 5 3.5 1 1 5 8 12 -Single linkage 72 7.8 5 1 1 11 12 12 -Spectral_RBF_5 72 5.4 3.6 1 1 6 8.2 11 -Ward linkage 72 5.9 3.3 1 3 6 8 12 -================= ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``psi`` (best=1.0): - - -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -.. 
Average linkage Birch_0.01 Complete linkage Gaussian mixtures Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Spectral_RBF_5 Ward linkage -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== -fcps/atom 0.19 0.19 0.17 0.09 1 1 1 1 0.27 1 1 0.19 -fcps/chainlink 0.35 0.36 0.39 0.95 1 1 1 1 0.31 1 1 0.36 -fcps/engytime 0.13 0.88 0.11 0.98 0.85 0.85 0.85 0.91 0.88 0 0.92 0.77 -fcps/hepta 1 1 1 1 1 1 1 0.9 1 1 1 1 -fcps/lsun 0.45 0.51 0.51 1 1 1 1 1 0.54 1 0.81 0.46 -fcps/target 0.79 0.25 0.82 0.17 1 1 1 1 0.28 1 1 0.33 -fcps/tetra 0.99 0.97 0.99 1 1 1 1 1 1 0.01 1 0.97 -fcps/twodiamonds 1 1 0.99 1 0.98 0.98 0.98 0.99 1 0 1 1 -fcps/wingnut 1 1 1 0.93 1 1 1 1 0.93 1 0.88 0.78 -graves/dense 0.92 0.92 0.34 1 0.96 0.96 0.96 1 0.82 0.02 0.92 0.92 -graves/fuzzyx 0.76 0.9 0.94 1 0.74 0.59 0.5 0.95 1 0 0.67 0.79 -graves/line 0 0 0 1 0.02 1 1 0.2 0 1 1 0 -graves/parabolic 0.75 0.6 0.67 0.7 0.84 0.84 0.84 0.78 0.76 0 0.79 0.72 -graves/ring 0.2 0.21 0.3 0.09 1 1 1 1 0.01 1 1 0.21 -graves/ring_noisy 0 0.2 0.35 0.08 1 1 1 1 0.25 0 1 0.24 -graves/ring_outliers 0.19 0.17 0.15 0.14 1 1 1 1 0.24 1 1 0.17 -graves/zigzag 0.54 0.64 0.38 0.96 1 1 1 0.79 0.6 1 0.71 0.53 -graves/zigzag_noisy 0.51 0.68 0.42 0.93 0.8 1 1 0.62 0.52 0.4 0.68 0.71 -graves/zigzag_outliers 0.17 0.4 0.29 0.98 1 1 1 0.83 0.33 0.4 0.22 0.25 -other/chameleon_t4_8k 0.54 0.51 0.48 0.47 0.64 1 0.72 0.69 0.52 0 0.52 0.48 -other/chameleon_t5_8k 1 1 0.73 1 1 1 0.71 0.57 1 0 1 1 -other/chameleon_t7_10k 0.33 0.28 0.28 0.27 0.42 0.71 1 0.46 0.31 0 0.31 0.37 -other/chameleon_t8_8k 0.28 0.34 0.26 0.34 0.41 0.34 0.68 0.4 0.28 0 0.27 0.28 -other/hdbscan 0.19 0.56 0.39 0.74 0.97 0.69 0.69 0.78 0.72 0 0.34 0.86 -other/iris 0.76 0.74 0.64 0.91 0.9 0.9 0.4 0.9 0.76 0.38 0.75 0.74 -other/iris5 0.77 0.75 0.65 0.91 0.34 0.34 0.91 0.28 0.76 0.41 0.75 0.75 -other/square 0.24 0.01 0.26 0.17 1 1 
1 1 0.15 1 0.26 0.34 -sipu/a1 0.94 0.94 0.93 0.97 0.95 0.88 0.76 0.74 0.98 0.2 0.95 0.93 -sipu/a2 0.95 0.95 0.92 0.97 0.96 0.92 0.75 0.77 0.98 0.14 0.9 0.93 -sipu/a3 0.95 0.95 0.93 0.94 0.97 0.94 0.77 0.76 0.94 0.1 0.92 0.95 -sipu/aggregation 1 0.7 0.67 1 0.31 0.45 0.8 0.47 0.65 0.58 0.99 0.7 -sipu/compound 0.64 0.67 0.64 0.67 0.59 0.67 0.7 0.54 0.68 0.64 0.65 0.67 -sipu/d31 0.91 0.95 0.95 0.96 0.95 0.9 0.69 0.81 0.97 0.15 0.95 0.94 -sipu/flame 0.48 0.17 0 0.36 1 1 1 0.37 0.56 0.01 0.91 0.17 -sipu/jain 0.74 0.53 0.74 0 0.01 1 1 0.39 0.39 0.21 0.53 0.53 -sipu/pathbased 0.4 0.49 0.35 0.4 0.97 0.97 0.6 0.57 0.42 0 0.43 0.49 -sipu/r15 1 1 1 1 0.99 0.99 1 0.99 1 1 1 1 -sipu/s1 0.99 0.99 0.97 0.99 0.99 0.99 0.99 0.73 0.99 0.28 0.99 0.99 -sipu/s2 0.94 0.92 0.75 0.96 0.94 0.94 0.78 0.8 0.96 0 0.96 0.93 -sipu/s3 0.55 0.77 0.46 0.82 0.76 0.7 0.51 0.69 0.82 0 0.8 0.76 -sipu/s4 0.44 0.62 0.4 0.75 0.72 0.67 0.41 0.59 0.75 0 0.59 0.63 -sipu/spiral 0.03 0.06 0.05 0.03 1 1 1 0.72 0.01 1 0.02 0.06 -sipu/unbalance 1 1 0.74 1 0.17 0.21 0.26 0.15 1 0.78 1 1 -uci/ecoli 0.39 0.41 0.34 0.39 0.29 0.3 0.27 0.33 0.4 0.18 0.34 0.41 -uci/glass 0.06 0.21 0.21 0.22 0.19 0.27 0.21 0.22 0.27 0.05 0.16 0.22 -uci/ionosphere 0.01 0.35 0.01 0.53 0.4 0.4 0 0.17 0.34 0.01 0.01 0.35 -uci/sonar 0.03 0 0 0.01 0 0 0.04 0.01 0.05 0.01 0.01 0 -uci/statlog 0 0.35 0.13 0.42 0.71 0.52 0.42 0.52 0.32 0 nan 0.32 -uci/wdbc 0.06 0.3 0.06 0.73 0.06 0.3 0.3 0.73 0.5 0 0 0.3 -uci/wine 0.23 0.48 0.42 0.83 0.46 0.46 0.13 0.46 0.48 0.03 0.25 0.48 -uci/yeast 0.17 0.23 0.17 0.18 0.16 0.17 0.13 0.13 0.24 0.13 0.14 0.21 -wut/circles 1 1 1 1 1 1 1 1 1 1 1 1 -wut/cross 0.02 0.11 0.17 0.3 0.78 0.55 0.27 0.33 0.28 0 0.01 0.22 -wut/graph 0.38 0.42 0.38 0.88 0.53 0.46 0.26 0.55 0.43 0.05 0.4 0.46 -wut/isolation 0.03 0.03 0.02 0.01 1 1 1 1 0.01 1 0.01 0.04 -wut/labirynth 0.31 0.42 0.31 0.57 0.35 0.46 0.69 0.69 0.31 0.52 0.34 0.29 -wut/mk1 0.99 0.99 0.97 0.99 0.99 0.99 0.99 0.55 0.99 0.35 0.99 0.99 -wut/mk2 0.07 0.09 0.09 0.08 1 1 1 
1 0.09 1 0.07 0.06 -wut/mk3 0.35 0.89 0.87 0.92 0.82 0.82 0.43 0.56 0.93 0 0.92 0.91 -wut/mk4 0.1 0.2 0.3 0.55 1 1 1 0.63 0.25 1 0.27 0.24 -wut/olympic 0.22 0.24 0.2 0.18 0.25 0.26 0.21 0.31 0.21 0 0.24 0.19 -wut/smile 0.98 0.65 0.58 0.43 0.47 1 1 0.62 0.66 1 1 0.69 -wut/stripes 0.04 0.03 0.09 0.11 1 1 1 1 0.1 1 0.11 0.08 -wut/trajectories 1 1 0.62 1 1 1 1 1 1 1 1 1 -wut/trapped_lovers 0.13 0.15 0.34 0.17 0.5 1 1 0.49 0.21 1 0.77 0.21 -wut/twosplashes 0.03 0.53 0.44 0.82 0.71 0.71 0.71 0.75 0.53 0.01 0.53 0.37 -wut/windows 0.28 0.28 0.06 0.34 0.29 0.39 1 0.2 0.24 1 0.08 0.26 -wut/x1 1 1 1 1 1 1 1 1 1 1 1 1 -wut/x2 0.56 0.98 1 0.74 0.74 0.74 0.5 0.75 0.98 0 0.19 0.98 -wut/x3 0.92 0.98 0.59 0.94 0.89 0.92 0.92 0.49 1 0.02 0.45 0.98 -wut/z1 0.25 0.2 0.33 0.09 0.38 0.38 0.19 0.38 0.31 0.04 0.31 0.2 -wut/z2 0.45 0.56 0.39 1 0.56 0.47 0.63 0.56 0.51 0.47 0.99 0.51 -wut/z3 1 0.99 0.92 0.99 0.49 0.44 0.9 0.6 1 0.55 0.97 1 -====================== ================= ============ ================== ==================== =========== =========== =========== ===== ========= ================ ================ ============== - - - - - -Summary -^^^^^^^ - -Medians and means of the partition similarity scores -(read row-wise, in groups of 2 columns): - - -.. figure:: figures/benchmarks_details_indices_small_1.png - :width: 15 cm - - Heat map of median and mean similarity scores - - - - - - -Large Datasets --------------- - - - - - - - -nca -^^^ - -Summary statistics for ``nca`` (best=1.0): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 0.75 0.22 0.43 0.6 0.76 0.92 1 -Genie_0.3 6 0.61 0.33 0.16 0.38 0.6 0.88 1 -Genie_0.5 6 0.39 0.43 0 0.06 0.25 0.71 1 -ITM 6 0.71 0.15 0.52 0.6 0.75 0.79 0.9 -K-means 6 0.68 0.26 0.42 0.47 0.62 0.91 0.99 -Single linkage 6 0.1 0.24 0 0 0 0 0.59 -Ward linkage 6 0.69 0.23 0.44 0.5 0.65 0.86 1 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``nca`` (best=1): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 1.7 0.8 1 1 1.5 2 3 -Genie_0.3 6 3.5 1.8 1 2.2 4 5 5 -Genie_0.5 6 5 2 1 5.2 6 6 6 -ITM 6 3 2.1 1 1.2 2.5 4.5 6 -K-means 6 3.5 1.6 1 2.5 4 4.8 5 -Single linkage 6 6.8 0.4 6 7 7 7 7 -Ward linkage 6 3 1.3 1 2.2 3.5 4 4 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``nca`` (best=1.0): - - -============= =========== =========== =========== ===== ========= ================ ============== -.. Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Ward linkage -============= =========== =========== =========== ===== ========= ================ ============== -mnist/digits 0.69 0.34 0.06 0.9 0.48 0 0.58 -mnist/fashion 0.43 0.16 0.06 0.56 0.42 0 0.44 -sipu/birch1 0.94 0.94 0.8 0.8 0.96 0 0.91 -sipu/birch2 1 1 1 0.77 0.99 0.59 1 -sipu/worms_2 0.57 0.49 0.44 0.52 0.46 0 0.47 -sipu/worms_64 0.84 0.7 0 0.74 0.75 0 0.73 -============= =========== =========== =========== ===== ========= ================ ============== - - -ar -^^ - -Summary statistics for ``ar`` (best=1.0): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 0.66 0.26 0.31 0.48 0.65 0.85 1 -Genie_0.3 6 0.52 0.37 0.07 0.25 0.46 0.81 1 -Genie_0.5 6 0.35 0.43 0 0.02 0.17 0.63 1 -ITM 6 0.6 0.18 0.37 0.46 0.65 0.71 0.8 -K-means 6 0.61 0.31 0.32 0.35 0.52 0.88 0.99 -Single linkage 6 0.07 0.18 0 0 0 0 0.44 -Ward linkage 6 0.58 0.27 0.33 0.38 0.47 0.75 1 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``ar`` (best=1): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 1.8 1.2 1 1 1.5 2 4 -Genie_0.3 6 3.2 1.7 1 2 3 4.8 5 -Genie_0.5 6 4.8 1.9 1 5 5.5 6 6 -ITM 6 3.3 2.3 1 1.5 3 5.2 6 -K-means 6 3.3 1.6 1 2.2 3.5 4.8 5 -Single linkage 6 6.8 0.4 6 7 7 7 7 -Ward linkage 6 3.2 1.5 1 2.2 3.5 4 5 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``ar`` (best=1.0): - - -============= =========== =========== =========== ===== ========= ================ ============== -.. Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Ward linkage -============= =========== =========== =========== ===== ========= ================ ============== -mnist/digits 0.6 0.21 0.02 0.8 0.37 0 0.53 -mnist/fashion 0.31 0.07 0.02 0.41 0.35 0 0.37 -sipu/birch1 0.89 0.89 0.73 0.72 0.94 0 0.83 -sipu/birch2 1 1 1 0.71 0.99 0.44 1 -sipu/worms_2 0.45 0.38 0.32 0.37 0.32 0 0.33 -sipu/worms_64 0.7 0.54 0 0.59 0.67 0 0.42 -============= =========== =========== =========== ===== ========= ================ ============== - - -fm -^^ - -Summary statistics for ``fm`` (best=1.0): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 0.69 0.23 0.42 0.51 0.68 0.85 1 -Genie_0.3 6 0.6 0.29 0.31 0.4 0.49 0.82 1 -Genie_0.5 6 0.49 0.31 0.2 0.31 0.34 0.65 1 -ITM 6 0.62 0.16 0.39 0.51 0.66 0.72 0.82 -K-means 6 0.64 0.28 0.35 0.42 0.56 0.88 0.99 -Single linkage 6 0.28 0.15 0.1 0.19 0.26 0.32 0.53 -Ward linkage 6 0.61 0.25 0.36 0.45 0.52 0.77 1 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``fm`` (best=1): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 1.7 0.8 1 1 1.5 2 3 -Genie_0.3 6 3.3 2 1 2 3 4.8 6 -Genie_0.5 6 4.8 2.1 1 4.2 5.5 6 7 -ITM 6 3.3 2.3 1 1.5 3 5.2 6 -K-means 6 3.5 1.9 1 2.2 3.5 4.8 6 -Single linkage 6 6.3 0.8 5 6 6.5 7 7 -Ward linkage 6 3.2 1.5 1 2.2 3.5 4 5 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``fm`` (best=1.0): - - -============= =========== =========== =========== ===== ========= ================ ============== -.. Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Ward linkage -============= =========== =========== =========== ===== ========= ================ ============== -mnist/digits 0.65 0.41 0.31 0.82 0.43 0.32 0.58 -mnist/fashion 0.42 0.31 0.31 0.47 0.42 0.32 0.44 -sipu/birch1 0.89 0.89 0.75 0.72 0.94 0.1 0.83 -sipu/birch2 1 1 1 0.72 0.99 0.53 1 -sipu/worms_2 0.47 0.4 0.36 0.39 0.35 0.19 0.36 -sipu/worms_64 0.72 0.58 0.2 0.61 0.69 0.2 0.47 -============= =========== =========== =========== ===== ========= ================ ============== - - -ami -^^^ - -Summary statistics for ``ami`` (best=1.0): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 0.77 0.17 0.57 0.67 0.74 0.9 1 -Genie_0.3 6 0.7 0.24 0.37 0.59 0.66 0.88 1 -Genie_0.5 6 0.49 0.42 0 0.19 0.41 0.84 1 -ITM 6 0.75 0.15 0.56 0.63 0.74 0.87 0.91 -K-means 6 0.72 0.22 0.5 0.53 0.66 0.91 1 -Single linkage 6 0.15 0.36 0 0 0 0 0.89 -Ward linkage 6 0.75 0.17 0.56 0.62 0.7 0.87 1 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``ami`` (best=1): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 1.3 0.5 1 1 1 1.8 2 -Genie_0.3 6 3 1.5 1 2 3 4 5 -Genie_0.5 6 4.5 2 1 4 5 6 6 -ITM 6 3.8 2.1 1 2.2 4 5.8 6 -K-means 6 2.8 2 1 1 2.5 4.8 5 -Single linkage 6 6.8 0.4 6 7 7 7 7 -Ward linkage 6 2.7 1.6 1 1.2 2.5 3.8 5 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``ami`` (best=1.0): - - -============= =========== =========== =========== ===== ========= ================ ============== -.. Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Ward linkage -============= =========== =========== =========== ===== ========= ================ ============== -mnist/digits 0.75 0.57 0.18 0.83 0.5 0 0.68 -mnist/fashion 0.57 0.37 0.21 0.56 0.51 0 0.56 -sipu/birch1 0.94 0.94 0.92 0.89 0.98 0 0.92 -sipu/birch2 1 1 1 0.91 1 0.89 1 -sipu/worms_2 0.65 0.63 0.61 0.62 0.6 0 0.6 -sipu/worms_64 0.72 0.68 0 0.66 0.72 0 0.72 -============= =========== =========== =========== ===== ========= ================ ============== - - -nmi -^^^ - -Summary statistics for ``nmi`` (best=1.0): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 0.77 0.17 0.57 0.67 0.74 0.9 1 -Genie_0.3 6 0.7 0.24 0.37 0.59 0.66 0.88 1 -Genie_0.5 6 0.49 0.42 0 0.19 0.41 0.84 1 -ITM 6 0.75 0.15 0.56 0.63 0.75 0.87 0.91 -K-means 6 0.72 0.22 0.5 0.53 0.66 0.91 1 -Single linkage 6 0.15 0.36 0 0 0 0 0.89 -Ward linkage 6 0.75 0.17 0.56 0.62 0.7 0.87 1 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``nmi`` (best=1): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 1.5 0.5 1 1 1.5 2 2 -Genie_0.3 6 3 1.5 1 2 3 4 5 -Genie_0.5 6 4.5 2 1 4 5 6 6 -ITM 6 3.8 2.1 1 2.2 4 5.8 6 -K-means 6 3 1.9 1 1.2 3 4.8 5 -Single linkage 6 6.8 0.4 6 7 7 7 7 -Ward linkage 6 2.7 1.6 1 1.2 2.5 3.8 5 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``nmi`` (best=1.0): - - -============= =========== =========== =========== ===== ========= ================ ============== -.. Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Ward linkage -============= =========== =========== =========== ===== ========= ================ ============== -mnist/digits 0.75 0.57 0.18 0.83 0.5 0 0.68 -mnist/fashion 0.57 0.37 0.21 0.56 0.51 0 0.56 -sipu/birch1 0.94 0.94 0.92 0.89 0.98 0 0.92 -sipu/birch2 1 1 1 0.91 1 0.89 1 -sipu/worms_2 0.65 0.63 0.61 0.62 0.6 0 0.6 -sipu/worms_64 0.72 0.68 0 0.66 0.72 0 0.73 -============= =========== =========== =========== ===== ========= ================ ============== - - -npa -^^^ - -Summary statistics for ``npa`` (best=1.0): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 0.75 0.22 0.43 0.61 0.77 0.92 1 -Genie_0.3 6 0.61 0.33 0.16 0.39 0.61 0.88 1 -Genie_0.5 6 0.4 0.43 0 0.06 0.26 0.71 1 -ITM 6 0.71 0.15 0.49 0.6 0.75 0.79 0.89 -K-means 6 0.67 0.26 0.42 0.44 0.62 0.91 0.99 -Single linkage 6 0.1 0.24 0 0 0.01 0.02 0.59 -Ward linkage 6 0.68 0.23 0.44 0.48 0.65 0.86 1 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``npa`` (best=1): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 1.7 0.8 1 1 1.5 2 3 -Genie_0.3 6 3.3 1.9 1 2 3.5 5 5 -Genie_0.5 6 4.8 1.9 1 5 5.5 6 6 -ITM 6 3.2 2 1 1.5 3 4.5 6 -K-means 6 3.7 1.9 1 2.5 4 4.8 6 -Single linkage 6 6.8 0.4 6 7 7 7 7 -Ward linkage 6 3 1.3 1 2.2 3.5 4 4 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``npa`` (best=1.0): - - -============= =========== =========== =========== ===== ========= ================ ============== -.. Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Ward linkage -============= =========== =========== =========== ===== ========= ================ ============== -mnist/digits 0.7 0.35 0.07 0.89 0.48 0.01 0.58 -mnist/fashion 0.43 0.16 0.06 0.56 0.42 0 0.44 -sipu/birch1 0.94 0.94 0.8 0.8 0.96 0 0.91 -sipu/birch2 1 1 1 0.77 0.99 0.59 1 -sipu/worms_2 0.58 0.51 0.44 0.49 0.43 0.03 0.45 -sipu/worms_64 0.84 0.7 0 0.74 0.75 0 0.73 -============= =========== =========== =========== ===== ========= ================ ============== - - -psi -^^^ - -Summary statistics for ``psi`` (best=1.0): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. 
count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 0.7 0.25 0.37 0.51 0.72 0.9 1 -Genie_0.3 6 0.57 0.35 0.13 0.31 0.53 0.86 1 -Genie_0.5 6 0.36 0.42 0 0.04 0.18 0.63 1 -ITM 6 0.65 0.16 0.39 0.57 0.69 0.73 0.85 -K-means 6 0.62 0.3 0.31 0.36 0.57 0.88 0.98 -Single linkage 6 0.09 0.21 0 0 0 0 0.52 -Ward linkage 6 0.62 0.28 0.31 0.39 0.6 0.83 1 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Ranks for ``psi`` (best=1): - - -============== ======= ====== ===== ===== ===== ===== ===== ===== -.. count mean std min 25% 50% 75% max -============== ======= ====== ===== ===== ===== ===== ===== ===== -Genie_0.1 6 1.5 0.5 1 1 1.5 2 2 -Genie_0.3 6 3.3 1.9 1 2 3.5 5 5 -Genie_0.5 6 4.8 2 1 4.5 6 6 6 -ITM 6 3.2 2 1 1.5 3 4.5 6 -K-means 6 3.7 1.5 1 3.2 4 4.8 5 -Single linkage 6 6.8 0.4 6 7 7 7 7 -Ward linkage 6 3 1.4 1 2.2 3 3.8 5 -============== ======= ====== ===== ===== ===== ===== ===== ===== - - -Raw results for ``psi`` (best=1.0): - - -============= =========== =========== =========== ===== ========= ================ ============== -.. Genie_0.1 Genie_0.3 Genie_0.5 ITM K-means Single linkage Ward linkage -============= =========== =========== =========== ===== ========= ================ ============== -mnist/digits 0.6 0.28 0.05 0.85 0.43 0 0.5 -mnist/fashion 0.37 0.13 0.04 0.53 0.33 0 0.35 -sipu/birch1 0.92 0.92 0.73 0.75 0.94 0 0.87 -sipu/birch2 1 1 1 0.69 0.98 0.52 1 -sipu/worms_2 0.48 0.4 0.32 0.39 0.31 0 0.31 -sipu/worms_64 0.84 0.66 0 0.7 0.7 0 0.71 -============= =========== =========== =========== ===== ========= ================ ============== - - - - - - -Summary -^^^^^^^ - -Medians and means of the partition similarity scores: - - -.. 
figure:: figures/benchmarks_details_indices_large_1.png - :width: 15 cm - - Heat map of median and mean similarity scores - - diff --git a/.devel/sphinx/weave/noise-figures/noise-Genie1-3.pdf b/.devel/sphinx/weave/noise-figures/noise-Genie1-3.pdf new file mode 100644 index 00000000..0b3d715e Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-Genie1-3.pdf differ diff --git a/.devel/sphinx/weave/noise-figures/noise-Genie1-3.png b/.devel/sphinx/weave/noise-figures/noise-Genie1-3.png new file mode 100644 index 00000000..006cd697 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-Genie1-3.png differ diff --git a/.devel/sphinx/weave/noise-figures/noise-Genie2-5.pdf b/.devel/sphinx/weave/noise-figures/noise-Genie2-5.pdf new file mode 100644 index 00000000..74883578 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-Genie2-5.pdf differ diff --git a/.devel/sphinx/weave/noise-figures/noise-Genie2-5.png b/.devel/sphinx/weave/noise-figures/noise-Genie2-5.png new file mode 100644 index 00000000..c9eab6ae Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-Genie2-5.png differ diff --git a/.devel/sphinx/weave/noise-figures/noise-Genie3-7.pdf b/.devel/sphinx/weave/noise-figures/noise-Genie3-7.pdf new file mode 100644 index 00000000..0376327a Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-Genie3-7.pdf differ diff --git a/.devel/sphinx/weave/noise-figures/noise-Genie3-7.png b/.devel/sphinx/weave/noise-figures/noise-Genie3-7.png new file mode 100644 index 00000000..2cfbf8a2 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-Genie3-7.png differ diff --git a/.devel/sphinx/weave/noise-figures/noise-HDBSCAN1-9.pdf b/.devel/sphinx/weave/noise-figures/noise-HDBSCAN1-9.pdf new file mode 100644 index 00000000..2b9e1532 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-HDBSCAN1-9.pdf differ diff --git a/.devel/sphinx/weave/noise-figures/noise-HDBSCAN1-9.png 
b/.devel/sphinx/weave/noise-figures/noise-HDBSCAN1-9.png new file mode 100644 index 00000000..17219ff0 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-HDBSCAN1-9.png differ diff --git a/.devel/sphinx/weave/noise-figures/noise-HDBSCAN2-11.pdf b/.devel/sphinx/weave/noise-figures/noise-HDBSCAN2-11.pdf new file mode 100644 index 00000000..7a0e1820 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-HDBSCAN2-11.pdf differ diff --git a/.devel/sphinx/weave/noise-figures/noise-HDBSCAN2-11.png b/.devel/sphinx/weave/noise-figures/noise-HDBSCAN2-11.png new file mode 100644 index 00000000..81399e37 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-HDBSCAN2-11.png differ diff --git a/.devel/sphinx/weave/noise-figures/noise-scatter-1.pdf b/.devel/sphinx/weave/noise-figures/noise-scatter-1.pdf new file mode 100644 index 00000000..f4ae59a4 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-scatter-1.pdf differ diff --git a/.devel/sphinx/weave/noise-figures/noise-scatter-1.png b/.devel/sphinx/weave/noise-figures/noise-scatter-1.png new file mode 100644 index 00000000..e6dd8d15 Binary files /dev/null and b/.devel/sphinx/weave/noise-figures/noise-scatter-1.png differ diff --git a/.devel/sphinx/weave/noise.rstw b/.devel/sphinx/weave/noise.Rmd similarity index 63% rename from .devel/sphinx/weave/noise.rstw rename to .devel/sphinx/weave/noise.Rmd index 630605aa..bfa7ae05 100644 --- a/.devel/sphinx/weave/noise.rstw +++ b/.devel/sphinx/weave/noise.Rmd @@ -1,69 +1,60 @@ -Clustering with Noise Points Detection -====================================== +# Clustering with Noise Points Detection -<>= +```{python imports,results="hide"} import numpy as np import pandas as pd import matplotlib.pyplot as plt import genieclust -@ +``` -<>= -import pweave +```{python settings,results="hide",echo=FALSE} from tabulate import tabulate np.set_printoptions(precision=3, threshold=50, edgeitems=50) pd.set_option("display.min_rows", 
200) -plt.style.use("bmh") -plt.rcParams.update({ - 'font.size': 9, - 'font.family': 'sans-serif', - 'font.sans-serif': ['Ubuntu Condensed', 'Alegreya', 'Alegreya Sans']}) -@ +``` Let's load an example dataset that can be found -the on `hdbscan `_ -:cite:`hdbscanpkg` package's project site: +at the [hdbscan](https://github.com/scikit-learn-contrib/hdbscan) +{cite}`hdbscanpkg` package's project site: -<>= +```{python noise-load} dataset = "hdbscan" X = np.loadtxt("%s.data.gz" % dataset, ndmin=2) -labels_true = np.loadtxt("%s.labels0.gz" % dataset, dtype=np.intp)-1 +labels_true = np.loadtxt("%s.labels0.gz" % dataset, dtype=np.intp) - 1 n_clusters = len(np.unique(labels_true[labels_true>=0])) -@ +``` Here are the "reference" labels as identified by an expert (of course, each dataset might reveal many different clusterings that a user might find useful for whatever their goal is). -Labels -1 denote noise points (light grey markers). +The `-1` labels denote noise points (light grey markers). -<>= +```{python noise-scatter,results="hide",fig.cap="Reference labels."} genieclust.plots.plot_scatter(X, labels=labels_true, alpha=0.5) plt.title("(n=%d, true n_clusters=%d)" % (X.shape[0], n_clusters)) plt.axis("equal") plt.show() -@ +``` -Smoothing Factor ----------------- +## Smoothing Factor The `genieclust` package allows for clustering with respect -to a mutual reachability distance, -:math:`d_M`, -known from the HDBSCAN\* algorithm :cite:`hdbscan`\ . -It is parameterised with *a smoothing factor*, ``M``, which +to a mutual reachability distance, $d_M$, +known from the HDBSCAN\* algorithm {cite}`hdbscan`. +It is parameterised by *a smoothing factor*, `M`, which controls how eagerly we tend to classify points as noise.
Here are the effects of playing with the `M` parameter (we keep the default `gini_threshold`): -<>= +```{python noise-Genie1,results="hide",fig.cap="Labels predicted by Genie with noise point detection.",fig.height=5.9375} Ms = [2, 5, 10, 25] for i in range(len(Ms)): g = genieclust.Genie(n_clusters=n_clusters, M=Ms[i]) @@ -73,15 +64,15 @@ for i in range(len(Ms)): plt.title("(gini_threshold=%g, M=%d)"%(g.gini_threshold, g.M)) plt.axis("equal") plt.show() -@ +``` For a more natural look-and-feel, it can be a good idea to first identify -the noise points with Genie, remove them from the data set (or at least -temporarily disable), and then apply the clustering procedure once again +the noise points with Genie, remove them from the data set (at least temporarily), +and then apply the clustering procedure once again (did we mention that our algorithm is fast?) but now with respect to the original distance (here: Euclidean): -<>= +```{python noise-Genie2,results="hide",fig.cap="Labels predicted by Genie when noise points were removed from the dataset."} # Step 1: Noise point identification g1 = genieclust.Genie(n_clusters=n_clusters, M=50) labels_noise = g1.fit_predict(X) @@ -96,20 +87,20 @@ genieclust.plots.plot_scatter(X, labels=labels_noise, alpha=0.5) plt.title("(gini_threshold=%g, noise points removed first; M=%d)"%(g2.gini_threshold, g1.M)) plt.axis("equal") plt.show() -@ +``` -Contrary to an excellent implementation of HDBSCAN\* -that is featured in the `hdbscan `_ -package :cite:`hdbscanpkg` and which also relies on a minimum spanning tree -with respect to :math:`d_M`, -we still have the hierarchical Genie :cite:`genieins` algorithm under the hood here. -This means we can ask for any number of clusters and get what we asked for. 
+Contrary to the excellent implementation of HDBSCAN\* +that is featured in the [hdbscan](https://github.com/scikit-learn-contrib/hdbscan) +package {cite}`hdbscanpkg` and which also relies on a minimum spanning tree +with respect to $d_M$, here, +we still have the hierarchical Genie {cite}`genieins` algorithm under the hood. +It means that we can request a *specific* number of clusters. Moreover, we can easily switch between partitions of finer or coarser granularity. -<>= +```{python noise-Genie3,results="hide",fig.cap="Labels predicted by Genie when noise points were removed from the dataset – different number of clusters requested.",fig.height=5.9375} ncs = [5, 6, 7, 8, 10, 15] for i in range(len(ncs)): g = genieclust.Genie(n_clusters=ncs[i]) @@ -120,21 +111,20 @@ for i in range(len(ncs)): plt.title("(n_clusters=%d)"%(g.n_clusters)) plt.axis("equal") plt.show() -@ +``` -A Comparision with HDBSCAN\* ----------------------------- +## A Comparison with HDBSCAN\* Here are the results returned by `hdbscan` with default parameters: -<>= +```{python noise-import-HDBSCAN} import hdbscan -@ +``` -<>= +```{python noise-HDBSCAN1,results="hide",fig.cap="Labels predicted by HDBSCAN\\*."} h = hdbscan.HDBSCAN() labels_hdbscan = h.fit_predict(X) genieclust.plots.plot_scatter(X, labels=labels_hdbscan, alpha=0.5) @@ -142,15 +132,15 @@ plt.title("(min_cluster_size=%d, min_samples=%d)" % ( h.min_cluster_size, h.min_samples or h.min_cluster_size)) plt.axis("equal") plt.show() -@ +``` -By tuning `min_cluster_size` and/or `min_samples` (which corresponds to our `M` parameter; +By tuning up `min_cluster_size` and/or `min_samples` (which corresponds to our `M` parameter; by the way, `min_samples` defaults to `min_cluster_size` if not provided explicitly), we can obtain a partition that is even closer to the reference one: -<>= +```{python noise-HDBSCAN2,results="hide",fig.cap="Labels predicted by HDBSCAN\\* – different settings.",fig.height=5.9375} mcss = [5, 10, 25] mss = [5, 10] for
i in range(len(mcss)): @@ -163,7 +153,7 @@ for i in range(len(mcss)): h.min_cluster_size, h.min_samples or h.min_cluster_size)) plt.axis("equal") plt.show() -@ +``` Neat. diff --git a/.devel/sphinx/weave/noise.md b/.devel/sphinx/weave/noise.md new file mode 100644 index 00000000..af5e6f92 --- /dev/null +++ b/.devel/sphinx/weave/noise.md @@ -0,0 +1,202 @@ + + + + +# Clustering with Noise Points Detection + + + +```python +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import genieclust +``` + + + + + +Let's load an example dataset that can be found +at the [hdbscan](https://github.com/scikit-learn-contrib/hdbscan) +{cite}`hdbscanpkg` package's project site: + + + +```python +dataset = "hdbscan" +X = np.loadtxt("%s.data.gz" % dataset, ndmin=2) +labels_true = np.loadtxt("%s.labels0.gz" % dataset, dtype=np.intp) - 1 +n_clusters = len(np.unique(labels_true[labels_true>=0])) +``` + + + +Here are the "reference" labels as identified by an expert (of course, +each dataset might reveal many different clusterings that a user might +find useful for whatever their goal is). +The `-1` labels denote noise points (light grey markers). + + + +```python +genieclust.plots.plot_scatter(X, labels=labels_true, alpha=0.5) +plt.title("(n=%d, true n_clusters=%d)" % (X.shape[0], n_clusters)) +plt.axis("equal") +plt.show() +``` + +(fig:noise-scatter)= +```{figure} noise-figures/noise-scatter-1.* +Reference labels. +``` + + + +## Smoothing Factor + + +The `genieclust` package allows for clustering with respect +to a mutual reachability distance, $d_M$, +known from the HDBSCAN\* algorithm {cite}`hdbscan`. +It is parameterised by *a smoothing factor*, `M`, which +controls how eagerly we tend to classify points as noise.
+ +Here are the effects of playing with the `M` parameter +(we keep the default `gini_threshold`): + + + +```python +Ms = [2, 5, 10, 25] +for i in range(len(Ms)): + g = genieclust.Genie(n_clusters=n_clusters, M=Ms[i]) + labels_genie = g.fit_predict(X) + plt.subplot(2, 2, i+1) + genieclust.plots.plot_scatter(X, labels=labels_genie, alpha=0.5) + plt.title("(gini_threshold=%g, M=%d)"%(g.gini_threshold, g.M)) + plt.axis("equal") +plt.show() +``` + +(fig:noise-Genie1)= +```{figure} noise-figures/noise-Genie1-3.* +Labels predicted by Genie with noise point detection. +``` + +For a more natural look-and-feel, it can be a good idea to first identify +the noise points with Genie, remove them from the data set (at least temporarily), +and then apply the clustering procedure once again +(did we mention that our algorithm is fast?) +but now with respect to the original distance (here: Euclidean): + + + +```python +# Step 1: Noise point identification +g1 = genieclust.Genie(n_clusters=n_clusters, M=50) +labels_noise = g1.fit_predict(X) +non_noise = (labels_noise >= 0) # True == non-noise point +# Step 2: Clustering of non-noise points: +g2 = genieclust.Genie(n_clusters=n_clusters) +labels_genie = g2.fit_predict(X[non_noise, :]) +# Replace old labels with the new ones: +labels_noise[non_noise] = labels_genie +# Scatter plot: +genieclust.plots.plot_scatter(X, labels=labels_noise, alpha=0.5) +plt.title("(gini_threshold=%g, noise points removed first; M=%d)"%(g2.gini_threshold, g1.M)) +plt.axis("equal") +plt.show() +``` + +(fig:noise-Genie2)= +```{figure} noise-figures/noise-Genie2-5.* +Labels predicted by Genie when noise points were removed from the dataset. 
+``` + + +Contrary to the excellent implementation of HDBSCAN\* +that is featured in the [hdbscan](https://github.com/scikit-learn-contrib/hdbscan) +package {cite}`hdbscanpkg` and which also relies on a minimum spanning tree +with respect to $d_M$, here, +we still have the hierarchical Genie {cite}`genieins` algorithm under the hood. +It means that we can request a *specific* number of clusters. +Moreover, we can easily switch between partitions +of finer or coarser granularity. + + + + +```python +ncs = [5, 6, 7, 8, 10, 15] +for i in range(len(ncs)): + g = genieclust.Genie(n_clusters=ncs[i]) + labels_genie = g.fit_predict(X[non_noise, :]) + plt.subplot(3, 2, i+1) + labels_noise[non_noise] = labels_genie + genieclust.plots.plot_scatter(X, labels=labels_noise, alpha=0.5) + plt.title("(n_clusters=%d)"%(g.n_clusters)) + plt.axis("equal") +plt.show() +``` + +(fig:noise-Genie3)= +```{figure} noise-figures/noise-Genie3-7.* +Labels predicted by Genie when noise points were removed from the dataset – different number of clusters requested. +``` + + + +## A Comparison with HDBSCAN\* + + +Here are the results returned by `hdbscan` with default parameters: + + + +```python +import hdbscan +h = hdbscan.HDBSCAN() +labels_hdbscan = h.fit_predict(X) +genieclust.plots.plot_scatter(X, labels=labels_hdbscan, alpha=0.5) +plt.title("(min_cluster_size=%d, min_samples=%d)" % ( + h.min_cluster_size, h.min_samples or h.min_cluster_size)) +plt.axis("equal") +plt.show() +``` + +(fig:noise-HDBSCAN1)= +```{figure} noise-figures/noise-HDBSCAN1-9.* +Labels predicted by HDBSCAN\*.
+``` + + +By tuning up `min_cluster_size` and/or `min_samples` (which corresponds to our `M` parameter; +by the way, `min_samples` defaults to `min_cluster_size` if not provided explicitly), +we can obtain a partition that is even closer to the reference one: + + + + +```python +mcss = [5, 10, 25] +mss = [5, 10] +for i in range(len(mcss)): + for j in range(len(mss)): + h = hdbscan.HDBSCAN(min_cluster_size=mcss[i], min_samples=mss[j]) + labels_hdbscan = h.fit_predict(X) + plt.subplot(3, 2, i*len(mss)+j+1) + genieclust.plots.plot_scatter(X, labels=labels_hdbscan, alpha=0.5) + plt.title("(min_cluster_size=%d, min_samples=%d)" % ( + h.min_cluster_size, h.min_samples or h.min_cluster_size)) + plt.axis("equal") +plt.show() +``` + +(fig:noise-HDBSCAN2)= +```{figure} noise-figures/noise-HDBSCAN2-11.* +Labels predicted by HDBSCAN\* – different settings. +``` + +Neat. + diff --git a/.devel/sphinx/weave/noise.rst b/.devel/sphinx/weave/noise.rst deleted file mode 100644 index e79ec479..00000000 --- a/.devel/sphinx/weave/noise.rst +++ /dev/null @@ -1,226 +0,0 @@ -Clustering with Noise Points Detection -====================================== - - -.. code-block:: python - - import numpy as np - import pandas as pd - import matplotlib.pyplot as plt - import genieclust - - - - - - - - - -Let's load an example dataset that can be found -the on `hdbscan `_ -:cite:`hdbscanpkg` package's project site: - - -.. code-block:: python - - dataset = "hdbscan" - X = np.loadtxt("%s.data.gz" % dataset, ndmin=2) - labels_true = np.loadtxt("%s.labels0.gz" % dataset, dtype=np.intp)-1 - n_clusters = len(np.unique(labels_true[labels_true>=0])) - - - - - - -Here are the "reference" labels as identified by an expert (of course, -each dataset might reveal many different clusterings that a user might -find useful for whatever their goal is). -Labels -1 denote noise points (light grey markers). - - -.. 
code-block:: python - - genieclust.plots.plot_scatter(X, labels=labels_true, alpha=0.5) - plt.title("(n=%d, true n_clusters=%d)" % (X.shape[0], n_clusters)) - plt.axis("equal") - plt.show() - - -.. figure:: figures/noise_noise-scatter_1.png - :width: 15 cm - - Reference labels. - - - - - -Smoothing Factor ----------------- - - -The `genieclust` package allows for clustering with respect -to a mutual reachability distance, -:math:`d_M`, -known from the HDBSCAN\* algorithm :cite:`hdbscan`\ . -It is parameterised with *a smoothing factor*, ``M``, which -controls how eagerly we tend to classify points as noise. - -Here are the effects of playing with the `M` parameter -(we keep the default `gini_threshold`): - - -.. code-block:: python - - Ms = [2, 5, 10, 25] - for i in range(len(Ms)): - g = genieclust.Genie(n_clusters=n_clusters, M=Ms[i]) - labels_genie = g.fit_predict(X) - plt.subplot(2, 2, i+1) - genieclust.plots.plot_scatter(X, labels=labels_genie, alpha=0.5) - plt.title("(gini_threshold=%g, M=%d)"%(g.gini_threshold, g.M)) - plt.axis("equal") - plt.show() - - -.. figure:: figures/noise_noise-Genie1_1.png - :width: 15 cm - - Labels predicted by Genie with noise point detection. - - - -For a more natural look-and-feel, it can be a good idea to first identify -the noise points with Genie, remove them from the data set (or at least -temporarily disable), and then apply the clustering procedure once again -(did we mention that our algorithm is fast?) -but now with respect to the original distance (here: Euclidean): - - -.. 
code-block:: python - - # Step 1: Noise point identification - g1 = genieclust.Genie(n_clusters=n_clusters, M=50) - labels_noise = g1.fit_predict(X) - non_noise = (labels_noise >= 0) # True == non-noise point - # Step 2: Clustering of non-noise points: - g2 = genieclust.Genie(n_clusters=n_clusters) - labels_genie = g2.fit_predict(X[non_noise, :]) - # Replace old labels with the new ones: - labels_noise[non_noise] = labels_genie - # Scatter plot: - genieclust.plots.plot_scatter(X, labels=labels_noise, alpha=0.5) - plt.title("(gini_threshold=%g, noise points removed first; M=%d)"%(g2.gini_threshold, g1.M)) - plt.axis("equal") - plt.show() - - -.. figure:: figures/noise_noise-Genie2_1.png - :width: 15 cm - - Labels predicted by Genie when noise points were removed from the dataset. - - - - -Contrary to an excellent implementation of HDBSCAN\* -that is featured in the `hdbscan `_ -package :cite:`hdbscanpkg` and which also relies on a minimum spanning tree -with respect to :math:`d_M`, -we still have the hierarchical Genie :cite:`genieins` algorithm under the hood here. -This means we can ask for any number of clusters and get what we asked for. -Moreover, we can easily switch between partitions -of finer or coarser granularity. - - - -.. code-block:: python - - ncs = [5, 6, 7, 8, 10, 15] - for i in range(len(ncs)): - g = genieclust.Genie(n_clusters=ncs[i]) - labels_genie = g.fit_predict(X[non_noise, :]) - plt.subplot(3, 2, i+1) - labels_noise[non_noise] = labels_genie - genieclust.plots.plot_scatter(X, labels=labels_noise, alpha=0.5) - plt.title("(n_clusters=%d)"%(g.n_clusters)) - plt.axis("equal") - plt.show() - - -.. figure:: figures/noise_noise-Genie3_1.png - :width: 15 cm - - Labels predicted by Genie when noise points were removed from the dataset. - - - - - -A Comparision with HDBSCAN\* ----------------------------- - - -Here are the results returned by `hdbscan` with default parameters: - - -.. code-block:: python - - import hdbscan - - - - - -.. 
code-block:: python - - h = hdbscan.HDBSCAN() - labels_hdbscan = h.fit_predict(X) - genieclust.plots.plot_scatter(X, labels=labels_hdbscan, alpha=0.5) - plt.title("(min_cluster_size=%d, min_samples=%d)" % ( - h.min_cluster_size, h.min_samples or h.min_cluster_size)) - plt.axis("equal") - plt.show() - - -.. figure:: figures/noise_noise-HDBSCAN1_1.png - :width: 15 cm - - Labels predicted by HDBSCAN\*. - - - - -By tuning `min_cluster_size` and/or `min_samples` (which corresponds to our `M` parameter; -by the way, `min_samples` defaults to `min_cluster_size` if not provided explicitly), -we can obtain a partition that is even closer to the reference one: - - - -.. code-block:: python - - mcss = [5, 10, 25] - mss = [5, 10] - for i in range(len(mcss)): - for j in range(len(mss)): - h = hdbscan.HDBSCAN(min_cluster_size=mcss[i], min_samples=mss[j]) - labels_hdbscan = h.fit_predict(X) - plt.subplot(3, 2, i*len(mss)+j+1) - genieclust.plots.plot_scatter(X, labels=labels_hdbscan, alpha=0.5) - plt.title("(min_cluster_size=%d, min_samples=%d)" % ( - h.min_cluster_size, h.min_samples or h.min_cluster_size)) - plt.axis("equal") - plt.show() - - -.. figure:: figures/noise_noise-HDBSCAN2_1.png - :width: 15 cm - - Labels predicted by HDBSCAN\*. - - - -Neat. - diff --git a/.devel/sphinx/weave/sparse.Rmd b/.devel/sphinx/weave/sparse.Rmd new file mode 100644 index 00000000..662e86dc --- /dev/null +++ b/.devel/sphinx/weave/sparse.Rmd @@ -0,0 +1,140 @@ +# Example: Sparse Data and Movie Recommendation + +To illustrate how *genieclust* handles +[sparse data](https://en.wikipedia.org/wiki/Sparse_matrix), +let's perform a simple exercise in movie recommendation based on +the [MovieLens](https://grouplens.org/datasets/movielens/latest/) data. + +::::{important} +Make sure that the [*nmslib*](https://github.com/nmslib/nmslib) +package (an optional dependency) is installed. 
+ +```{python} +import nmslib +``` +:::: + +```{python sparse-example-imports} +import numpy as np +import scipy.sparse +import pandas as pd +``` + +```{python settings,results="hide",echo=FALSE} +#import pweave +from tabulate import tabulate +np.set_printoptions(precision=3, threshold=50, edgeitems=50) +pd.set_option("display.min_rows", 200) +#pd.set_option("display.width", 80) +#pd.set_option("display.max_colwidth", 30) +#plt.style.use("bmh") +#plt.rcParams.update({ +# 'font.size': 9, +# 'font.family': 'sans-serif', +# 'font.sans-serif': ['Ubuntu Condensed', 'Alegreya', 'Alegreya Sans']}) +``` + + +First, we load the `ratings` data frame +and map the movie IDs to consecutive integers. + +```{python sparse-example-ratings} +ratings = pd.read_csv("ml-9-2018-small/ratings.csv") +ratings["movieId"] -= 1 +ratings["userId"] -= 1 +old_movieId_map = np.unique(ratings["movieId"]) +ratings["movieId"] = np.searchsorted(old_movieId_map, ratings["movieId"]) +ratings.head() +``` + +Then, we read the movie metadata and transform the movie IDs +in the same way: + +```{python sparse-example-movies} +movies = pd.read_csv("ml-9-2018-small/movies.csv") +movies["movieId"] -= 1 +movies = movies.loc[movies.movieId.isin(old_movieId_map), :] +movies["movieId"] = np.searchsorted(old_movieId_map, movies["movieId"]) +movies.iloc[:, :2].head() +``` + + +Conversion of ratings to a CSR-format sparse matrix: + +```{python sparse-example-create} +n = ratings.movieId.max()+1 +d = ratings.userId.max()+1 +X = scipy.sparse.dok_matrix((n,d), dtype=np.float32) +X[ratings.movieId, ratings.userId] = ratings.rating +X = X.tocsr() +print(repr(X)) +``` + + +First few observations: + +```{python sparse-example-show} +X[:5, :10].todense() +``` + +Let's extract 200 clusters with Genie using the cosine similarity between films' ratings +as given by users (two movies considered similar if they get similar reviews). 
+Sparse inputs are supported by the approximate version of the algorithm +which relies on the near-neighbour search routines implemented in the *nmslib* package. + + +```{python sparse-example-cluster} +import genieclust +g = genieclust.Genie(n_clusters=200, exact=False, affinity="cosinesimil_sparse") +movies["cluster"] = g.fit_predict(X) +``` + +Here are the members of an example cluster: + +```{python sparse-example-report} +movies["cluster"] = g.fit_predict(X) +which_cluster = movies.cluster[movies.title=="Monty Python's The Meaning of Life (1983)"] +movies.loc[movies.cluster == int(which_cluster)].title.sort_values() +``` + + + + +The above was performed on an abridged version of the MovieLens dataset. +The project's [website](https://grouplens.org/datasets/movielens/latest/) +also features a full database that yields a 53,889x283,228 ratings table +(with 27,753,444 non-zero elements). Such a matrix would definitely +not fit into our RAM if it was given in the dense form. +Determining the whole cluster hierarchy takes only 144 seconds. +Here is one of the 500 clusters extracted: + +``` +## 13327 Blackadder Back & Forth (1999) +## 13328 Blackadder's Christmas Carol (1988) +## 3341 Creature Comforts (1989) +## 1197 Grand Day Out with Wallace and Gromit, A (1989) +## 2778 Hard Day's Night, A (1964) +## 2861 Help! (1965) +## 2963 How I Won the War (1967) +## 6006 Monty Python Live at the Hollywood Bowl (1982) +## 1113 Monty Python and the Holy Grail (1975) +## 2703 Monty Python's And Now for Something Completel... +## 1058 Monty Python's Life of Brian (1979) +## 6698 Monty Python's The Meaning of Life (1983) +## 27284 Oliver Twist (1997) +## 2216 Producers, The (1968) +## 4716 Quadrophenia (1979) +## 6027 Secret Policeman's Other Ball, The (1982) +## 27448 The Basket (2000) +## 2792 Tommy (1975) +## 10475 Wallace & Gromit in The Curse of the Were-Rabb... +## 732 Wallace & Gromit: A Close Shave (1995) +## 708 Wallace & Gromit: The Best of Aardman Animatio... 
+## 1125 Wallace & Gromit: The Wrong Trousers (1993) +## 13239 Wallace and Gromit in 'A Matter of Loaf and De... +## 2772 Yellow Submarine (1968) +## 1250 Young Frankenstein (1974) +## Name: title, dtype: object +``` + + diff --git a/.devel/sphinx/weave/sparse.rstw b/.devel/sphinx/weave/sparse.rstw deleted file mode 100644 index 8abb4d46..00000000 --- a/.devel/sphinx/weave/sparse.rstw +++ /dev/null @@ -1,138 +0,0 @@ -Example: Sparse Data and Movie Recommendation -============================================= - -To illustrate how *genieclust* handles -`sparse data `_, -let's perform a simple exercise in movie recommendation based on -`MovieLens `_ data. - -.. important:: - - Make sure that the *nmslib* package (an optional dependency) is installed. - - -<>= -import numpy as np -import scipy.sparse -import pandas as pd -@ - -<>= -import pweave -from tabulate import tabulate -np.set_printoptions(precision=3, threshold=50, edgeitems=50) -pd.set_option("display.min_rows", 200) -#pd.set_option("display.width", 80) -#pd.set_option("display.max_colwidth", 30) -#plt.style.use("bmh") -#plt.rcParams.update({ -# 'font.size': 9, -# 'font.family': 'sans-serif', -# 'font.sans-serif': ['Ubuntu Condensed', 'Alegreya', 'Alegreya Sans']}) -@ - - -First we load the `ratings` data frame -and map the movie IDs to consecutive integers. 
- -<>= -ratings = pd.read_csv("ml-9-2018-small/ratings.csv") -ratings["movieId"] -= 1 -ratings["userId"] -= 1 -old_movieId_map = np.unique(ratings["movieId"]) -ratings["movieId"] = np.searchsorted(old_movieId_map, ratings["movieId"]) -ratings.head() -@ - -Then we read the movie metadata and transform the movie IDs -in the same way: - -<>= -movies = pd.read_csv("ml-9-2018-small/movies.csv") -movies["movieId"] -= 1 -movies = movies.loc[movies.movieId.isin(old_movieId_map), :] -movies["movieId"] = np.searchsorted(old_movieId_map, movies["movieId"]) -movies.iloc[:, :2].head() -@ - - -Conversion of ratings to a CSR-format sparse matrix: - -<>= -n = ratings.movieId.max()+1 -d = ratings.userId.max()+1 -X = scipy.sparse.dok_matrix((n,d), dtype=np.float32) -X[ratings.movieId, ratings.userId] = ratings.rating -X = X.tocsr() -print(repr(X)) -@ - - -First few observations: - -<>= -X[:5, :10].todense() -@ - -Let's extract 200 clusters with Genie with respect to the cosine similarity between films' ratings -as given by users (two movies considered similar if they get similar reviews). -Sparse inputs are supported by the approximate version of the algorithm -which relies on the near-neighbour search routines implemented in the *nmslib* package. - - -<>= -import genieclust -g = genieclust.Genie(n_clusters=200, exact=False, affinity="cosinesimil_sparse") -movies["cluster"] = g.fit_predict(X) -@ - -Here are the members of an example cluster: - -<>= -movies["cluster"] = g.fit_predict(X) -which_cluster = movies.cluster[movies.title=="Monty Python's The Meaning of Life (1983)"] -movies.loc[movies.cluster == int(which_cluster)].title.sort_values() -@ - - - - -The above was performed on an abridged version of the MovieLens dataset. -The project's `website `_ -also features a full database that yields a 53,889x283,228 ratings table -(with 27,753,444 non-zero elements) -- such a matrix would definitely -not fit into our RAM if it was in the dense form. 
-Determining the whole cluster hierarchy takes only 144 seconds. -Here is one of 500 clusters extracted: - -.. code:: - - ## 13327 Blackadder Back & Forth (1999) - ## 13328 Blackadder's Christmas Carol (1988) - ## 3341 Creature Comforts (1989) - ## 1197 Grand Day Out with Wallace and Gromit, A (1989) - ## 2778 Hard Day's Night, A (1964) - ## 2861 Help! (1965) - ## 2963 How I Won the War (1967) - ## 6006 Monty Python Live at the Hollywood Bowl (1982) - ## 1113 Monty Python and the Holy Grail (1975) - ## 2703 Monty Python's And Now for Something Completel... - ## 1058 Monty Python's Life of Brian (1979) - ## 6698 Monty Python's The Meaning of Life (1983) - ## 27284 Oliver Twist (1997) - ## 2216 Producers, The (1968) - ## 4716 Quadrophenia (1979) - ## 6027 Secret Policeman's Other Ball, The (1982) - ## 27448 The Basket (2000) - ## 2792 Tommy (1975) - ## 10475 Wallace & Gromit in The Curse of the Were-Rabb... - ## 732 Wallace & Gromit: A Close Shave (1995) - ## 708 Wallace & Gromit: The Best of Aardman Animatio... - ## 1125 Wallace & Gromit: The Wrong Trousers (1993) - ## 13239 Wallace and Gromit in 'A Matter of Loaf and De... - ## 2772 Yellow Submarine (1968) - ## 1250 Young Frankenstein (1974) - ## Name: title, dtype: object - - - diff --git a/.devel/sphinx/weave/string.Rmd b/.devel/sphinx/weave/string.Rmd new file mode 100644 index 00000000..53e331e5 --- /dev/null +++ b/.devel/sphinx/weave/string.Rmd @@ -0,0 +1,70 @@ +# Example: String Data and Grouping of DNA + +The *genieclust* package also allows for clustering of character string +data. Let's perform an example grouping based on +[Levenshtein's](https://en.wikipedia.org/wiki/Levenshtein_distance) edit distance. + +::::{important} +Make sure that the [*nmslib*](https://github.com/nmslib/nmslib) package +(an optional dependency) is installed. 
+ +```{python} +import nmslib +``` +:::: + + +As an example, we will use one of the benchmark datasets mentioned +in {cite}`genieins`: + + + +```{python settings,results="hide",echo=FALSE} +#import pweave +from tabulate import tabulate +import numpy as np +np.set_printoptions(precision=3, threshold=50, edgeitems=50) +``` + + + +```{python string-example-strings} +import numpy as np +# see https://github.com/gagolews/genieclust/tree/master/.devel/sphinx/weave/ +strings = np.loadtxt("actg1.data.gz", dtype=str).tolist() +strings[:5] # preview +``` + +The dataset comes with a set of reference labels that give the "true" grouping +assigned by an expert: + +```{python string-example-labels} +labels_true = np.loadtxt("actg1.labels0.gz", dtype=np.intp) - 1 +n_clusters = len(np.unique(labels_true)) +print(n_clusters) +``` + + +Clustering in the string domain relies on the +near-neighbour search routines implemented in the *nmslib* package. + +```{python string-example-cluster} +import genieclust +g = genieclust.Genie( + n_clusters=n_clusters, + exact=False, # use nmslib + cast_float32=False, # do not convert the string list to a matrix + nmslib_params_index=dict(post=0), # faster + affinity="leven") +labels_pred = g.fit_predict(strings) +``` + + +The adjusted Rand index can be used as an external cluster validity metric: + +```{python string-example-ar} +genieclust.compare_partitions.adjusted_rand_score(labels_true, labels_pred) +``` + +It indicates a very high degree of similarity between the reference +and the obtained clusterings. diff --git a/.devel/sphinx/weave/string.rst b/.devel/sphinx/weave/string.rst deleted file mode 100644 index 90908ddd..00000000 --- a/.devel/sphinx/weave/string.rst +++ /dev/null @@ -1,140 +0,0 @@ -Example: String Data and Grouping of DNA -======================================== - -The *genieclust* package also allows for clustering of character string -data. Let's perform an example grouping based -on `Levenshtein's `_ edit -distance. - -.. 
important:: - - Make sure that the *nmslib* package (an optional dependency) is installed. - - -We will use one of the benchmark datasets mentioned in :cite:`genieins` -as an example: - - - - - - - - - -.. code-block:: python - - import numpy as np - # see https://github.com/gagolews/genieclust/tree/master/.devel/sphinx/weave/ - strings = np.loadtxt("actg1.data.gz", dtype=np.str).tolist() - strings[:5] # preview - - -:: - - ## /tmp/ipykernel_42024/2791853717.py:3: FutureWarning: In the future - ## `np.str` will be defined as the corresponding NumPy scalar. - ## strings = np.loadtxt("actg1.data.gz", dtype=np.str).tolist() - -:: - - ## ---------------------------------------------------------------------------AttributeError - ## Traceback (most recent call last)Cell In[1], line 3 - ## 1 import numpy as np - ## 2 # see - ## https://github.com/gagolews/genieclust/tree/master/.devel/sphinx/weave/ - ## ----> 3 strings = np.loadtxt("actg1.data.gz", dtype=np.str).tolist() - ## 4 strings[:5] # preview - ## File ~/.virtualenvs/python3-default/lib/python3.11/site- - ## packages/numpy/__init__.py:319, in __getattr__(attr) - ## 314 warnings.warn( - ## 315 f"In the future `np.{attr}` will be defined as the " - ## 316 "corresponding NumPy scalar.", FutureWarning, - ## stacklevel=2) - ## 318 if attr in __former_attrs__: - ## --> 319 raise AttributeError(__former_attrs__[attr]) - ## 321 if attr == 'testing': - ## 322 import numpy.testing as testing - ## AttributeError: module 'numpy' has no attribute 'str'. - ## `np.str` was a deprecated alias for the builtin `str`. To avoid this - ## error in existing code, use `str` by itself. Doing this will not - ## modify any behavior and is safe. If you specifically wanted the numpy - ## scalar type, use `np.str_` here. 
- ## The aliases was originally deprecated in NumPy 1.20; for more details - ## and guidance see the original release note at: - ## https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations - - - -It comes with a set of reference labels, giving the "true" grouping assigned -by an expert: - - -.. code-block:: python - - labels_true = np.loadtxt("actg1.labels0.gz", dtype=np.intp)-1 - n_clusters = len(np.unique(labels_true)) - print(n_clusters) - - -:: - - ## 20 - - - - -Clustering in the string domain relies on the -near-neighbour search routines implemented in the *nmslib* package. - - -.. code-block:: python - - import genieclust - g = genieclust.Genie( - n_clusters=n_clusters, - exact=False, # use nmslib - cast_float32=False, # do not convert the string list to a matrix - nmslib_params_index=dict(post=0), # faster - affinity="leven") - labels_pred = g.fit_predict(strings) - - -:: - - ## ---------------------------------------------------------------------------NameError - ## Traceback (most recent call last)Cell In[1], line 8 - ## 1 import genieclust - ## 2 g = genieclust.Genie( - ## 3 n_clusters=n_clusters, - ## 4 exact=False, # use nmslib - ## 5 cast_float32=False, # do not convert the string list to a - ## matrix - ## 6 nmslib_params_index=dict(post=0), # faster - ## 7 affinity="leven") - ## ----> 8 labels_pred = g.fit_predict(strings) - ## NameError: name 'strings' is not defined - - - - -The adjusted Rand index can be used as an external cluster validity metric: - - -.. 
code-block:: python - - genieclust.compare_partitions.adjusted_rand_score(labels_true, labels_pred) - - -:: - - ## ---------------------------------------------------------------------------NameError - ## Traceback (most recent call last)Cell In[1], line 1 - ## ----> 1 genieclust.compare_partitions.adjusted_rand_score(labels_true, - ## labels_pred) - ## NameError: name 'labels_pred' is not defined - - - -This indicates a very high degree of similarity between the reference -and the obtained clusterings. diff --git a/.devel/sphinx/weave/string.rstw b/.devel/sphinx/weave/string.rstw deleted file mode 100644 index 83d0c3fe..00000000 --- a/.devel/sphinx/weave/string.rstw +++ /dev/null @@ -1,75 +0,0 @@ -Example: String Data and Grouping of DNA -======================================== - -The *genieclust* package also allows for clustering of character string -data. Let's perform an example grouping based -on `Levenshtein's `_ edit -distance. - -.. important:: - - Make sure that the *nmslib* package (an optional dependency) is installed. 
- - -We will use one of the benchmark datasets mentioned in :cite:`genieins` -as an example: - - - -<>= -import pweave -from tabulate import tabulate -import numpy as np -np.set_printoptions(precision=3, threshold=50, edgeitems=50) -#pd.set_option("display.min_rows", 200) -#pd.set_option("display.width", 80) -#pd.set_option("display.max_colwidth", 30) -#plt.style.use("bmh") -#plt.rcParams.update({ -# 'font.size': 9, -# 'font.family': 'sans-serif', -# 'font.sans-serif': ['Ubuntu Condensed', 'Alegreya', 'Alegreya Sans']}) -@ - - - -<>= -import numpy as np -# see https://github.com/gagolews/genieclust/tree/master/.devel/sphinx/weave/ -strings = np.loadtxt("actg1.data.gz", dtype=np.str).tolist() -strings[:5] # preview -@ - -It comes with a set of reference labels, giving the "true" grouping assigned -by an expert: - -<>= -labels_true = np.loadtxt("actg1.labels0.gz", dtype=np.intp)-1 -n_clusters = len(np.unique(labels_true)) -print(n_clusters) -@ - - -Clustering in the string domain relies on the -near-neighbour search routines implemented in the *nmslib* package. - -<>= -import genieclust -g = genieclust.Genie( - n_clusters=n_clusters, - exact=False, # use nmslib - cast_float32=False, # do not convert the string list to a matrix - nmslib_params_index=dict(post=0), # faster - affinity="leven") -labels_pred = g.fit_predict(strings) -@ - - -The adjusted Rand index can be used as an external cluster validity metric: - -<>= -genieclust.compare_partitions.adjusted_rand_score(labels_true, labels_pred) -@ - -This indicates a very high degree of similarity between the reference -and the obtained clusterings. 
diff --git a/.devel/sphinx/weave/timings-figures/digits-3.pdf b/.devel/sphinx/weave/timings-figures/digits-3.pdf new file mode 100644 index 00000000..2a36cd97 Binary files /dev/null and b/.devel/sphinx/weave/timings-figures/digits-3.pdf differ diff --git a/.devel/sphinx/weave/timings-figures/digits-3.png b/.devel/sphinx/weave/timings-figures/digits-3.png new file mode 100644 index 00000000..01b9eb6b Binary files /dev/null and b/.devel/sphinx/weave/timings-figures/digits-3.png differ diff --git a/.devel/sphinx/weave/timings-figures/g2mg-plot-1.pdf b/.devel/sphinx/weave/timings-figures/g2mg-plot-1.pdf new file mode 100644 index 00000000..d947ffa7 Binary files /dev/null and b/.devel/sphinx/weave/timings-figures/g2mg-plot-1.pdf differ diff --git a/.devel/sphinx/weave/timings-figures/g2mg-plot-1.png b/.devel/sphinx/weave/timings-figures/g2mg-plot-1.png new file mode 100644 index 00000000..aa0aea7f Binary files /dev/null and b/.devel/sphinx/weave/timings-figures/g2mg-plot-1.png differ diff --git a/.devel/sphinx/weave/timings.rstw b/.devel/sphinx/weave/timings.Rmd similarity index 73% rename from .devel/sphinx/weave/timings.rstw rename to .devel/sphinx/weave/timings.Rmd index cbdcac0d..00aece63 100644 --- a/.devel/sphinx/weave/timings.rstw +++ b/.devel/sphinx/weave/timings.Rmd @@ -1,43 +1,40 @@ -Timings (How Fast Is It?) -========================= +# Timings (How Fast Is It?) -In the :any:`previous section ` we have demonstrated -that Genie generates partitions of high *quality*. Now the crucial question is: -does it do this quickly? - -Genie will be compared against K-means from `scikit-learn `_ -:cite:`sklearn` version 0.23.1 +In the [previous section](benchmarks_ar), we have demonstrated +that Genie generates partitions of decent *quality*. However, is it quick? 
+Let's compare it against K-means from [scikit-learn](https://scikit-learn.org/) +{cite}`sklearn` version 0.23.1 (`sklearn.cluster.KMeans`) for different number of threads -(by default it uses all available resources; +(by default, it uses all available resources; note that the number of restarts, `n_init`, defaults to 10) and hierarchical agglomerative algorithms with the centroid, median, and Ward linkage implemented in the -`fastcluster `_ package -:cite:`fastclusterpkg`. +[fastcluster](http://www.danifold.net/fastcluster.html) package +{cite}`fastclusterpkg`. Genie, just like the single linkage, is based on a minimum spanning tree (MST) -:cite:`clustering-msts` of the pairwise distance graph of an input point set. -Given the MST (the slow part), Genie itself has :math:`O(n \sqrt{n})` time -and :math:`O(n)` memory complexity. +{cite}`clustering-msts` of the pairwise distance graph of an input point set. +Given the MST (the slow part), Genie itself has $O(n \sqrt{n})$ time +and $O(n)$ memory complexity. Generally, our parallelised implementation of a Jarník (Prim/Dijkstra)-like -method :cite:`olson` will be called to compute an MST, which takes :math:`O(d n^2)` time. -However, `mlpack.emst `_ :cite:`mlpack` provides a very fast +method {cite}`olson` will be called to compute an MST, which takes $O(d n^2)$ time. +However, [mlpack.emst](https://www.mlpack.org/) {cite}`mlpack` provides a very fast alternative in the case of Euclidean spaces of (very) low dimensionality, -see :cite:`emst` and the `mlpack_enabled` parameter, which is automatically used -for datasets with up to :math:`d=6` features. -Moreover, in the approximate method (`exact` = ``False``), we apply +see {cite}`emst` and the `mlpack_enabled` parameter, which is automatically used +for datasets with up to $d=6$ features. +Moreover, in the approximate method (`exact=False`), we apply the Kruskal algorithm on the near-neighbour graph determined -by `nmslib` :cite:`nmslib`\ . 
Albeit this only gives *some* sort of a spanning *forest*, -such a data structure :any:`turns out to be very suitable for our clustering task `\ . +by `nmslib` {cite}`nmslib`. Even though this only gives *some* sort of a spanning *forest*, +such a data structure [turns out to be very suitable for our clustering task](benchmarks_approx). All timings will be performed on a PC running GNU/Linux 5.4.0-40-generic #44-Ubuntu SMP x86_64 kernel with an Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz (12M cache, 6 cores, 12 threads) and total memory of 16,242,084 kB. -<>= +```{python timings-imports,results="hide",echo=FALSE} import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -88,21 +85,20 @@ dims = dims.loc[:,"dataset":] # "sklearn_spectral_Arbf_G5": "spectral_rbf_5", # "sklearn_birch_T0.01_BF100": "birch_0.01", # }) -@ +``` -Large Datasets --------------- +## Large Datasets Let's study the algorithm's run times for some of the "larger" datasets (70,000-105,600 observations, -see section on :any:`benchmark results ` for discussion) +see section on [benchmark results](benchmarks_ar) for discussion) from the -`Benchmark Suite for Clustering Algorithms — Version 1 `_ -:cite:`clustering_benchmarks_v1`\ . +[Benchmark Suite for Clustering Algorithms (Version 1.0)](https://clustering-benchmarks.gagolewski.com), +{cite}`clustering-benchmarks`. Features with variance of 0 were removed, datasets were centred at **0** and scaled so that they have total variance of 1. Tiny bit of Gaussian noise was added to each observation. @@ -110,7 +106,7 @@ Clustering is performed with respect to the Euclidean distance. 
-<>= +```{python timings-get-min,results="hide",echo=FALSE} res2 = res.loc[(res.n_threads>0), "dataset":] res2 = res2.loc[res.dataset.isin(["mnist/digits", "mnist/fashion", "sipu/worms_2", "sipu/worms_64"]), :] @@ -120,7 +116,7 @@ res2 = pd.merge(res2, dims, on="dataset") # what's missing: # pd.set_option("display.width", 200) # res.groupby(["dataset", "method", "n_clusters", "n_threads"]).size().unstack([3,2]) -@ +``` @@ -131,7 +127,7 @@ For K-means, the timings are listed as a function of the number of clusters to d for the other hierarchical methods the run-times are almost identical irrespective of the partitions' cardinality. -<>= +```{python timings-summary,results="asis",echo=FALSE} _dat = res2.loc[(res2.n_threads==6) | res2.method.isin(["fastcluster_median", "fastcluster_centroid", "fastcluster_ward"]), \ ["dataset","n", "d", "method","n_clusters","elapsed_time"]].\ set_index(["dataset","n", "d", "method","n_clusters"]).unstack().reset_index() @@ -141,17 +137,19 @@ _dat.loc[~_dat.method.isin(["sklearn_kmeans"]), 100] = np.nan _dat.loc[~_dat.method.isin(["sklearn_kmeans"]), 1000] = np.nan which_repeated = (_dat.dataset.shift(1) == _dat.dataset) _dat.loc[which_repeated, "dataset"] = "" +_dat.loc[:, "n"] = _dat.loc[:, "n"].astype(str) _dat.loc[which_repeated, "n"] = "" +_dat.loc[:, "d"] = _dat.loc[:, "d"].astype(str) _dat.loc[which_repeated, "d"] = "" -_dat = tabulate(_dat, _dat.columns, tablefmt="rst", showindex=False) +_dat = tabulate(_dat, _dat.columns, tablefmt="github", showindex=False) _dat = _dat.replace("nan", "") print(_dat, "\n\n") -@ +``` Of course, the K-means algorithm is the fastest. However, its performance degrades as K increases. Hence, it might not be -a good choice for the so-called *extreme clustering* (compare :cite:`extreme`) +a good choice for the so-called *extreme clustering* (compare {cite}`extreme`) problems. Most importantly, the approximate version of Genie (based on `nmslib`) is only slightly slower. 
The exact variant is extremely performant in Euclidean spaces of low dimensionality @@ -162,18 +160,18 @@ hierarchical algorithms in this study. -Timings as a Function of `n` and `d` ------------------------------------- +## Timings as a Function of `n` and `d` + In order to study the run-times as a function dataset size and dimensionality, let's consider a series of synthetic benchmarks, each with two Gaussian blobs of size `n/2` (with i.i.d. coordinates), in a `d`-dimensional space. -Here are the medians of 3-10 timings (depending on the dataset size), in seconds, +Here are the medians of 3–10 timings (depending on the dataset size), in seconds, on 6 threads: -<>= -g2mg = pd.read_csv("v1-g2mg.csv") # see timings_g2mg.py +```{python g2mg-summary,results="asis",echo=FALSE} +g2mg = pd.read_csv("v1-g2mg.csv") # see timings_g2mg.py # What's missing: # g2mg.loc[g2mg.n_threads>0,:].groupby(["method", "n", "d"])[["elapsed_time"]].size().unstack(0) # g2mg.loc[g2mg.n_threads>0,:].groupby(["method", "n", "d"])[["elapsed_time"]].agg(scipy.stats.variation).unstack(0) @@ -183,11 +181,12 @@ _dat = _dat.set_index(["method", "d", "n"]).unstack().round(2).reset_index() _dat.columns = [l0 if not l1 else l1 for l0, l1 in _dat.columns] which_repeated = (_dat.method.shift(1) == _dat.method) _dat.loc[which_repeated, "method"] = "" -print(tabulate(_dat, _dat.columns, tablefmt="rst", showindex=False), "\n\n") -@ +print(tabulate(_dat, _dat.columns, tablefmt="github", showindex=False), "\n\n") +``` + -By default, `mlpack_enabled` is ``"auto"``, which translates -to ``True`` if the requested metric is Euclidean, Python package `mlpack` is available, +By default, `mlpack_enabled` is `"auto"`, which translates +to `True` if the requested metric is Euclidean, Python package `mlpack` is available, and `d` is not greater than 6. The effect of the curse of dimensionality is clearly visible -- clustering in very low-dimensional Euclidean spaces is extremely fast. 
@@ -195,27 +194,25 @@ On the other hand, the approximate version of Genie can easily cluster very large datasets. Only the system's memory limits might become a problem then. -<>= +```{python g2mg-plot,results="hide",echo=FALSE,fig.cap="Timings [s] as a function of the dataset size and dimensionality — problem sizes that can be solved in a few minutes."} _dat = g2mg.loc[g2mg.method.isin(["Genie_0.3_approx", "Genie_0.3_nomlpack", "Genie_0.3_mlpack"])&(g2mg.d>10),["method","n","d","elapsed_time"]].groupby(["method","n","d"]).median().reset_index() sns.lineplot(x="n", y="elapsed_time", hue="method", style="d", data=_dat, markers=True) #plt.yscale("log") #plt.xscale("log") plt.ylim(0, 600) plt.show() -@ +``` -Timings as a Function of the Number of Threads ----------------------------------------------- +## Timings as a Function of the Number of Threads Recall that the timings are done on a PC with 6 physical cores. -Genie turns out to be nicely parallelisable — as evidenced on -the ``mnist/digits`` dataset: +Genie turns out to be nicely parallelisable, as seen on the `mnist/digits` dataset: -<>= +```{python digits,results="hide",echo=FALSE,fig.cap="Timings [s] as a function of the number of clusters and threads."} dataset="mnist/digits" sns.lineplot(x="n_clusters", y="elapsed_time", hue="method", style="n_threads", data=res2.loc[(res2.dataset==dataset) & (res2.method.isin(["sklearn_kmeans", "Genie_0.3", "Genie_0.3_approx"])),:], markers=True) @@ -224,24 +221,23 @@ plt.xscale("log") #plt.yscale("log") plt.ylim(0, 2000) plt.show() -@ +``` -Summary -------- +## Summary -The approximate (`exact` = ``False``) version of Genie is much faster +The approximate (`exact=False`) version of Genie is much faster than the original one. At the same time, it is still -:any:`highly compatible ` with it +[highly compatible](benchmarks_approx) with it (at least at higher levels of the cluster hierarchy). Therefore, we can safely recommend its use in large problem instances. 
Most importantly, its performance is not much worse than the K-means method with small K. Once a complete cluster hierarchy is determined, -partitioning of any cardinality can be extracted in less than 0.34 s on a 1M dataset. +partitioning of any cardinality can be extracted in less than 0.34s on a 1M dataset. Still, even the exact Genie is amongst the fastest clustering algorithms in the pool. On top of that, we are also allowed to change our mind about the `gini_threshold` @@ -249,22 +245,22 @@ parameter once the clustering is has been determined. The MST is stored for furt reference and is not recomputed unless needed. Here are the timings for a first run of the algorithm: -<>= +```{python timings-cache-1} import time, genieclust, numpy as np X = np.loadtxt("worms_2.data.gz", ndmin=2) g = genieclust.Genie(n_clusters=2, gini_threshold=0.3) t0 = time.time() g.fit(X) print("time elapsed - first run: %.3f" % (time.time()-t0)) -@ +``` Changing some parameters and re-running the cluster search: -<>= +```{python timings-cache-2} g.set_params(n_clusters=10) g.set_params(gini_threshold=0.1) t0 = time.time() g.fit(X) print("time elapsed - consecutive run: %.3f" % (time.time()-t0)) -@ +``` diff --git a/.devel/sphinx/weave/timings.md b/.devel/sphinx/weave/timings.md new file mode 100644 index 00000000..ac8d5faf --- /dev/null +++ b/.devel/sphinx/weave/timings.md @@ -0,0 +1,221 @@ + + + + +# Timings (How Fast Is It?) + +In the [previous section](benchmarks_ar), we have demonstrated +that Genie generates partitions of decent *quality*. However, is it quick? 
+Let's compare it against K-means from [scikit-learn](https://scikit-learn.org/) +{cite}`sklearn` version 0.23.1 +(`sklearn.cluster.KMeans`) for different numbers of threads +(by default, it uses all available resources; +note that the number of restarts, `n_init`, defaults to 10) +and hierarchical agglomerative algorithms +with the centroid, median, and Ward linkage implemented in the +[fastcluster](http://www.danifold.net/fastcluster.html) package +{cite}`fastclusterpkg`. + + + +Genie, just like the single linkage, is based on a minimum spanning tree (MST) +{cite}`clustering-msts` of the pairwise distance graph of an input point set. +Given the MST (the slow part), Genie itself has $O(n \sqrt{n})$ time +and $O(n)$ memory complexity. +Generally, our parallelised implementation of a Jarník (Prim/Dijkstra)-like +method {cite}`olson` will be called to compute an MST, which takes $O(d n^2)$ time. +However, [mlpack.emst](https://www.mlpack.org/) {cite}`mlpack` provides a very fast +alternative in the case of Euclidean spaces of (very) low dimensionality, +see {cite}`emst` and the `mlpack_enabled` parameter, which is automatically used +for datasets with up to $d=6$ features. +Moreover, in the approximate method (`exact=False`), we apply +the Kruskal algorithm on the near-neighbour graph determined +by `nmslib` {cite}`nmslib`. Even though this only gives *some* sort of a spanning *forest*, +such a data structure [turns out to be very suitable for our clustering task](benchmarks_approx). + +All timings will be performed on a PC running GNU/Linux 5.4.0-40-generic #44-Ubuntu +SMP x86_64 kernel with an Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz (12M cache, 6 cores, 12 threads) +and total memory of 16,242,084 kB. 
+ + + + + + + +## Large Datasets + + +Let's study the algorithm's run times for some of the +"larger" datasets (70,000-105,600 observations, +see section on [benchmark results](benchmarks_ar) for discussion) +from the +[Benchmark Suite for Clustering Algorithms (Version 1.0)](https://clustering-benchmarks.gagolewski.com), +{cite}`clustering-benchmarks`. +Features with variance of 0 were removed, +datasets were centred at **0** and scaled so that they have total variance of 1. +A tiny bit of Gaussian noise was added to each observation. +Clustering is performed with respect to the Euclidean distance. + + + + + + + + +Here are the results (in seconds) if 6 threads are requested +(except for `fastcluster`, which is not parallelised). +For K-means, the timings are listed as a function of the number of clusters to detect; +for the other hierarchical methods the run-times are almost identical irrespective of the +partitions' cardinality. + +| dataset | n | d | method | 10 | 100 | 1000 | +|---------------|--------|-----|----------------------|---------|--------|---------| +| mnist/digits | 70000 | 719 | Genie_0.3 | 412.72 | | | +| | | | Genie_0.3_approx | 42.77 | | | +| | | | fastcluster_centroid | 4170.98 | | | +| | | | fastcluster_median | 3927.93 | | | +| | | | fastcluster_ward | 4114.05 | | | +| | | | sklearn_kmeans | 26.3 | 217.62 | 1691.68 | +| mnist/fashion | 70000 | 784 | Genie_0.3 | 445.81 | | | +| | | | Genie_0.3_approx | 38.02 | | | +| | | | fastcluster_centroid | 4486.32 | | | +| | | | fastcluster_median | 4384.62 | | | +| | | | fastcluster_ward | 4757.32 | | | +| | | | sklearn_kmeans | 24.9 | 225.04 | 1745.88 | +| sipu/worms_2 | 105600 | 2 | Genie_0.3 | 0.57 | | | +| | | | Genie_0.3_approx | 3.67 | | | +| | | | fastcluster_centroid | 66.3 | | | +| | | | fastcluster_median | 64.11 | | | +| | | | fastcluster_ward | 60.92 | | | +| | | | sklearn_kmeans | 0.86 | 10.96 | 111.9 | +| sipu/worms_64 | 105000 | 64 | Genie_0.3 | 76.7 | | | +| | | | Genie_0.3_approx | 8.26 | 
| | +| | | | fastcluster_centroid | 4945.91 | | | +| | | | fastcluster_median | 2854.27 | | | +| | | | fastcluster_ward | 778.18 | | | +| | | | sklearn_kmeans | 3.35 | 37.89 | 357.84 | + + +Of course, the K-means algorithm is the fastest. +However, its performance degrades as K increases. Hence, it might not be +a good choice for the so-called *extreme clustering* (compare {cite}`extreme`) +problems. Most importantly, the approximate version of Genie (based on `nmslib`) +is only slightly slower. +The exact variant is extremely performant in Euclidean spaces of low dimensionality +(thanks to `mlpack`) and overall at least 10 times more efficient than the other +hierarchical algorithms in this study. + + + + + +## Timings as a Function of `n` and `d` + + +In order to study the run-times as a function of dataset size and dimensionality, +let's consider a series of synthetic benchmarks, each with two Gaussian blobs of size `n/2` +(with i.i.d. coordinates), in a `d`-dimensional space. + +Here are the medians of 3–10 timings (depending on the dataset size), in seconds, +on 6 threads: + +| method | d | 10000 | 50000 | 100000 | 500000 | 1000000 | +|--------------------|-----|---------|---------|----------|----------|-----------| +| Genie_0.3_approx | 2 | 0.17 | 0.98 | 2.12 | 14.93 | 33.79 | +| | 5 | 0.2 | 1.3 | 2.87 | 22.75 | 54.66 | +| | 10 | 0.25 | 1.69 | 3.84 | 36.18 | 92.03 | +| | 25 | 0.29 | 1.95 | 5.46 | 62.25 | 158.27 | +| | 50 | 0.36 | 3.15 | 8.15 | 81.95 | 202.08 | +| | 100 | 0.48 | 4.6 | 12.6 | 113.37 | 266.64 | +| Genie_0.3_mlpack | 2 | 0.04 | 0.26 | 0.55 | 3.03 | 6.58 | +| | 5 | 0.28 | 1.96 | 4.46 | 28.4 | 62.75 | +| | 10 | 3.08 | 35.54 | 92.87 | 794.71 | 2014.59 | +| Genie_0.3_nomlpack | 2 | 0.16 | 2.52 | 9.87 | 267.76 | 1657.86 | +| | 5 | 0.14 | 2.62 | 11.4 | 421.46 | 2997.11 | +| | 10 | 0.15 | 3.21 | 12.74 | 719.33 | 4388.26 | +| | 25 | 0.28 | 6.51 | 26.65 | 1627.9 | 7708.23 | +| | 50 | 0.47 | 11.97 | 54.52 | 2175.3 | 11346.3 | +| | 100 | 1 | 26.07 | 132.47 | 
4408.07 | 16021.8 | + + +By default, `mlpack_enabled` is `"auto"`, which translates +to `True` if the requested metric is Euclidean, Python package `mlpack` is available, +and `d` is not greater than 6. +The effect of the curse of dimensionality is clearly visible -- clustering +in very low-dimensional Euclidean spaces is extremely fast. +On the other hand, the approximate version of Genie can easily cluster +very large datasets. Only the system's memory limits might become a problem then. + + +(fig:g2mg-plot)= +```{figure} timings-figures/g2mg-plot-1.* +Timings [s] as a function of the dataset size and dimensionality — problem sizes that can be solved in a few minutes. +``` + + + + +## Timings as a Function of the Number of Threads + +Recall that the timings are done on a PC with 6 physical cores. +Genie turns out to be nicely parallelisable, as seen on the `mnist/digits` dataset: + + +(fig:digits)= +```{figure} timings-figures/digits-3.* +Timings [s] as a function of the number of clusters and threads. +``` + + + + + + +## Summary + +The approximate (`exact=False`) version of Genie is much faster +than the original one. At the same time, it is still +[highly compatible](benchmarks_approx) with it +(at least at higher levels of the cluster hierarchy). Therefore, we +can safely recommend its use in large problem instances. +Most importantly, its performance is not much worse than the K-means method +with small K. Once a complete cluster hierarchy is determined, +partitioning of any cardinality can be extracted in less than 0.34s on a 1M dataset. +Still, even the exact Genie is amongst the fastest clustering algorithms in the pool. + +On top of that, we are also allowed to change our mind about the `gini_threshold` +parameter once the clustering has been determined. The MST is stored for further +reference and is not recomputed unless needed. 
Here are the timings for +a first run of the algorithm: + + + +```python +import time, genieclust, numpy as np +X = np.loadtxt("worms_2.data.gz", ndmin=2) +g = genieclust.Genie(n_clusters=2, gini_threshold=0.3) +t0 = time.time() +g.fit(X) +print("time elapsed - first run: %.3f" % (time.time()-t0)) +## Genie() +## time elapsed - first run: 33.114 +``` + + +Changing some parameters and re-running the cluster search: + + + +```python +g.set_params(n_clusters=10) +g.set_params(gini_threshold=0.1) +t0 = time.time() +g.fit(X) +print("time elapsed - consecutive run: %.3f" % (time.time()-t0)) +## Genie(n_clusters=10) +## Genie(gini_threshold=0.1, n_clusters=10) +## Genie(gini_threshold=0.1, n_clusters=10) +## time elapsed - consecutive run: 0.048 +``` diff --git a/.devel/sphinx/weave/timings.rst b/.devel/sphinx/weave/timings.rst deleted file mode 100644 index dd2b9a11..00000000 --- a/.devel/sphinx/weave/timings.rst +++ /dev/null @@ -1,255 +0,0 @@ -Timings (How Fast Is It?) -========================= - -In the :any:`previous section ` we have demonstrated -that Genie generates partitions of high *quality*. Now the crucial question is: -does it do this quickly? - -Genie will be compared against K-means from `scikit-learn `_ -:cite:`sklearn` version 0.23.1 -(`sklearn.cluster.KMeans`) for different number of threads -(by default it uses all available resources; -note that the number of restarts, `n_init`, defaults to 10) -and hierarchical agglomerative algorithms -with the centroid, median, and Ward linkage implemented in the -`fastcluster `_ package -:cite:`fastclusterpkg`. - - - -Genie, just like the single linkage, is based on a minimum spanning tree (MST) -:cite:`clustering-msts` of the pairwise distance graph of an input point set. -Given the MST (the slow part), Genie itself has :math:`O(n \sqrt{n})` time -and :math:`O(n)` memory complexity. 
-Generally, our parallelised implementation of a Jarník (Prim/Dijkstra)-like -method :cite:`olson` will be called to compute an MST, which takes :math:`O(d n^2)` time. -However, `mlpack.emst `_ :cite:`mlpack` provides a very fast -alternative in the case of Euclidean spaces of (very) low dimensionality, -see :cite:`emst` and the `mlpack_enabled` parameter, which is automatically used -for datasets with up to :math:`d=6` features. -Moreover, in the approximate method (`exact` = ``False``), we apply -the Kruskal algorithm on the near-neighbour graph determined -by `nmslib` :cite:`nmslib`\ . Albeit this only gives *some* sort of a spanning *forest*, -such a data structure :any:`turns out to be very suitable for our clustering task `\ . - -All timings will be performed on a PC running GNU/Linux 5.4.0-40-generic #44-Ubuntu -SMP x86_64 kernel with an Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz (12M cache, 6 cores, 12 threads) -and total memory of 16,242,084 kB. - - - - - - - - -Large Datasets --------------- - - -Let's study the algorithm's run times for some of the -"larger" datasets (70,000-105,600 observations, -see section on :any:`benchmark results ` for discussion) -from the -`Benchmark Suite for Clustering Algorithms — Version 1 `_ -:cite:`clustering_benchmarks_v1`\ . -Features with variance of 0 were removed, -datasets were centred at **0** and scaled so that they have total variance of 1. -Tiny bit of Gaussian noise was added to each observation. -Clustering is performed with respect to the Euclidean distance. - - - - - - - - - -Here are the results (in seconds) if 6 threads are requested -(except for `fastcluster` which is not parallelised). -For K-means, the timings are listed as a function of the number of clusters to detect, -for the other hierarchical methods the run-times are almost identical irrespective of the -partitions' cardinality. 
- - - -============= ====== === ==================== ======= ====== ======= -dataset n d method 10 100 1000 -============= ====== === ==================== ======= ====== ======= -mnist/digits 70000 719 Genie_0.3 412.72 -.. Genie_0.3_approx 42.77 -.. fastcluster_centroid 4170.98 -.. fastcluster_median 3927.93 -.. fastcluster_ward 4114.05 -.. sklearn_kmeans 26.3 217.62 1691.68 -mnist/fashion 70000 784 Genie_0.3 445.81 -.. Genie_0.3_approx 38.02 -.. fastcluster_centroid 4486.32 -.. fastcluster_median 4384.62 -.. fastcluster_ward 4757.32 -.. sklearn_kmeans 24.9 225.04 1745.88 -sipu/worms_2 105600 2 Genie_0.3 0.57 -.. Genie_0.3_approx 3.67 -.. fastcluster_centroid 66.3 -.. fastcluster_median 64.11 -.. fastcluster_ward 60.92 -.. sklearn_kmeans 0.86 10.96 111.9 -sipu/worms_64 105000 64 Genie_0.3 76.7 -.. Genie_0.3_approx 8.26 -.. fastcluster_centroid 4945.91 -.. fastcluster_median 2854.27 -.. fastcluster_ward 778.18 -.. sklearn_kmeans 3.35 37.89 357.84 -============= ====== === ==================== ======= ====== ======= - - -/tmp/ipykernel_38846/4024770980.py:10: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '' has dtype incompatible with int64, please explicitly cast to a compatible dtype first. - _dat.loc[which_repeated, "n"] = "" -/tmp/ipykernel_38846/4024770980.py:11: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '' has dtype incompatible with int64, please explicitly cast to a compatible dtype first. - _dat.loc[which_repeated, "d"] = "" - - - -Of course, the K-means algorithm is the fastest. -However, its performance degrades as K increases. Hence, it might not be -a good choice for the so-called *extreme clustering* (compare :cite:`extreme`) -problems. Most importantly, the approximate version of Genie (based on `nmslib`) -is only slightly slower. 
-The exact variant is extremely performant in Euclidean spaces of low dimensionality -(thanks to `mlpack`) and overall at least 10 times more efficient than the other -hierarchical algorithms in this study. - - - - - -Timings as a Function of `n` and `d` ------------------------------------- - -In order to study the run-times as a function dataset size and dimensionality, -let's consider a series of synthetic benchmarks, each with two Gaussian blobs of size `n/2` -(with i.i.d. coordinates), in a `d`-dimensional space. - -Here are the medians of 3-10 timings (depending on the dataset size), in seconds, -on 6 threads: - - - -================== === ======= ======= ======== ======== ========= -method d 10000 50000 100000 500000 1000000 -================== === ======= ======= ======== ======== ========= -Genie_0.3_approx 2 0.17 0.98 2.12 14.93 33.79 -.. 5 0.2 1.3 2.87 22.75 54.66 -.. 10 0.25 1.69 3.84 36.18 92.03 -.. 25 0.29 1.95 5.46 62.25 158.27 -.. 50 0.36 3.15 8.15 81.95 202.08 -.. 100 0.48 4.6 12.6 113.37 266.64 -Genie_0.3_mlpack 2 0.04 0.26 0.55 3.03 6.58 -.. 5 0.28 1.96 4.46 28.4 62.75 -.. 10 3.08 35.54 92.87 794.71 2014.59 -Genie_0.3_nomlpack 2 0.16 2.52 9.87 267.76 1657.86 -.. 5 0.14 2.62 11.4 421.46 2997.11 -.. 10 0.15 3.21 12.74 719.33 4388.26 -.. 25 0.28 6.51 26.65 1627.9 7708.23 -.. 50 0.47 11.97 54.52 2175.3 11346.3 -.. 100 1 26.07 132.47 4408.07 16021.8 -================== === ======= ======= ======== ======== ========= - - - - -By default, `mlpack_enabled` is ``"auto"``, which translates -to ``True`` if the requested metric is Euclidean, Python package `mlpack` is available, -and `d` is not greater than 6. -The effect of the curse of dimensionality is clearly visible -- clustering -in very low-dimensional Euclidean spaces is extremely fast. -On the other hand, the approximate version of Genie can easily cluster -very large datasets. Only the system's memory limits might become a problem then. - - - -.. 
figure:: figures/timings_g2mg-plot_1.png - :width: 15 cm - - Timings [s] as a function of the dataset size and dimensionality — problem sizes that can be solved in a few minutes. - - - - - - -Timings as a Function of the Number of Threads ----------------------------------------------- - -Recall that the timings are done on a PC with 6 physical cores. -Genie turns out to be nicely parallelisable — as evidenced on -the ``mnist/digits`` dataset: - - - -.. figure:: figures/timings_digits_1.png - :width: 15 cm - - Timings [s] as a function of the number of clusters and threads. - - - - - - - - -Summary -------- - -The approximate (`exact` = ``False``) version of Genie is much faster -than the original one. At the same time, it is still -:any:`highly compatible ` with it -(at least at higher levels of the cluster hierarchy). Therefore, we -can safely recommend its use in large problem instances. -Most importantly, its performance is not much worse than the K-means method -with small K. Once a complete cluster hierarchy is determined, -partitioning of any cardinality can be extracted in less than 0.34 s on a 1M dataset. -Still, even the exact Genie is amongst the fastest clustering algorithms in the pool. - -On top of that, we are also allowed to change our mind about the `gini_threshold` -parameter once the clustering is has been determined. The MST is stored for further -reference and is not recomputed unless needed. Here are the timings for -a first run of the algorithm: - - -.. code-block:: python - - import time, genieclust, numpy as np - X = np.loadtxt("worms_2.data.gz", ndmin=2) - g = genieclust.Genie(n_clusters=2, gini_threshold=0.3) - t0 = time.time() - g.fit(X) - print("time elapsed - first run: %.3f" % (time.time()-t0)) - - -:: - - ## time elapsed - first run: 9.411 - - - - -Changing some parameters and re-running the cluster search: - - -.. 
code-block:: python - - g.set_params(n_clusters=10) - g.set_params(gini_threshold=0.1) - t0 = time.time() - g.fit(X) - print("time elapsed - consecutive run: %.3f" % (time.time()-t0)) - - -:: - - ## time elapsed - consecutive run: 0.025 - - diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 832cfaf5..7ea065e2 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,5 +1,4 @@ -Code of Conduct -=============== +# Code of Conduct This is a project conveyed in its authors' free time. It is their little act of charity to make this world an (even) better place. diff --git a/DESCRIPTION b/DESCRIPTION index b23d9caf..00b74527 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: genieclust Type: Package Title: Fast and Robust Hierarchical Clustering with Noise Points Detection -Version: 1.1.4.9004 -Date: 2023-10-17 +Version: 1.1.5 +Date: 2023-10-18 Authors@R: c( person("Marek", "Gagolewski", role = c("aut", "cre", "cph"), diff --git a/NEWS b/NEWS index ab52accf..453c90a6 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,6 @@ # Changelog - -## 1.1.4.9xxx +## 1.1.5 (2023-10-18) * [BACKWARD INCOMPATIBILITY] [Python and R] Inequality measures are no longer referred to as inequity measures. @@ -66,9 +65,6 @@ ## 1.1.0 (2022-09-05) -* [GENERAL] The below-mentioned cluster validity measures are discussed - in more detail at . - * [Python and R] New function: `adjusted_asymmetric_accuracy`. * [Python and R] Implementations of the so-called internal cluster @@ -89,6 +85,9 @@ `silhouette_w_index`, `wcnn_index`. + These cluster validity measures are discussed + in more detail at . + * [BACKWARD INCOMPATIBILITY] `normalized_confusion_matrix` now solves the maximal assignment problem instead of applying the somewhat primitive partial pivoting. 
diff --git a/docs/_downloads/01c61da91a57cfdb372b0edb5845828a/genieclust_plots-4.pdf b/docs/_downloads/01c61da91a57cfdb372b0edb5845828a/genieclust_plots-4.pdf index 88caa8cd..9b89d279 100644 Binary files a/docs/_downloads/01c61da91a57cfdb372b0edb5845828a/genieclust_plots-4.pdf and b/docs/_downloads/01c61da91a57cfdb372b0edb5845828a/genieclust_plots-4.pdf differ diff --git a/docs/_downloads/36c4dad867b3fffadb9a35cb5700de0e/genieclust_plots-3.pdf b/docs/_downloads/36c4dad867b3fffadb9a35cb5700de0e/genieclust_plots-3.pdf index 6ca505fa..87f888a2 100644 Binary files a/docs/_downloads/36c4dad867b3fffadb9a35cb5700de0e/genieclust_plots-3.pdf and b/docs/_downloads/36c4dad867b3fffadb9a35cb5700de0e/genieclust_plots-3.pdf differ diff --git a/docs/_downloads/3db969050b6fe55b9afe4d23ba8ca159/genieclust_plots-1.pdf b/docs/_downloads/3db969050b6fe55b9afe4d23ba8ca159/genieclust_plots-1.pdf index 3161195c..286e8f7d 100644 Binary files a/docs/_downloads/3db969050b6fe55b9afe4d23ba8ca159/genieclust_plots-1.pdf and b/docs/_downloads/3db969050b6fe55b9afe4d23ba8ca159/genieclust_plots-1.pdf differ diff --git a/docs/_downloads/f10d82cc1a8914330c4324f07e968726/genieclust_plots-2.pdf b/docs/_downloads/f10d82cc1a8914330c4324f07e968726/genieclust_plots-2.pdf index 7ffa8a9c..c8943c7e 100644 Binary files a/docs/_downloads/f10d82cc1a8914330c4324f07e968726/genieclust_plots-2.pdf and b/docs/_downloads/f10d82cc1a8914330c4324f07e968726/genieclust_plots-2.pdf differ diff --git a/docs/_images/benchmarks_ar_plot_large_1.png b/docs/_images/benchmarks_ar_plot_large_1.png deleted file mode 100644 index f914e708..00000000 Binary files a/docs/_images/benchmarks_ar_plot_large_1.png and /dev/null differ diff --git a/docs/_images/benchmarks_ar_plot_small_1.png b/docs/_images/benchmarks_ar_plot_small_1.png deleted file mode 100644 index 9cd0e1cf..00000000 Binary files a/docs/_images/benchmarks_ar_plot_small_1.png and /dev/null differ diff --git a/docs/_images/benchmarks_details_indices_large_1.png 
b/docs/_images/benchmarks_details_indices_large_1.png deleted file mode 100644 index db8018b6..00000000 Binary files a/docs/_images/benchmarks_details_indices_large_1.png and /dev/null differ diff --git a/docs/_images/benchmarks_details_indices_small_1.png b/docs/_images/benchmarks_details_indices_small_1.png deleted file mode 100644 index cf6823df..00000000 Binary files a/docs/_images/benchmarks_details_indices_small_1.png and /dev/null differ diff --git a/docs/_images/digits-3.png b/docs/_images/digits-3.png new file mode 100644 index 00000000..01b9eb6b Binary files /dev/null and b/docs/_images/digits-3.png differ diff --git a/docs/_images/g2mg-plot-1.png b/docs/_images/g2mg-plot-1.png new file mode 100644 index 00000000..aa0aea7f Binary files /dev/null and b/docs/_images/g2mg-plot-1.png differ diff --git a/docs/_images/indices_large-3.png b/docs/_images/indices_large-3.png new file mode 100644 index 00000000..903fd432 Binary files /dev/null and b/docs/_images/indices_large-3.png differ diff --git a/docs/_images/indices_small-1.png b/docs/_images/indices_small-1.png new file mode 100644 index 00000000..87f74215 Binary files /dev/null and b/docs/_images/indices_small-1.png differ diff --git a/docs/_images/noise-Genie1-3.png b/docs/_images/noise-Genie1-3.png new file mode 100644 index 00000000..006cd697 Binary files /dev/null and b/docs/_images/noise-Genie1-3.png differ diff --git a/docs/_images/noise-Genie2-5.png b/docs/_images/noise-Genie2-5.png new file mode 100644 index 00000000..c9eab6ae Binary files /dev/null and b/docs/_images/noise-Genie2-5.png differ diff --git a/docs/_images/noise-Genie3-7.png b/docs/_images/noise-Genie3-7.png new file mode 100644 index 00000000..2cfbf8a2 Binary files /dev/null and b/docs/_images/noise-Genie3-7.png differ diff --git a/docs/_images/noise-HDBSCAN1-9.png b/docs/_images/noise-HDBSCAN1-9.png new file mode 100644 index 00000000..17219ff0 Binary files /dev/null and b/docs/_images/noise-HDBSCAN1-9.png differ diff --git 
a/docs/_images/noise-HDBSCAN2-11.png b/docs/_images/noise-HDBSCAN2-11.png new file mode 100644 index 00000000..81399e37 Binary files /dev/null and b/docs/_images/noise-HDBSCAN2-11.png differ diff --git a/docs/_images/noise-scatter-1.png b/docs/_images/noise-scatter-1.png new file mode 100644 index 00000000..e6dd8d15 Binary files /dev/null and b/docs/_images/noise-scatter-1.png differ diff --git a/docs/_images/noise_noise-Genie1_1.png b/docs/_images/noise_noise-Genie1_1.png deleted file mode 100644 index 7c2a1998..00000000 Binary files a/docs/_images/noise_noise-Genie1_1.png and /dev/null differ diff --git a/docs/_images/noise_noise-Genie2_1.png b/docs/_images/noise_noise-Genie2_1.png deleted file mode 100644 index a2054edb..00000000 Binary files a/docs/_images/noise_noise-Genie2_1.png and /dev/null differ diff --git a/docs/_images/noise_noise-Genie3_1.png b/docs/_images/noise_noise-Genie3_1.png deleted file mode 100644 index cfcecbad..00000000 Binary files a/docs/_images/noise_noise-Genie3_1.png and /dev/null differ diff --git a/docs/_images/noise_noise-HDBSCAN1_1.png b/docs/_images/noise_noise-HDBSCAN1_1.png deleted file mode 100644 index 3a940155..00000000 Binary files a/docs/_images/noise_noise-HDBSCAN1_1.png and /dev/null differ diff --git a/docs/_images/noise_noise-HDBSCAN2_1.png b/docs/_images/noise_noise-HDBSCAN2_1.png deleted file mode 100644 index 76120991..00000000 Binary files a/docs/_images/noise_noise-HDBSCAN2_1.png and /dev/null differ diff --git a/docs/_images/noise_noise-scatter_1.png b/docs/_images/noise_noise-scatter_1.png deleted file mode 100644 index 76bb591e..00000000 Binary files a/docs/_images/noise_noise-scatter_1.png and /dev/null differ diff --git a/docs/_images/plot_large-3.png b/docs/_images/plot_large-3.png new file mode 100644 index 00000000..0f4cbbbe Binary files /dev/null and b/docs/_images/plot_large-3.png differ diff --git a/docs/_images/plot_small-1.png b/docs/_images/plot_small-1.png new file mode 100644 index 00000000..7567df98 
Binary files /dev/null and b/docs/_images/plot_small-1.png differ diff --git a/docs/_images/timings_digits_1.png b/docs/_images/timings_digits_1.png deleted file mode 100644 index 2858c30c..00000000 Binary files a/docs/_images/timings_digits_1.png and /dev/null differ diff --git a/docs/_images/timings_g2mg-plot_1.png b/docs/_images/timings_g2mg-plot_1.png deleted file mode 100644 index 2b5398c0..00000000 Binary files a/docs/_images/timings_g2mg-plot_1.png and /dev/null differ diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js index 82e6a0eb..69d3a148 100644 --- a/docs/_static/documentation_options.js +++ b/docs/_static/documentation_options.js @@ -1,5 +1,5 @@ const DOCUMENTATION_OPTIONS = { - VERSION: '1.1.4.9004', + VERSION: '1.1.5', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/genieclust.html b/docs/genieclust.html index 14f3d009..a68b72a3 100644 --- a/docs/genieclust.html +++ b/docs/genieclust.html @@ -165,7 +165,7 @@ genieclust