diff --git a/.gitignore b/.gitignore
index 9251e52..30ef2cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,9 @@ var/
 pip-log.txt
 pip-delete-this-directory.txt
 
+# Performance test data
+tests/data/performance_data/
+
 # Unit test / coverage reports
 htmlcov/
 .tox/
diff --git a/tests/Pruebas de Performance Textar.ipynb b/tests/Pruebas de Performance Textar.ipynb
new file mode 100644
index 0000000..d0f9cdd
--- /dev/null
+++ b/tests/Pruebas de Performance Textar.ipynb
@@ -0,0 +1,193 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext memory_profiler\n",
+    "from textar import TextClassifier\n",
+    "import xml.etree.ElementTree as ET\n",
+    "from lxml import etree\n",
+    "import numpy as np\n",
+    "import re\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Helper functions\n",
+    "\n",
+    "def parse_blog(tree, min_words=100):\n",
+    "    dates = []\n",
+    "    posts = []\n",
+    "    date = None\n",
+    "    for elem in tree:\n",
+    "        post = None\n",
+    "        if elem.tag == 'date':\n",
+    "            date = elem.text\n",
+    "        elif elem.tag == 'post':\n",
+    "            post = elem.text\n",
+    "        if post is not None:\n",
+    "            words = re.findall(r'\\w+\\W', post)\n",
+    "            # keep long posts whose words have a reasonable mean length\n",
+    "            if len(words) > min_words and np.mean([len(w) for w in words]) > 2:\n",
+    "                dates.append(date)\n",
+    "                posts.append(post)\n",
+    "    return dates, posts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Config\n",
+    "DATA_FOLDER = os.path.join('.', 'data', 'performance_data', 'blogs')\n",
+    "MAX_FILES = 10000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# DOCTYPE preamble declaring HTML entities (e.g. &nbsp;) used in the blog files\n",
+    "magic = '''<!DOCTYPE Blog [\n",
+    "    <!ENTITY nbsp \" \">\n",
+    "]>'''\n",
+    "\n",
+    "parser = etree.XMLParser(recover=True)\n",
+    "\n",
+    "all_dates = []\n",
+    "all_posts = []\n",
+    "all_genders = []\n",
+    "all_ages = []\n",
+    "all_categories = []\n",
+    "\n",
+    "for file_name in os.listdir(DATA_FOLDER)[:MAX_FILES]:\n",
+    "    id_f, gender, age, category, zodiac, ext = file_name.split('.')\n",
+    "    with open(os.path.join(DATA_FOLDER, file_name), 'r') as f:\n",
+    "        try:\n",
+    "            tree = ET.fromstring(magic + f.read(), parser=parser)\n",
+    "            dates, posts = parse_blog(tree)\n",
+    "            all_posts += posts\n",
+    "            all_dates += dates\n",
+    "            all_genders += [gender] * len(dates)\n",
+    "            all_ages += [age] * len(dates)\n",
+    "            all_categories += [category] * len(dates)\n",
+    "        except Exception:\n",
+    "            # print(\"Error in {:s}\".format(file_name))\n",
+    "            pass\n",
+    "all_ids = list(map(str, range(len(all_posts))))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "# Time to build the TextClassifier object\n",
+    "tc = TextClassifier(all_posts, all_ids)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 2.36 s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "# Time for a similarity query\n",
+    "tc.get_similar(all_ids[1], max_similars=3, term_diff_max_rank=50)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 17.4 s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "# Time to train the classifier\n",
+    "tc.make_classifier(\"topic\", all_ids, all_categories)"
+   ]
+  },
+  {
"code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 loops, best of 3: 31.4 ms per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "tc.classify(\"topic\", all_ids[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0., 0., 0., ..., 0., 0., 0.]])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "row.toarray()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['0', '1', '10', ..., '997', '998', '999'], \n", + " dtype='|S4')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [Root]", + "language": "python", + "name": "Python [Root]" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + }, + "notify_time": "5" + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/tests/test_text_classifier.py b/tests/test_text_classifier.py index 0a24663..2c381d5 100644 --- a/tests/test_text_classifier.py +++ b/tests/test_text_classifier.py @@ -11,10 +11,9 @@ import os import codecs import numpy as np -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.datasets import fetch_20newsgroups sys.path.insert(0, os.path.abspath('..')) - from textar import TextClassifier @@ -32,7 +31,7 @@ def test_get_similar(self): "El edificio más antiguo tiene muchas cuadros caros porque era de un multimillonario", "El edificio más moderno tiene muchas programadoras que comen manzanas durante el almuerzo grupal" ], - ids=map(str, range(4)) + ids=list(map(str, range(4))) ) ids, distancias, palabras_comunes = tc.get_similar( @@ -42,10 +41,14 @@ def test_get_similar(self): self.assertEqual(ids, ['0', '3', '2', '1']) self.assertEqual( - palabras_comunes, + [ + sorted(palabras) + for palabras in palabras_comunes + ] + , [ [u'edificio', u'manzanas'], - [u'edificio', u'muchas', u'manzanas'], + [u'edificio', u'manzanas', u'muchas'], [u'edificio', u'muchas'], [u'muchas'] ] ) @@ -60,13 +63,13 @@ def test_classify(self): "Para hacer una torta de naranja se necesita harina, huevos, leche, ralladura de naranja y polvo de hornear", "Para hacer un lemon pie se necesita crema, ralladura de limón, huevos, leche y harina" ], - ids=map(str, range(6)) + ids=list(map(str, range(6))) ) # entrena un clasificador tc.make_classifier( name="recetas_classifier", - ids=map(str, range(6)), + ids=list(map(str, range(6))), labels=["Comida", "Comida", "Trago", "Trago", "Postre", "Postre"] ) diff --git a/textar/__init__.py b/textar/__init__.py index 7688f12..e669d40 100644 --- a/textar/__init__.py +++ b/textar/__init__.py @@ -4,4 +4,4 @@ __email__ = 'datos@modernizacion.gob.ar' __version__ = '0.0.4' -from text_classifier import TextClassifier +from .text_classifier import TextClassifier diff --git a/textar/text_classifier.py b/textar/text_classifier.py index 00da330..429cced 100644 --- 
--- a/textar/text_classifier.py
+++ b/textar/text_classifier.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+
 u"""Módulo de clasificación de textos.
 
 Este módulo contiene a los objetos que permiten entrenar un clasificador
@@ -10,8 +11,6 @@
 from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
 from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.linear_model import SGDClassifier
-from sklearn.svm import LinearSVC
-from scipy import sparse
 import pandas as pd
 import numpy as np
 import os
@@ -47,7 +46,7 @@ def __init__(self, texts, ids, vocabulary=None, encoding='utf-8'):
             input='content', encoding=encoding, decode_error='strict',
             strip_accents='ascii', lowercase=True, preprocessor=None,
             tokenizer=None, stop_words=es_stopwords, ngram_range=(1, 1),
-            analyzer='word', max_df=1.0, min_df=1, max_features=None,
+            analyzer='word', max_df=0.8, min_df=1, max_features=None,
             vocabulary=vocabulary, binary=False)
 
         self.transformer = TfidfTransformer()
@@ -103,7 +102,7 @@ def retrain(self, name, ids, labels):
         except AttributeError:
             raise AttributeError("No hay ningun clasificador con ese nombre.")
         indices = np.in1d(self.ids, ids)
-        if isinstance(labels, basestring):
+        if isinstance(labels, str):
             labels = [labels]
         classifier.partial_fit(self.tfidf_mat[indices, :], labels)
 
@@ -147,7 +146,7 @@ def _make_text_vectors(self, examples):
                 El tamaño de la matriz es de (N, T) donde N es la cantidad de
                 ejemplos y T es la cantidad de términos en el vocabulario.
         """
-        if isinstance(examples, basestring):
+        if isinstance(examples, str):
             if examples in self.ids:
                 textvec = self.tfidf_mat[self.ids == examples, :]
             else:
@@ -168,7 +167,8 @@
         return textvec
 
     def get_similar(self, example, max_similars=3, similarity_cutoff=None,
-                    term_diff_cutoff=0.6):
+                    term_diff_max_rank=10, filter_list=None,
+                    term_diff_cutoff=None):
         """Devuelve textos similares al ejemplo dentro de los textos
         entrenados.
         Nota:
@@ -181,11 +181,14 @@
                 devolver.
             similarity_cutoff (float, optional): Valor umbral de similaridad
                 para definir que dos textos son similares entre si.
-            term_diff_cutoff (float, optional): Este valor sirve para controlar
+            term_diff_max_rank (int, optional): Este valor sirve para controlar
                 el umbral con el que los terminos son considerados importantes
                 a la hora de recuperar textos (no afecta el funcionamiento de
                 que textos se consideran cercanos, solo la cantidad de terminos
                 que se devuelven en best_words).
+            filter_list (list): Lista de ids de textos en la cual buscar textos
+                similares.
+            term_diff_cutoff (float): Deprecado. Se quitara en el futuro.
 
         Returns:
             tuple (list, list, list): (text_ids, sorted_dist, best_words)
@@ -197,22 +200,46 @@
                 palabras mas relevantes que se usaron para seleccionar esa
                 sugerencia.
         """
-        if max_similars > self.term_mat.shape[0]:
+
+        if term_diff_cutoff:
+            warnings.warn('Deprecado. Quedo sin uso. Se quitara en el futuro.',
+                          DeprecationWarning)
+        if filter_list:
+            if max_similars > len(filter_list):
+                raise ValueError("No se pueden pedir mas sugerencias que la \
+                    cantidad de textos en `filter_list`.")
+            else:
+                filt_idx = np.in1d(self.ids, filter_list)
+
+        elif max_similars > self.term_mat.shape[0]:
             raise ValueError("No se pueden pedir mas sugerencias que la \
                 cantidad de textos que hay almacenados.")
+        else:
+            filt_idx = np.ones(len(self.ids), dtype=bool)
+        # Saco los textos compuestos solo por stop_words
+        good_ids = np.array(np.sum(self.term_mat, 1) > 0).squeeze()
+        filt_idx = filt_idx & good_ids
         if example in self.ids:
             index = self.ids == example
             exmpl_vec = self.tfidf_mat[index, :]
-            distances = np.squeeze(pairwise_distances(self.tfidf_mat,
+            distances = np.squeeze(pairwise_distances(self.tfidf_mat[filt_idx],
                                                       exmpl_vec))
             # Pongo la distancia a si mismo como inf, par que no se devuelva a
             # si mismo como una opcion
-            distances[index] = np.inf
+            if filter_list and example in filter_list:
+                distances[filter_list.index(example)] = np.inf
+            elif not filter_list:
+                idx_example = np.searchsorted(self.ids, example)
+                filt_idx_example = np.searchsorted(np.flatnonzero(filt_idx),
+                                                   idx_example)
+                distances[filt_idx_example] = np.inf
         else:
             exmpl_vec = self.vectorizer.transform([example])  # contar terminos
             exmpl_vec = self.transformer.transform(exmpl_vec)  # calcular tfidf
-            distances = np.squeeze(pairwise_distances(self.tfidf_mat,
+            distances = np.squeeze(pairwise_distances(self.tfidf_mat[filt_idx],
                                                       exmpl_vec))
+        if np.sum(exmpl_vec) == 0:
+            return [], [], []
         sorted_indices = np.argsort(distances)
         closest_n = sorted_indices[:max_similars]
         sorted_dist = distances[closest_n]
@@ -220,20 +247,26 @@
             closest_n = closest_n[sorted_dist < similarity_cutoff]
             sorted_dist = sorted_dist[sorted_dist < similarity_cutoff]
         best_words = []
-        exmpl_vec = exmpl_vec.toarray()
+        # Calculo palabras relevantes para cada sugerencia
+        best_example = np.squeeze(exmpl_vec.toarray())
+        sorted_example_weights = np.flipud(np.argsort(best_example))
+        truncated_max_rank = min(term_diff_max_rank, np.sum(best_example > 0))
+        best_example = sorted_example_weights[:truncated_max_rank]
         for suggested in closest_n:
-            test_vec = self.tfidf_mat[suggested, :].toarray()
-            differences = np.abs(exmpl_vec - test_vec)**2 / \
-                (exmpl_vec**2 + test_vec**2)
-            differences = np.squeeze(np.array(differences))
-            sort_I = np.argsort(differences)
-            limit = np.flatnonzero((differences[sort_I] > term_diff_cutoff)
-                                   | (np.isnan(differences[sort_I]))
-                                   )[0]
+            test_vec = np.squeeze(self.tfidf_mat[suggested, :].toarray())
+            sorted_test_weights = np.flipud(np.argsort(test_vec))
+            truncated_max_rank = min(term_diff_max_rank,
+                                     np.sum(test_vec > 0))
+            best_test = sorted_test_weights[:truncated_max_rank]
+            best_words_ids = np.intersect1d(best_example, best_test)
             best_words.append([k for k, v in
-                               self.vectorizer.vocabulary_.iteritems()
-                               if v in sort_I[:limit]])
-        text_ids = self.ids[closest_n]
+                               self.vectorizer.vocabulary_.items()
+                               if v in best_words_ids])
+        if filter_list:
+            filt_idx_to_general_idx = np.flatnonzero(filt_idx)
+            text_ids = self.ids[filt_idx_to_general_idx[closest_n]]
+        else:
+            text_ids = self.ids[closest_n]
         return list(text_ids), list(sorted_dist), best_words
 
     def reload_texts(self, texts, ids, vocabulary=None):
@@ -307,5 +340,5 @@ def _check_id_length(self, ids):
                 ingresado textos planos en lugar de ids.")
 
     def _check_repeated_ids(self, ids):
-        if length(np.unique(ids)) != length(ids):
+        if len(np.unique(ids)) != len(ids):
             raise ValueError("Hay ids repetidos.")
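
A minimal usage sketch of the reworked `get_similar` API introduced by this diff, for reviewers. The texts and ids below are invented placeholders (not fixtures from the test suite), and the snippet assumes textar is installed with this patch applied:

    from textar import TextClassifier

    tc = TextClassifier(
        texts=[
            "El gato toma leche en la cocina",
            "El perro come carne en el patio",
            "La programadora escribe codigo en la oficina",
            "El gato duerme sobre el sillon de la oficina",
        ],
        # on Python 3, map() returns an iterator, so it must be materialized
        ids=list(map(str, range(4)))
    )

    # Restrict the search to the subset given by `filter_list`;
    # `term_diff_max_rank` caps how many top-weighted terms per text
    # are intersected to build best_words.
    ids, distances, best_words = tc.get_similar(
        "0",
        max_similars=2,
        filter_list=["1", "2", "3"],
        term_diff_max_rank=10
    )
    print(ids, distances, best_words)

Because `filter_list` is applied as a boolean mask over the stored tf-idf matrix, the indices of the closest rows are mapped back to the original id space before being returned.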