diff --git a/crawler/crawler_exceptions.py b/crawler/crawler_exceptions.py
index 063c9813..462db995 100644
--- a/crawler/crawler_exceptions.py
+++ b/crawler/crawler_exceptions.py
@@ -16,6 +16,13 @@ class CrawlTimeoutError(CrawlError):
     pass
 
 
+class CrawlUnsupportedPackageManager(CrawlError):
+
+    """Could not detect the package manager."""
+
+    pass
+
+
 class ContainerInvalidEnvironment(Exception):
 
     """Indicates that the environment can not be applied to the operation."""
diff --git a/crawler/features_crawler.py b/crawler/features_crawler.py
index 49d26712..011c8c27 100644
--- a/crawler/features_crawler.py
+++ b/crawler/features_crawler.py
@@ -24,7 +24,8 @@
 psvmi = None
 
 from namespace import run_as_another_namespace, ALL_NAMESPACES
-from crawler_exceptions import CrawlError
+from crawler_exceptions import (CrawlError,
+                                CrawlUnsupportedPackageManager)
 import dockerutils
 from features import (OSFeature, FileFeature, ConfigFeature, DiskFeature,
                       ProcessFeature, MetricFeature, ConnectionFeature,
@@ -473,8 +474,7 @@ def _crawl_config_files(
 
         assert os.path.isdir(root_dir)
 
-        if root_dir_alias is None:
-            root_dir_alias = root_dir
+        root_dir_alias = root_dir_alias or root_dir
         exclude_dirs = [os.path.join(root_dir, d) for d in
                         exclude_dirs]
         exclude_regex = r'|'.join([fnmatch.translate(d) for d in
@@ -570,68 +570,77 @@ def _crawl_processes(self):
         if self.crawl_mode == Modes.OUTVM:
             if psvmi is None:
                 raise NotImplementedError()
-            else:
-                list = psvmi.process_iter(self.get_vm_context())
+            proc_list = psvmi.process_iter(self.get_vm_context())
         else:
-            list = psutil.process_iter()
+            proc_list = psutil.process_iter()
 
-        for p in list:
+        for p in proc_list:
             create_time = (
                 p.create_time() if hasattr(
                     p.create_time,
                     '__call__') else p.create_time)
-            if create_time > created_since:
-                name = (p.name() if hasattr(p.name, '__call__'
-                                            ) else p.name)
-                cmdline = (p.cmdline() if hasattr(p.cmdline, '__call__'
-                                                  ) else p.cmdline)
-                pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
-                status = (p.status() if hasattr(p.status, '__call__'
-                                                ) else p.status)
-                if status == psutil.STATUS_ZOMBIE:
-                    cwd = 'unknown'  # invalid
-                else:
-                    try:
-                        cwd = (p.cwd() if hasattr(p, 'cwd') and
-                               hasattr(p.cwd, '__call__') else p.getcwd())
-                    except Exception:
-                        logger.error('Error crawling process %s for cwd'
-                                     % pid, exc_info=True)
-                        cwd = 'unknown'
-                ppid = (p.ppid() if hasattr(p.ppid, '__call__'
-                                            ) else p.ppid)
-                try:
-                    if (hasattr(p, 'num_threads') and
-                            hasattr(p.num_threads, '__call__')):
-                        num_threads = p.num_threads()
-                    else:
-                        num_threads = p.get_num_threads()
-                except:
-                    num_threads = 'unknown'
-
-                try:
-                    username = (p.username() if hasattr(p, 'username') and
-                                hasattr(p.username, '__call__') else
-                                p.username)
-                except:
-                    username = 'unknown'
-
-                openfiles = []
-                for f in p.get_open_files():
-                    openfiles.append(f.path)
-                openfiles.sort()
-                feature_key = '{0}/{1}'.format(name, pid)
-                yield (feature_key, ProcessFeature(
-                    str(' '.join(cmdline)),
-                    create_time,
-                    cwd,
-                    name,
-                    openfiles,
-                    pid,
-                    ppid,
-                    num_threads,
-                    username,
-                ))
+            if create_time <= created_since:
+                continue
+            yield self._crawl_single_process(p)
+
+    def _crawl_single_process(self, p):
+        """Returns a (feature_key, ProcessFeature) tuple."""
+        create_time = (
+            p.create_time() if hasattr(
+                p.create_time,
+                '__call__') else p.create_time)
+
+        name = (p.name() if hasattr(p.name, '__call__'
+                                    ) else p.name)
+        cmdline = (p.cmdline() if hasattr(p.cmdline, '__call__'
+                                          ) else p.cmdline)
+        pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
+        status = (p.status() if hasattr(p.status, '__call__'
+                                        ) else p.status)
+        if status == psutil.STATUS_ZOMBIE:
+            cwd = 'unknown'  # invalid
+        else:
+            try:
+                cwd = (p.cwd() if hasattr(p, 'cwd') and
+                       hasattr(p.cwd, '__call__') else p.getcwd())
+            except Exception:
+                logger.error('Error crawling process %s for cwd'
+                             % pid, exc_info=True)
+                cwd = 'unknown'
+        ppid = (p.ppid() if hasattr(p.ppid, '__call__'
+                                    ) else p.ppid)
+        try:
+            if (hasattr(p, 'num_threads') and
+                    hasattr(p.num_threads, '__call__')):
+                num_threads = p.num_threads()
+            else:
+                num_threads = p.get_num_threads()
+        except:
+            num_threads = 'unknown'
+
+        try:
+            username = (p.username() if hasattr(p, 'username') and
+                        hasattr(p.username, '__call__') else
+                        p.username)
+        except:
+            username = 'unknown'
+
+        openfiles = []
+        for f in p.get_open_files():
+            openfiles.append(f.path)
+        openfiles.sort()
+        feature_key = '{0}/{1}'.format(name, pid)
+        return (feature_key, ProcessFeature(
+            str(' '.join(cmdline)),
+            create_time,
+            cwd,
+            name,
+            openfiles,
+            pid,
+            ppid,
+            num_threads,
+            username,
+        ))
 
     # crawl network connection metadata
 
     def crawl_connections(self):
@@ -648,12 +657,11 @@ def _crawl_connections(self):
         if self.crawl_mode == Modes.OUTVM:
             if psvmi is None:
                 raise NotImplementedError()
-            else:
-                list = psvmi.process_iter(self.get_vm_context())
+            proc_list = psvmi.process_iter(self.get_vm_context())
         else:
-            list = psutil.process_iter()
+            proc_list = psutil.process_iter()
 
-        for p in list:
+        for p in proc_list:
             pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
             status = (p.status() if hasattr(p.status, '__call__'
                                             ) else p.status)
@@ -668,46 +676,45 @@ def _crawl_connections(self):
 
             if create_time <= created_since:
                 continue
 
-            try:
-                for c in p.get_connections():
-                    try:
-                        (localipaddr, localport) = c.laddr[:]
-                    except:
-
-                        # Older version of psutil uses local_address instead of
-                        # laddr.
-
-                        (localipaddr, localport) = c.local_address[:]
-                    try:
-                        if c.raddr:
-                            (remoteipaddr, remoteport) = c.raddr[:]
-                        else:
-                            (remoteipaddr, remoteport) = (None, None)
-                    except:
+            for conn in p.get_connections():
+                yield self._crawl_single_connection(conn, pid, name)
 
-                        # Older version of psutil uses remote_address instead
-                        # of raddr.
+    def _crawl_single_connection(self, c, pid, name):
+        """Returns a (feature_key, ConnectionFeature) tuple."""
+        try:
+            (localipaddr, localport) = c.laddr[:]
+        except:
 
-                        if c.remote_address:
-                            (remoteipaddr, remoteport) = \
-                                c.remote_address[:]
-                        else:
-                            (remoteipaddr, remoteport) = (None, None)
-                    feature_key = '{0}/{1}/{2}'.format(pid,
-                                                       localipaddr, localport)
-                    yield (feature_key, ConnectionFeature(
-                        localipaddr,
-                        localport,
-                        name,
-                        pid,
-                        remoteipaddr,
-                        remoteport,
-                        str(c.status),
-                    ))
-            except Exception as e:
-                logger.error('Error crawling connection for process %s'
-                             % pid, exc_info=True)
-                raise CrawlError(e)
+            # Older version of psutil uses local_address instead of
+            # laddr.
+
+            (localipaddr, localport) = c.local_address[:]
+        try:
+            if c.raddr:
+                (remoteipaddr, remoteport) = c.raddr[:]
+            else:
+                (remoteipaddr, remoteport) = (None, None)
+        except:
+
+            # Older version of psutil uses remote_address instead
+            # of raddr.
+
+            if c.remote_address:
+                (remoteipaddr, remoteport) = \
+                    c.remote_address[:]
+            else:
+                (remoteipaddr, remoteport) = (None, None)
+        feature_key = '{0}/{1}/{2}'.format(pid,
+                                           localipaddr, localport)
+        return (feature_key, ConnectionFeature(
+            localipaddr,
+            localport,
+            name,
+            pid,
+            remoteipaddr,
+            remoteport,
+            str(c.status),
+        ))
 
     # crawl performance metric data
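Note: the repeated `x() if hasattr(x, '__call__') else x` dance in `_crawl_single_process` and `_crawl_single_connection` bridges the psutil 1.x/2.x API change, where fields such as `name`, `cmdline` and `status` turned from attributes into methods. A minimal sketch of the same idea as a single helper (hypothetical, not part of this patch; assumes a psutil-like process object):

```python
def _attr_or_call(proc, attr_name, default='unknown'):
    """Read proc.<attr_name>, calling it when it is a method
    (psutil >= 2.0) and using it as a plain value otherwise (1.x)."""
    try:
        value = getattr(proc, attr_name)
        return value() if callable(value) else value
    except Exception:
        return default

# e.g. name = _attr_or_call(p, 'name'); pid = _attr_or_call(p, 'pid')
```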
@@ -857,56 +864,25 @@ def _crawl_packages(self, dbpath=None, root_dir='/'):
 
         logger.debug('Crawling Packages')
 
-        result = osinfo.get_osinfo(mount_point=root_dir)
-        if result:
-            os_distro = result['os']
-        else:
-            os_distro = 'unknown'
-
-        if self.crawl_mode == Modes.INVM:
-
-            logger.debug('Using in-VM state information (crawl mode: ' +
-                         self.crawl_mode + ')')
+        if self.crawl_mode in (Modes.INVM, Modes.MOUNTPOINT):
             reload_needed = False
         elif self.crawl_mode == Modes.OUTCONTAINER:
-
-            logger.debug('Using outcontainer state information (crawl mode: ' +
-                         self.crawl_mode + ')')
             reload_needed = True
-        elif self.crawl_mode == Modes.MOUNTPOINT:
-
-            logger.debug('Using disk image information (crawl mode: ' +
-                         self.crawl_mode + ')')
-            reload_needed = False
         else:
             raise NotImplementedError('Unsupported crawl mode')
 
         installed_since = self.feature_epoch
 
-        if os_distro == 'unknown':
-            # Package feature is only valid for Linux platforms.
-
-            raise StopIteration()
-        pkg_manager = 'unknown'
-        if os_distro in ['ubuntu', 'debian']:
-            pkg_manager = 'dpkg'
-        elif os_distro in ['redhat', 'red hat', 'rhel', 'fedora', 'centos']:
-            pkg_manager = 'rpm'
-        elif os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')):
-            pkg_manager = 'dpkg'
-        elif os.path.exists(os.path.join(root_dir, 'var/lib/rpm')):
-            pkg_manager = 'rpm'
+        pkg_manager = self._get_package_manager(root_dir)
 
         try:
             if pkg_manager == 'dpkg':
-                if not dbpath:
-                    dbpath = 'var/lib/dpkg'
+                dbpath = dbpath or 'var/lib/dpkg'
                 for (key, feature) in get_dpkg_packages(
                         root_dir, dbpath, installed_since):
                     yield (key, feature)
             elif pkg_manager == 'rpm':
-                if not dbpath:
-                    dbpath = 'var/lib/rpm'
+                dbpath = dbpath or 'var/lib/rpm'
                 for (key, feature) in get_rpm_packages(
                         root_dir, dbpath, installed_since, reload_needed):
                     yield (key, feature)
@@ -917,6 +893,24 @@ def _crawl_packages(self, dbpath=None, root_dir='/'):
                          exc_info=True)
             raise CrawlError(e)
 
+    def _get_package_manager(self, root_dir):
+        result = osinfo.get_osinfo(mount_point=root_dir)
+        if result:
+            os_distro = result['os']
+        else:
+            raise CrawlUnsupportedPackageManager()
+
+        pkg_manager = None
+        if os_distro in ['ubuntu', 'debian']:
+            pkg_manager = 'dpkg'
+        elif os_distro in ['redhat', 'red hat', 'rhel', 'fedora', 'centos']:
+            pkg_manager = 'rpm'
+        elif os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')):
+            pkg_manager = 'dpkg'
+        elif os.path.exists(os.path.join(root_dir, 'var/lib/rpm')):
+            pkg_manager = 'rpm'
+        return pkg_manager
+
     # crawl virtual memory information
 
     def crawl_memory(self):
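Note: `_get_package_manager` keeps the old detection order (known distro names first, then probing for the on-disk package databases), but the failure mode changes: an empty `osinfo.get_osinfo()` result now raises `CrawlUnsupportedPackageManager` instead of silently ending the generator with `raise StopIteration()`. A standalone sketch of the detection logic for reference (stdlib only; mirrors the hunk above rather than adding anything new):

```python
import os

def detect_package_manager(os_distro, root_dir='/'):
    # Distro name wins; otherwise probe for the databases that
    # dpkg and rpm leave under var/lib.
    if os_distro in ('ubuntu', 'debian'):
        return 'dpkg'
    if os_distro in ('redhat', 'red hat', 'rhel', 'fedora', 'centos'):
        return 'rpm'
    if os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')):
        return 'dpkg'
    if os.path.exists(os.path.join(root_dir, 'var/lib/rpm')):
        return 'rpm'
    return None  # _crawl_packages then yields no features
```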
@@ -952,36 +946,8 @@ def crawl_memory(self):
 
         elif self.crawl_mode == Modes.OUTCONTAINER:
 
-            used = buffered = cached = free = 'unknown'
             try:
-                with open(self.container.get_memory_cgroup_path('memory.stat'
-                                                                ), 'r') as f:
-                    for line in f:
-                        (key, value) = line.strip().split(' ')
-                        if key == 'total_cache':
-                            cached = int(value)
-                        if key == 'total_active_file':
-                            buffered = int(value)
-
-                with open(self.container.get_memory_cgroup_path(
-                        'memory.limit_in_bytes'), 'r') as f:
-                    limit = int(f.readline().strip())
-
-                with open(self.container.get_memory_cgroup_path(
-                        'memory.usage_in_bytes'), 'r') as f:
-                    used = int(f.readline().strip())
-
-                host_free = psutil.virtual_memory().free
-                container_total = used + min(host_free, limit - used)
-                free = container_total - used
-
-                if 'unknown' not in [used, free] and (free + used) > 0:
-                    util_percentage = float(used) / (free + used) * 100.0
-                else:
-                    util_percentage = 'unknown'
-
-                feature_attributes = MemoryFeature(
-                    used, buffered, cached, free, util_percentage)
+                feature_attributes = self._crawl_memory_outcontainer()
             except Exception as e:
 
                 logger.error('Error crawling memory', exc_info=True)
@@ -991,6 +957,36 @@ def crawl_memory(self):
 
         yield (feature_key, feature_attributes)
 
+    def _crawl_memory_outcontainer(self):
+        used = buffered = cached = free = 'unknown'
+        with open(self.container.get_memory_cgroup_path('memory.stat'
+                                                        ), 'r') as f:
+            for line in f:
+                (key, value) = line.strip().split(' ')
+                if key == 'total_cache':
+                    cached = int(value)
+                if key == 'total_active_file':
+                    buffered = int(value)
+
+        with open(self.container.get_memory_cgroup_path(
+                'memory.limit_in_bytes'), 'r') as f:
+            limit = int(f.readline().strip())
+
+        with open(self.container.get_memory_cgroup_path(
+                'memory.usage_in_bytes'), 'r') as f:
+            used = int(f.readline().strip())
+
+        host_free = psutil.virtual_memory().free
+        container_total = used + min(host_free, limit - used)
+        free = container_total - used
+
+        if 'unknown' not in [used, free] and (free + used) > 0:
+            util_percentage = float(used) / (free + used) * 100.0
+        else:
+            util_percentage = 'unknown'
+
+        return MemoryFeature(used, buffered, cached, free, util_percentage)
+
     def _save_container_cpu_times(self, container_long_id, times):
         cache_key = container_long_id
         self._cache_put_value(cache_key, times)
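Note: `_crawl_memory_outcontainer` defines the container's total memory as `used + min(host_free, limit - used)`, i.e. the container can grow until it hits either its cgroup limit or the host's remaining free memory. A worked example with made-up numbers:

```python
MiB = 1024 ** 2
used = 300 * MiB        # memory.usage_in_bytes
limit = 512 * MiB       # memory.limit_in_bytes
host_free = 4096 * MiB  # psutil.virtual_memory().free

container_total = used + min(host_free, limit - used)  # 512 MiB
free = container_total - used                          # 212 MiB
util = float(used) / (free + used) * 100.0             # ~58.6%
```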
@@ -1003,138 +999,134 @@ def crawl_cpu(self, per_cpu=False):
 
         logger.debug('Crawling cpu information')
 
-        if self.crawl_mode not in [
-            Modes.INVM,
-            Modes.OUTCONTAINER,
-                Modes.OUTVM]:
+        if self.crawl_mode == Modes.INVM:
+            for key, feature in self._crawl_cpu_invm():
+                yield (key, feature)
+        elif self.crawl_mode == Modes.OUTCONTAINER:
+            try:
+                for key, feature in self._crawl_cpu_outcontainer(per_cpu):
+                    yield (key, feature)
+            except Exception as e:
+                logger.error('Error crawling cpu information',
+                             exc_info=True)
+                raise CrawlError(e)
+        else:
             raise NotImplementedError('Unsupported crawl mode')
 
+    def _crawl_cpu_outcontainer(self, per_cpu=False):
         host_cpu_feature = {}
-        if self.crawl_mode in [Modes.INVM, Modes.OUTCONTAINER]:
-            for (index, cpu) in \
-                    enumerate(psutil.cpu_times_percent(percpu=True)):
-
-                idle = cpu.idle
-                nice = cpu.nice
-                user = cpu.user
-                wait = cpu.iowait
-                system = cpu.system
-                interrupt = cpu.irq
-                steal = cpu.steal
-
-                used = 100 - int(idle)
-
-                feature_key = '{0}-{1}'.format('cpu', index)
-                feature_attributes = CpuFeature(
-                    idle,
-                    nice,
-                    user,
-                    wait,
-                    system,
-                    interrupt,
-                    steal,
-                    used,
-                )
-                host_cpu_feature[index] = feature_attributes
-                if self.crawl_mode == Modes.INVM:
-                    yield (feature_key, feature_attributes)
-
-        if self.crawl_mode == Modes.OUTVM:
-            raise NotImplementedError()
+        for (idx, cpu) in enumerate(psutil.cpu_times_percent(percpu=True)):
+            host_cpu_feature[idx] = CpuFeature(
+                cpu.idle,
+                cpu.nice,
+                cpu.user,
+                cpu.iowait,
+                cpu.system,
+                cpu.irq,
+                cpu.steal,
+                100 - int(cpu.idle),
+            )
 
-        if self.crawl_mode == Modes.OUTCONTAINER:
+        if per_cpu:
+            stat_file_name = 'cpuacct.usage_percpu'
+        else:
+            stat_file_name = 'cpuacct.usage'
 
-            if per_cpu:
-                stat_file_name = 'cpuacct.usage_percpu'
-            else:
-                stat_file_name = 'cpuacct.usage'
+        container = self.container
 
-            container = self.container
+        (cpu_usage_t1, prev_time) = (
+            self._get_prev_container_cpu_times(container.long_id))
 
-            try:
-                (cpu_usage_t1, prev_time) = \
-                    self._get_prev_container_cpu_times(container.long_id)
-
-                if cpu_usage_t1:
-                    logger.debug('Using previous cpu times for container %s'
-                                 % container.long_id)
-                    interval = time.time() - prev_time
-
-                if not cpu_usage_t1 or interval == 0:
-                    logger.debug(
-                        'There are no previous cpu times for container %s '
-                        'so we will be sleeping for 100 milliseconds' %
-                        container.long_id)
-
-                    with open(container.get_cpu_cgroup_path(stat_file_name),
-                              'r') as f:
-                        cpu_usage_t1 = f.readline().strip().split(' ')
-                    interval = 0.1  # sleep for 100ms
-                    time.sleep(interval)
-
-                with open(container.get_cpu_cgroup_path(stat_file_name),
-                          'r') as f:
-                    cpu_usage_t2 = f.readline().strip().split(' ')
-
-                # Store the cpu times for the next crawl
-
-                self._save_container_cpu_times(container.long_id,
-                                               cpu_usage_t2)
-
-                cpu_user_system = {}
-                path = container.get_cpu_cgroup_path('cpuacct.stat')
-                with open(path, 'r') as f:
-                    for line in f:
-                        m = re.search(r"(system|user)\s+(\d+)", line)
-                        if m:
-                            cpu_user_system[m.group(1)] = \
-                                float(m.group(2))
-            except Exception as e:
-                logger.error('Error crawling cpu information',
-                             exc_info=True)
-                raise CrawlError(e)
+        if cpu_usage_t1:
+            logger.debug('Using previous cpu times for container %s'
+                         % container.long_id)
+            interval = time.time() - prev_time
+        else:
+            logger.debug(
+                'There are no previous cpu times for container %s '
+                'so we will be sleeping for 100 milliseconds' %
+                container.long_id)
+
+            with open(container.get_cpu_cgroup_path(stat_file_name),
+                      'r') as f:
+                cpu_usage_t1 = f.readline().strip().split(' ')
+            interval = 0.1  # sleep for 100ms
+            time.sleep(interval)
+
+        with open(container.get_cpu_cgroup_path(stat_file_name),
+                  'r') as f:
+            cpu_usage_t2 = f.readline().strip().split(' ')
+
+        # Store the cpu times for the next crawl
+
+        self._save_container_cpu_times(container.long_id,
+                                       cpu_usage_t2)
+
+        cpu_user_system = {}
+        path = container.get_cpu_cgroup_path('cpuacct.stat')
+        with open(path, 'r') as f:
+            for line in f:
+                m = re.search(r"(system|user)\s+(\d+)", line)
+                if m:
+                    cpu_user_system[m.group(1)] = \
+                        float(m.group(2))
+
+        for (index, cpu_usage_ns) in enumerate(cpu_usage_t1):
+            usage_secs = (float(cpu_usage_t2[index]) -
+                          float(cpu_usage_ns)) / float(1e9)
+
+            # Interval is never 0 because of step 0 (forcing a sleep)
+
+            usage_percent = usage_secs / interval * 100.0
+            if usage_percent > 100.0:
+                usage_percent = 100.0
+            idle = 100.0 - usage_percent
+
+            # Approximation 1
+
+            user_plus_sys_hz = cpu_user_system['user'] \
+                + cpu_user_system['system']
+            if user_plus_sys_hz == 0:
+                # Fake value to avoid divide by zero.
+                user_plus_sys_hz = 0.1
+            user = usage_percent * (cpu_user_system['user'] /
+                                    user_plus_sys_hz)
+            system = usage_percent * (cpu_user_system['system'] /
+                                      user_plus_sys_hz)
+
+            # Approximation 2
+
+            nice = host_cpu_feature[index][1]
+            wait = host_cpu_feature[index][3]
+            interrupt = host_cpu_feature[index][5]
+            steal = host_cpu_feature[index][6]
+            feature_key = '{0}-{1}'.format('cpu', index)
+            feature_attributes = CpuFeature(
+                idle,
+                nice,
+                user,
+                wait,
+                system,
+                interrupt,
+                steal,
+                usage_percent,
+            )
+            yield (feature_key, feature_attributes)
 
-            for (index, cpu_usage_ns) in enumerate(cpu_usage_t1):
-                usage_secs = (float(cpu_usage_t2[index]) -
-                              float(cpu_usage_ns)) / float(1e9)
-
-                # Interval is never 0 because of step 0 (forcing a sleep)
-
-                usage_percent = usage_secs / interval * 100.0
-                if usage_percent > 100.0:
-                    usage_percent = 100.0
-                idle = 100.0 - usage_percent
-
-                # Approximation 1
-
-                user_plus_sys_hz = cpu_user_system['user'] \
-                    + cpu_user_system['system']
-                if user_plus_sys_hz == 0:
-                    # Fake value to avoid divide by zero.
-                    user_plus_sys_hz = 0.1
-                user = usage_percent * (cpu_user_system['user'] /
-                                        user_plus_sys_hz)
-                system = usage_percent * (cpu_user_system['system'] /
-                                          user_plus_sys_hz)
-
-                # Approximation 2
-
-                nice = host_cpu_feature[index][1]
-                wait = host_cpu_feature[index][3]
-                interrupt = host_cpu_feature[index][5]
-                steal = host_cpu_feature[index][6]
-                feature_key = '{0}-{1}'.format('cpu', index)
-                feature_attributes = CpuFeature(
-                    idle,
-                    nice,
-                    user,
-                    wait,
-                    system,
-                    interrupt,
-                    steal,
-                    usage_percent,
-                )
-                yield (feature_key, feature_attributes)
+    def _crawl_cpu_invm(self):
+        for (idx, cpu) in enumerate(psutil.cpu_times_percent(percpu=True)):
+            feature_attributes = CpuFeature(
+                cpu.idle,
+                cpu.nice,
+                cpu.user,
+                cpu.iowait,
+                cpu.system,
+                cpu.irq,
+                cpu.steal,
+                100 - int(cpu.idle),
+            )
+            feature_key = '{0}-{1}'.format('cpu', idx)
+            yield (feature_key, feature_attributes)
 
     def crawl_interface(self):
         _mode = self.crawl_mode
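Note: `_crawl_cpu_outcontainer` derives utilization from two `cpuacct.usage` samples (cumulative CPU nanoseconds) over a wall-clock interval, caps it at 100%, and splits it into user/system in proportion to the `cpuacct.stat` tick counts. A condensed sketch with made-up sample values (the real code reads these from the cgroup files shown above):

```python
# Two cpuacct.usage readings taken `interval` seconds apart.
t1_ns, t2_ns, interval = 1.2e9, 1.95e9, 0.5

usage_secs = (t2_ns - t1_ns) / 1e9                         # 0.75 s of CPU time
usage_percent = min(usage_secs / interval * 100.0, 100.0)  # capped at 100.0

# Split into user/system by the cpuacct.stat ratio; 0.1 stands in
# for a zero denominator, as in the patch.
user_hz, system_hz = 700.0, 300.0
total_hz = (user_hz + system_hz) or 0.1
user = usage_percent * (user_hz / total_hz)      # 70.0
system = usage_percent * (system_hz / total_hz)  # 30.0
```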