Skip to content

Commit

Permalink
Merge pull request #352 from sahilsuneja1/alpine
Browse files Browse the repository at this point in the history
adding alpine package crawler from UK codebase
  • Loading branch information
nadgowdas authored Jan 30, 2018
2 parents 5750a5c + 5994630 commit f738c63
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 3 deletions.
65 changes: 62 additions & 3 deletions crawler/utils/package_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,59 @@ def _rpm_reload_db(

return reloaded_db_dir

# from UK crawler codebase


def apk_parser(filename):
    """Parse an APK "installed" database file and yield its packages.

    The file is read in one go.  Records are separated by blank lines and
    each record is a sequence of ``<key>:<value>`` lines.  Only the ``P:``
    (name), ``V:`` (version), ``A:`` (architecture) and ``S:`` (size) lines
    are used; every other line is ignored.

    This is a generator, so nothing (including the file read) happens until
    the first item is requested.

    Args:
        filename: path of the APK database file to parse.

    Yields:
        ``(name, PackageFeature(None, name, size, version, architecture))``
        for every non-blank record.  Attributes missing from a record are
        left as empty strings.

    Raises:
        IOError: if the file cannot be read.  The failure is logged and the
            original exception is re-raised.
    """
    # Keep the try body limited to the read: that is the only statement
    # expected to raise IOError, and the context manager guarantees the
    # handle is closed whether or not read() succeeds.
    try:
        with open(filename) as db_file:
            db_contents = db_file.read()
    except IOError as e:
        # The message is formatted eagerly on purpose: handlers that look at
        # record.msg then see the complete text.
        logger.error('Failed to read APK database to obtain packages. '
                     'Check if %s is present. [Exception: %s: %s]'
                     ' ' % (filename, type(e).__name__, e.strerror))
        raise

    # Drop empty and whitespace-only chunks up front so that stray blank
    # lines neither inflate the count nor produce nameless packages.
    packages = [chunk for chunk in db_contents.split('\n\n')
                if chunk.strip()]
    logger.debug('Found {} APK packages'.format(len(packages)))

    for package in packages:
        name = ""
        version = ""
        architecture = ""
        size = ""
        for attribute in package.split('\n'):
            if attribute.startswith('P:'):
                name = attribute[2:]
            elif attribute.startswith('V:'):
                version = attribute[2:]
            elif attribute.startswith('A:'):
                architecture = attribute[2:]
            elif attribute.startswith('S:'):
                size = attribute[2:]
        yield (name, PackageFeature(None, name,
                                    size, version,
                                    architecture))


def get_apk_packages(
        root_dir='/',
        dbpath='lib/apk/db'):
    """Yield the packages recorded in the APK database below ``root_dir``.

    Args:
        root_dir: directory that ``dbpath`` is joined onto.
        dbpath: directory holding the ``installed`` database file.  If it is
            absolute a warning is logged; ``os.path.join`` then discards
            ``root_dir`` and the absolute path is used as is.

    Yields:
        ``(feature_key, package_feature)`` pairs exactly as produced by
        ``apk_parser`` for ``<root_dir>/<dbpath>/installed``.
    """
    if os.path.isabs(dbpath):
        warning_text = (
            'dbpath: ' +
            dbpath +
            ' is defined absolute. Ignoring prefix: ' +
            root_dir +
            '.')
        logger.warning(warning_text)

    # Resolve the database directory against the crawl root, then point at
    # the "installed" file inside it.
    db_dir = os.path.join(root_dir, dbpath)
    installed_db = os.path.join(db_dir, 'installed')

    for key, feature in apk_parser(installed_db):
        yield (key, feature)


def crawl_packages(
dbpath=None,
Expand All @@ -150,12 +203,11 @@ def crawl_packages(
reload_needed=True):

# package attributes: ["installed", "name", "size", "version"]

logger.debug('Crawling Packages')

pkg_manager = _get_package_manager(root_dir)

try:
pkg_manager = _get_package_manager(root_dir)

if pkg_manager == 'dpkg':
dbpath = dbpath or 'var/lib/dpkg'
for (key, feature) in get_dpkg_packages(
Expand All @@ -166,6 +218,11 @@ def crawl_packages(
for (key, feature) in get_rpm_packages(
root_dir, dbpath, installed_since, reload_needed):
yield (key, feature, 'package')
elif pkg_manager == 'apk':
dbpath = dbpath or 'lib/apk/db'
for (key, feature) in get_apk_packages(
root_dir, dbpath):
yield (key, feature, 'package')
else:
logger.warning('Unsupported package manager for Linux distro')
except Exception as e:
Expand All @@ -186,6 +243,8 @@ def _get_package_manager(root_dir):
pkg_manager = 'dpkg'
elif os_distro in ['redhat', 'red hat', 'rhel', 'fedora', 'centos']:
pkg_manager = 'rpm'
elif os_distro in ['alpine']:
pkg_manager = 'apk'
elif os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')):
pkg_manager = 'dpkg'
elif os.path.exists(os.path.join(root_dir, 'var/lib/rpm')):
Expand Down
98 changes: 98 additions & 0 deletions tests/functional/test_functional_apk_package_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import unittest
import docker
import requests.exceptions
import tempfile
import os
import shutil
import subprocess
import sys
import pykafka

# Tests for crawlers in kraken crawlers configuration.

from containers_crawler import ContainersCrawler
from worker import Worker
from emitters_manager import EmittersManager

import logging

# Tests conducted with a single container running.


class ContainersCrawlerTests(unittest.TestCase):
    """Functional test that crawls a live Alpine container.

    The test talks to the local docker daemon through
    ``unix://var/run/docker.sock``, starts an ``alpine:latest`` container,
    runs ``crawler.py`` against it in OUTCONTAINER mode and checks the
    emitted output for the expected OS and package names.
    """

    def setUp(self):
        """Set up stdout logging, connect to docker, start the container."""
        root = logging.getLogger()
        root.setLevel(logging.INFO)
        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        root.addHandler(ch)
        # Detach the handler again once the test is over; otherwise every
        # setUp run stacks one more stdout handler on the root logger.
        self.addCleanup(root.removeHandler, ch)

        self.docker = docker.Client(base_url='unix://var/run/docker.sock',
                                    version='auto')
        try:
            if self.docker.containers():
                raise Exception(
                    "Sorry, this test requires a machine with no docker "
                    "containers running.")
        except requests.exceptions.ConnectionError:
            # Only reported here; the following docker calls will surface
            # the actual failure.
            print("Error connecting to docker daemon, are you in the docker "
                  "group? You need to be in the docker group.")

        self.start_crawled_container()

    def start_crawled_container(self):
        """Pull alpine:latest, start a sleeping container, make a temp dir."""
        # start a container to be crawled
        self.docker.pull(repository='alpine', tag='latest')
        self.container = self.docker.create_container(
            image='alpine:latest', command='/bin/sleep 60')
        self.tempd = tempfile.mkdtemp(prefix='crawlertest.')
        self.docker.start(container=self.container['Id'])

    def tearDown(self):
        """Stop and remove the container, then delete the temp directory."""
        self.remove_crawled_container()
        shutil.rmtree(self.tempd)

    def remove_crawled_container(self):
        """Stop and remove the container started for this test."""
        self.docker.stop(container=self.container['Id'])
        self.docker.remove_container(container=self.container['Id'])

    def testCrawlContainer(self):
        """Crawl os and package features and check the emitted output."""
        env = os.environ.copy()
        mypath = os.path.dirname(os.path.realpath(__file__))
        out_dir = os.path.join(self.tempd, 'out')
        os.makedirs(out_dir)

        # crawler itself needs to be root
        process = subprocess.Popen(
            [
                '/usr/bin/python', mypath + '/../../crawler/crawler.py',
                '--url', 'file://' + self.tempd + '/out/crawler',
                '--features', 'os,package',
                '--crawlContainers', self.container['Id'],
                '--crawlmode', 'OUTCONTAINER',
                '--numprocesses', '1'
            ],
            env=env)
        stdout, stderr = process.communicate()
        self.assertEqual(process.returncode, 0)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(stderr)
        print(stdout)

        subprocess.call(['/bin/chmod', '-R', '777', self.tempd])

        files = os.listdir(out_dir)
        self.assertEqual(len(files), 1)

        # Read through a context manager so the handle is closed even when
        # one of the assertions below fails.
        with open(os.path.join(out_dir, files[0]), 'r') as f:
            output = f.read()
        print(output)  # only printed if the test fails
        self.assertIn('alpine', output)
        self.assertIn('musl', output)
        self.assertIn('busybox', output)

24 changes: 24 additions & 0 deletions tests/unit/single_package_apk_db
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
C:Q128iBSO2PpkDnOWZqp3Watn4H20U=
P:test-package
V:999.9.9
A:x86_64
S:999
I:581632
T:the musl c library (libc) implementation
U:http://www.musl-libc.org/
L:MIT
o:musl
m:Timo Teräs <timo.teras@iki.fi>
t:1485463794
c:2e4493888fff74afc6a6ef6257aeea469df32af5
p:so:libc.musl-x86_64.so.1=1
F:lib
R:libc.musl-x86_64.so.1
a:0:0:777
Z:Q17yJ3JFNypA4mxhJJr0ou6CzsJVI=
R:ld-musl-x86_64.so.1
a:0:0:755
Z:Q1OSxgRxSfAKG2m68kTiuRMQclyD8=
F:usr
F:usr/lib

53 changes: 53 additions & 0 deletions tests/unit/test_apk_package_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# from UK crawler codebase
import unittest
from utils import package_utils
from utils.features import PackageFeature
import os
import logging

class PackageUtilsTest(unittest.TestCase):
    """Unit tests for ``package_utils.apk_parser``.

    The fixture databases are expected to sit in the same directory as this
    test module.
    """

    def test_single_package_is_parsed(self):
        """A database with one record yields exactly one package."""
        input_file = os.path.join(os.path.dirname(__file__),
                                  'single_package_apk_db')
        parser = package_utils.apk_parser(input_file)
        # Builtin next() instead of the Python-2-only .next() method.
        package = next(parser)
        self.assert_package_is_correct(
            package, 'test-package', '999.9.9', '999', 'x86_64')
        self.assertRaises(StopIteration, next, parser)

    def test_multiple_packages_parsed(self):
        """A database with two records yields both, in file order."""
        input_file = os.path.join(os.path.dirname(__file__),
                                  'two_packages_apk_db')
        parser = package_utils.apk_parser(input_file)
        self.assert_package_is_correct(
            next(parser), 'first-package', '111.1.1', '111', 'x86_64')
        self.assert_package_is_correct(
            next(parser), 'second-package', '222.2.2', '222', 'x86_64')
        self.assertRaises(StopIteration, next, parser)

    def test_error_message_produced(self):
        """A missing database raises IOError and logs a descriptive error."""
        logger = logging.getLogger('crawlutils')
        log_handler = CapturingLogHandler()
        logger.addHandler(log_handler)
        # Remove the handler afterwards so it does not stay attached to the
        # shared 'crawlutils' logger for the rest of the test run.
        self.addCleanup(logger.removeHandler, log_handler)
        with self.assertRaises(IOError):
            input_file = os.path.join(os.path.dirname(__file__),
                                      'does.not.exist')
            parser = package_utils.apk_parser(input_file)
            next(parser)
        self.assertIsNotNone(log_handler.msg)
        self.assertIn('Failed to read APK database to obtain packages',
                      log_handler.msg)
        self.assertIn('does.not.exist', log_handler.msg)
        self.assertIn('IOError: No such file or directory', log_handler.msg)

    def assert_package_is_correct(self, package, name, version, size,
                                  architecture):
        """Check one ``(key, PackageFeature)`` pair against expected values."""
        self.assertEqual(name, package[0])
        packageFeature = package[1]
        self.assertEqual(name, packageFeature.pkgname)
        self.assertEqual(version, packageFeature.pkgversion)
        self.assertEqual(size, packageFeature.pkgsize)
        self.assertEqual(architecture, packageFeature.pkgarchitecture)
        self.assertIsNone(packageFeature.installed)

class CapturingLogHandler(logging.Handler):
    """Logging handler that keeps the ``msg`` of the last record it saw."""

    # Class-level default: reads as None until a record has been emitted.
    msg = None

    def emit(self, record):
        """Store the record's ``msg`` attribute on this handler instance."""
        captured = record.msg
        self.msg = captured

# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
9 changes: 9 additions & 0 deletions tests/unit/two_packages_apk_db
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
P:first-package
V:111.1.1
A:x86_64
S:111

P:second-package
V:222.2.2
A:x86_64
S:222

0 comments on commit f738c63

Please sign in to comment.