Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse and import both rpm and deb packages metadata #9101

Open
wants to merge 39 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
860a02f
Create lzreposync subdirectory
agraul May 23, 2024
d9b3efe
Correct error message
waterflow80 Aug 4, 2024
e594cfa
Add remote_path column
waterflow80 Aug 4, 2024
199381e
Add expand_full_filelist parameter
waterflow80 Aug 4, 2024
5acf7ab
Update deprecated method
waterflow80 Aug 4, 2024
86df68e
Add import_signatures parameter
waterflow80 Aug 4, 2024
f772e64
Implement Primary.xml file parser
waterflow80 Aug 4, 2024
3b2d236
Implement filelists.xml file parser
waterflow80 Aug 4, 2024
1649f34
Implement full rpm metadata parsing
waterflow80 Aug 4, 2024
a6d462c
Parse and import rpm patches/updates
waterflow80 Aug 4, 2024
0df0030
Import parsed rpm & deb packages to db
waterflow80 Aug 4, 2024
fda9e51
Implement the deb Packages md file
waterflow80 Aug 4, 2024
eee052b
Implement the Translation file parser
waterflow80 Aug 4, 2024
5a51d0d
Implement full deb metadata parsing
waterflow80 Aug 4, 2024
5e26b7a
Fetch repository information from the db
waterflow80 Aug 4, 2024
8ef299a
Complete lzreposync service entry point
waterflow80 Aug 4, 2024
5435907
Add new dependency
waterflow80 Aug 4, 2024
3567817
Add unit tests for rpm metadata parsers
waterflow80 Aug 4, 2024
8bad179
Delete no longer used files
waterflow80 Aug 4, 2024
61b0a74
Remove already defined function
waterflow80 Aug 4, 2024
898d571
Fix linting complain
waterflow80 Aug 4, 2024
4c7db58
Complete code for lzreposync version 0.1
waterflow80 Aug 15, 2024
4f8a070
Complete tests for lzreposync service
waterflow80 Aug 15, 2024
f329e51
Fix error: too many clients already
waterflow80 Aug 15, 2024
913f21c
Complete latest version
waterflow80 Aug 17, 2024
8a49313
Optimize code and do some cleanup
waterflow80 Aug 26, 2024
6ccb3bf
Optimize and consolidate code
waterflow80 Aug 29, 2024
2157d56
Fix cachedir path formatting issue
waterflow80 Aug 29, 2024
dceccec
fixup! Complete lzreposync service entry point
waterflow80 Sep 2, 2024
2a79e72
fixup! Optimize code and do some cleanup
waterflow80 Sep 2, 2024
2f4c998
fixup! Optimize and consolidate code
waterflow80 Sep 2, 2024
9a95e5a
fixup! Complete latest version
waterflow80 Sep 2, 2024
fed31fe
fixup! Optimize and consolidate code
waterflow80 Sep 2, 2024
a033e4d
Complete gpg signature check for rpm
waterflow80 Sep 9, 2024
b890b1c
fixup! Add remote_path column
waterflow80 Sep 9, 2024
7d0c57c
Refactor: Allow more input variants in makedirs()
agraul Sep 9, 2024
7aa602c
Merge pull request #1 from agraul/refactor-makedirs
waterflow80 Sep 9, 2024
4042fc4
fixup! Refactor: Allow more input variants in makedirs()
waterflow80 Sep 9, 2024
a7270cd
Complete gpg signature check for debian
waterflow80 Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 32 additions & 6 deletions python/lzreposync/src/lzreposync/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
get_compatible_arches,
get_channel_info_by_label,
get_all_arches,
create_channel,
ChannelAlreadyExistsException,
)
from lzreposync.deb_repo import DebRepo
from lzreposync.import_utils import (
Expand Down Expand Up @@ -105,18 +107,44 @@ def main():
default=False,
)

parser.add_argument(
"--create-channel",
help="Create a new channel by providing the 'channel_label', and the 'channel_arch' eg: x86_64.\n"
"Eg: --create-channel test_channel x86_64",
dest="channel_info",
type=str,
nargs=2,
)

args = parser.parse_args()

# Creating a new channel
if args.channel_info:
channel_label, channel_arch = args.channel_info[0], args.channel_info[1]
print(
f"Creating a new channel with label: {channel_label}, and arch: {channel_arch}"
)
try:
channel = create_channel(
channel_label=channel_label, channel_arch=channel_arch
)
print(
f"Info: successfully created channel: {channel_label} -> id={channel.get_id()}, name={channel.get_label()}"
)
except ChannelAlreadyExistsException:
print(f"Warn: failed to create channel {channel_label}. Already exists !!")
return

arch = args.arch
if arch != ".*":
# pylint: disable-next=consider-using-f-string
arch = "(noarch|{})".format(args.arch)
# TODO: check for arch validity using table `rhnPackageArch`

logging.getLogger().setLevel(args.loglevel)
if args.url:
if not args.repo_type:
print("ERROR: --type (yum/deb) must be specified when using --url")
return # TODO: maybe add some custom exception
return
if args.repo_type == "yum":
repo = RPMRepo(args.name, args.cache, args.url, arch)
elif args.repo_type == "deb":
Expand All @@ -134,13 +162,11 @@ def main():
# No url specified
if args.channel:
channel_label = args.channel
channel = get_channel_info_by_label(
channel_label
) # TODO handle None exception
channel = get_channel_info_by_label(channel_label)
if not channel:
logging.error("Couldn't fetch channel with label %s", channel_label)
return
compatible_arches = get_compatible_arches(int(channel["id"]))
compatible_arches = get_compatible_arches(channel_label)
if args.arch and args.arch != ".*" and args.arch not in compatible_arches:
logging.error(
"Not compatible arch: %s for channel: %s",
Expand Down
175 changes: 165 additions & 10 deletions python/lzreposync/src/lzreposync/db_utils.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,185 @@
# pylint: disable=missing-module-docstring
import logging
import time

from psycopg2 import errors
from psycopg2.errorcodes import UNIQUE_VIOLATION

from lzreposync.repo_dto import RepoDTO
from spacewalk.common.rhnConfig import cfg_component
from spacewalk.server import rhnSQL, rhnChannel


# stolen from python/spacewalk/server/test/misc_functions.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything copied will need to be maintained in two places. Can you add a TODO comment to move this to a common place? You don't have to do the move, a comment we can easily search for later is good.

def _new_channel_dict(**kwargs):
# pylint: disable-next=invalid-name
_counter = 0

label = kwargs.get("label")
if label is None:
# pylint: disable-next=consider-using-f-string
label = "rhn-unittest-%.3f-%s" % (time.time(), _counter)
# pylint: disable-next=invalid-name
_counter = _counter + 1

release = kwargs.get("release") or "release-" + label
# pylint: disable-next=redefined-outer-name
os = kwargs.get("os") or "Unittest Distro"
if "org_id" in kwargs:
# pylint: disable-next=unused-variable
org_id = kwargs["org_id"]
else:
org_id = "rhn-noc"

vdict = {
"label": label,
"name": kwargs.get("name") or label,
"summary": kwargs.get("summary") or label,
"description": kwargs.get("description") or label,
"basedir": kwargs.get("basedir") or "/",
"channel_arch": kwargs.get("channel_arch") or "i386",
"channel_families": [kwargs.get("channel_family") or label],
"org_id": kwargs.get("org_id"),
"gpg_key_url": kwargs.get("gpg_key_url"),
"gpg_key_id": kwargs.get("gpg_key_id"),
"gpg_key_fp": kwargs.get("gpg_key_fp"),
"end_of_life": kwargs.get("end_of_life"),
"dists": [
{
"release": release,
"os": os,
}
],
}
return vdict


class ChannelAlreadyExistsException(Exception):
    """Signals that the channel being created is already present in the db."""


def create_channel(channel_label, channel_arch, org_id=1):
    """
    Create and save a new channel labelled :channel_label, attached to the
    channel family "private-channel-family-1".

    :channel_label: label of the channel to create
    :channel_arch: channel architecture, eg: "x86_64"
    :org_id: organization id stored in the channel attributes (default 1)
    :return: the saved rhnChannel.Channel object
    :raises ChannelAlreadyExistsException: when saving hits a unique violation
    """
    rhnSQL.initDB()
    try:
        # "private-channel-family-1" is automatically created when starting
        # the susemanager docker db, so it can be used without creating it.
        channel_attributes = _new_channel_dict(
            label=channel_label,
            channel_family="private-channel-family-1",
            org_id=org_id,
            channel_arch=channel_arch,
        )
        new_channel = rhnChannel.Channel()
        new_channel.load_from_dict(channel_attributes)
        new_channel.save()
        rhnSQL.commit()
    except errors.lookup(UNIQUE_VIOLATION) as exc:
        print(f"INFO: Channel {channel_label} already exists!")
        raise ChannelAlreadyExistsException() from exc
    else:
        return new_channel
    finally:
        # Runs on success and on failure alike.
        rhnSQL.closeDB()


def create_content_source(
    channel_label,
    repo_label,
    source_url,
    metadata_signed="N",
    org_id=1,
    source_type="yum",
    repo_id=1,
):
    """
    Create a new content source and associate it with the given channel.

    :channel_label: label of the existing channel to attach the source to
    :repo_label: label stored for the new content source
    :source_url: url stored for the new content source
    :metadata_signed: value stored in the metadata_signed column (default "N")
    :org_id: organization id stored for the new content source
    :source_type: label looked up in rhnContentSourceType: yum|deb
    :repo_id: id given to the new rhnContentSource row
    :raises ValueError: when the source type or the channel cannot be found

    A unique violation raised by the inserts is reported on stdout and
    swallowed; nothing is committed in that case.
    """
    try:
        rhnSQL.initDB()

        # Resolve both lookups before writing anything, so that a bad
        # source type or channel label fails with a clear error.
        fetch_source_type_query = rhnSQL.prepare(
            """
            SELECT id from rhnContentSourceType where label = :source_type_label"""
        )
        fetch_source_type_query.execute(source_type_label=source_type)
        source_type_row = fetch_source_type_query.fetchone_dict()
        if not source_type_row:
            raise ValueError(f"Unknown content source type: {source_type}")
        type_id = source_type_row["id"]

        fetch_channel_id_query = rhnSQL.prepare(
            """
            SELECT id FROM rhnChannel WHERE label = :channel_label"""
        )
        fetch_channel_id_query.execute(channel_label=channel_label)
        channel_row = fetch_channel_id_query.fetchone_dict()
        if not channel_row:
            raise ValueError(f"Channel not found: {channel_label}")
        channel_id = channel_row["id"]

        add_repo_query = rhnSQL.prepare(
            """INSERT INTO rhnContentSource(id, org_id, type_id, source_url, label, metadata_signed) VALUES (:repo_id, :org_id,
            :type_id, :source_url, :label, :metadata_signed)
            """
        )
        add_repo_query.execute(
            repo_id=repo_id,
            org_id=org_id,
            type_id=type_id,
            source_url=source_url,
            label=repo_label,
            metadata_signed=metadata_signed,
        )

        # associate the source/repo with the channel.
        # The row above was inserted with id = repo_id, so use that id directly
        # instead of reading back an arbitrary row with "LIMIT 1", which could
        # belong to a different content source.
        associate_repo_channel_query = rhnSQL.prepare(
            """INSERT INTO rhnChannelContentSource(source_id, channel_id) VALUES (:source_id, :channel_id)
            """
        )
        associate_repo_channel_query.execute(source_id=repo_id, channel_id=channel_id)
        rhnSQL.commit()
    except errors.lookup(UNIQUE_VIOLATION):
        print(f"INFO: Source {repo_label} already exists!")
    finally:
        rhnSQL.closeDB()


# Stolen from python/spacewalk/satellite_tools/reposync.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

def get_compatible_arches(channel_id):
"""Return a list of compatible package arch labels for this channel"""
def get_compatible_arches(channel_label):
"""Return a list of compatible package arch labels for the given channel"""
rhnSQL.initDB()
h = rhnSQL.prepare(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This SQL query could use an update to JOIN ... ON

"""select pa.label
from rhnChannelPackageArchCompat cpac,
rhnChannel c,
rhnpackagearch pa
where c.id = :channel_id
where c.label = :channel_label
and c.channel_arch_id = cpac.channel_arch_id
and cpac.package_arch_id = pa.id"""
)
h.execute(channel_id=channel_id)
h.execute(channel_label=channel_label)
res_dict = h.fetchall_dict()
if not res_dict:
logging.warning(
"Couldn't fetch compatible arches for channel: %s", channel_label
)
return None
# pylint: disable-next=invalid-name
with cfg_component("server.susemanager") as CFG:
arches = [
k["label"]
for k in h.fetchall_dict()
if CFG.SYNC_SOURCE_PACKAGES or k["label"] not in ["src", "nosrc"]
for k in res_dict
if CFG.SYNC_SOURCE_PACKAGES
or k["label"]
not in ["src", "nosrc"] # TODO: what is CFG.SYNC_SOURCE_PACKAGES - ask team
]
rhnSQL.closeDB()
return arches
Expand All @@ -47,12 +202,12 @@ def get_all_arches():


def get_channel_info_by_label(channel_label):
    """
    Fetch the channel information from the given label and return
    the result in a dict like object

    :channel_label: label of the channel to look up
    :return: whatever rhnChannel.channel_info() returns for the label,
        or None when that result is falsy
    """
    rhnSQL.initDB()
    try:
        channel = rhnChannel.channel_info(channel_label)
    finally:
        # Release the connection even when the lookup raises.
        rhnSQL.closeDB()
    return channel or None

Expand Down
3 changes: 2 additions & 1 deletion python/lzreposync/src/lzreposync/deb_metadata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,12 @@ def parse_packages_metadata(self):
checksum_type="md5",
checksum=deb_pkg_header["MD5sum"],
relpath=None, # This is the path on the filesystem
org_id=1, # TODO: correct
org_id=1, # TODO: how to set this - ask team
channels=[],
expand_full_filelist=False,
remote_path=deb_pkg_header["remote_path"],
)
deb_package.arch = deb_pkg_header["arch"]
yield deb_package
except (KeyboardInterrupt, rhnSQL.SQLError):
raise
Expand Down
7 changes: 2 additions & 5 deletions python/lzreposync/src/lzreposync/deb_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@

# pylint: disable-next=missing-class-docstring
class DebRepo(Repo):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should consolidate the Debian repo parsing. Right now it's split into repo_plugins.deb_src, common.repo and this deb_repo (which appears to be a copy of the class in deb_src).

def __init__(
self, name, cache_path, url, channel_label=None
): # TODO remove the arch_filter (the arch is specified in the url query)
def __init__(self, name, cache_path, url, channel_label=None):
super().__init__(name, cache_path, url, None, "deb")
# 'arch_filter' is None because the arch is specified in the url query
self.signature_verified = True # TODO: complete
self.url = url
parts = url.rsplit("/dists/", 1)
Expand Down Expand Up @@ -235,5 +234,3 @@ def download_translation_file(self):
f"ERROR: Download of Translation{extension} descriptions file failed."
)
return None

# TODO: delete the downloaded pacakges.gz(xz)/translation.. files after finishing
32 changes: 0 additions & 32 deletions python/lzreposync/src/lzreposync/deb_utils.py

This file was deleted.

Loading