Skip to content

Commit

Permalink
rewrite improve opensuse error handling
Browse files Browse the repository at this point in the history
Signed-off-by: Logan Bond <[email protected]>

set lint to run on only branches, not on tags

Signed-off-by: Logan Bond <[email protected]>

adding hadolint to Actions workflow

Signed-off-by: Logan Bond <[email protected]>

fix lint job syntax

Signed-off-by: Logan Bond <[email protected]>

fix uses in lint actions

Signed-off-by: Logan Bond <[email protected]>

add yamllint

Signed-off-by: Logan Bond <[email protected]>

adjust some names of pipeline workflow

Signed-off-by: Logan Bond <[email protected]>

cleanup circle ci - not used anymore

Signed-off-by: Logan Bond <[email protected]>

tabs to spaces in Dockerfile

Signed-off-by: Logan Bond <[email protected]>

add back circleci - it builds the containers

Signed-off-by: Logan Bond <[email protected]>

add back whitespace

Signed-off-by: Logan Bond <[email protected]>

set back test name

Signed-off-by: Logan Bond <[email protected]>

add dockerfile build check to pipeline

Signed-off-by: Logan Bond <[email protected]>

change job name to be more uniform

Signed-off-by: Logan Bond <[email protected]>

revert name change, PR expects it

Signed-off-by: Logan Bond <[email protected]>

revert pipeline changes for next PR

Signed-off-by: Logan Bond <[email protected]>

revert pipeline changes for next PR

Signed-off-by: Logan Bond <[email protected]>

replace xpath with iterparse in opensuse XML parsing for better memory management

Signed-off-by: Logan Bond <[email protected]>
  • Loading branch information
EXONER4TED authored and poiana committed Nov 11, 2022
1 parent 1e9901f commit 906fe7f
Showing 1 changed file with 38 additions and 14 deletions.
52 changes: 38 additions & 14 deletions kernel_crawler/rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import sqlite3
import tempfile

from io import BytesIO

from . import repo
from kernel_crawler.utils.download import get_url

Expand All @@ -31,6 +33,12 @@ def get_loc_by_xpath(cls, text, expr):
'repo': 'http://linux.duke.edu/metadata/repo',
'rpm': 'http://linux.duke.edu/metadata/rpm'
})

# if unable to find the expression in the XML, return None
if not loc:
return None

# else return the first item out of the tuple
return loc[0]

@classmethod
Expand Down Expand Up @@ -183,6 +191,12 @@ def get_repodb_url(self):
'''
repomd = get_url(self.base_url + 'repodata/repomd.xml')
pkglist_url = self.get_loc_by_xpath(repomd, '//repo:repomd/repo:data[@type="primary"]/repo:location/@href')

# if no pkglist was found, return None
if not pkglist_url:
return None

# else add the pkglist_url to the base_url
return self.base_url + pkglist_url

def parse_kernel_release(self, kernel_devel_pkg):
Expand Down Expand Up @@ -212,7 +226,6 @@ def get_package_tree(self, filter=''):
Once parsed, use the package URL to parse the kernel release and determine the kernel-devel*noarch package URL.
'''

packages = {}

# attempt to query for the repomd - bail out if 404
try:
Expand All @@ -222,20 +235,31 @@ def get_package_tree(self, filter=''):
# traceback.print_exc() # extremely verbose, uncomment if debugging
return {}

# SUSE stores their package information in raw XML
# parse it for the kernel-default-devel package
expression = f'//common:location/@href[starts-with(., "{self.arch}/{self._kernel_devel_pattern}")]'
kernel_default_devel_pkg_url = self.get_loc_by_xpath(repodb, expression)

# parse out the kernel release from the url, faster than re-parsing the xml
parsed_kernel_release = self.parse_kernel_release(kernel_default_devel_pkg_url)
# using iterparse, loop over the XML to find the kernel devel package
# iterparse is used over xpath as iterparse does not load the giant file into memory all at once
package_match = f'{self.arch}/{self._kernel_devel_pattern}'
for _, element in etree.iterparse(BytesIO(repodb)):
if 'href' in element.attrib.keys() and package_match in element.attrib['href']:
kernel_default_devel_pkg_url = element.attrib['href']
break # found the entry, no need to keep looping

# add the kernel-default-devel package
packages.setdefault(parsed_kernel_release, set()).add(self.base_url + kernel_default_devel_pkg_url)
# check to ensure a kernel_devel_pkg was found
if not kernel_default_devel_pkg_url:
return {} # return an empty packages dict

# also add the noarch kernel-devel package
# SUSE combines the kernel-default-devel package and kernel-devel*.noarch package for compilation
noarch_kernel_devel = self.build_kernel_devel_noarch_url(parsed_kernel_release)
packages.setdefault(parsed_kernel_release, set()).add(noarch_kernel_devel)
else: # was able to find some packages
packages = {}

return packages
# parse out the kernel release from the url, faster than re-parsing the xml
parsed_kernel_release = self.parse_kernel_release(kernel_default_devel_pkg_url)

# add the kernel-default-devel package
packages.setdefault(parsed_kernel_release, set()).add(self.base_url + kernel_default_devel_pkg_url)

# also add the noarch kernel-devel package
# SUSE combines the kernel-default-devel package and kernel-devel*.noarch package for compilation
noarch_kernel_devel = self.build_kernel_devel_noarch_url(parsed_kernel_release)
packages.setdefault(parsed_kernel_release, set()).add(noarch_kernel_devel)

return packages

0 comments on commit 906fe7f

Please sign in to comment.