Skip to content

Commit 3eb2d94

Browse files
committed
update gentoo repo handling
1 parent 084840e commit 3eb2d94

File tree

4 files changed

+136
-104
lines changed

4 files changed

+136
-104
lines changed

client/patchman-client

+42-14
Original file line numberDiff line numberDiff line change
@@ -574,24 +574,52 @@ get_repos() {
574574
fi
575575

576576
# Gentoo
577-
if [ "${os}" == "Gentoo" ] ; then
577+
if [[ "${os}" =~ "Gentoo" ]] ; then
578578
if [ ${verbose} == 1 ] ; then
579579
echo 'Finding portage repos...'
580580
fi
581-
declare -A repos
582-
repos[gentoo]='-1000'
583-
repos_conf=$(awk '/\[/{prefix=$0; next} $1{print prefix $0}' /etc/portage/repos.conf/*.conf | grep '^\[')
584-
for stanza in ${repos_conf} ; do
585-
repo=$(echo ${stanza} | cut -d ']' -f 1 | sed -e 's/\[//')
586-
rhs=$(echo ${stanza} | cut -d ']' -f 2 | grep -v '^#')
587-
if [[ ${rhs} =~ "priority" ]] ; then
588-
priority=$(echo ${rhs} | sed -e 's/^ *priority *= *//')
589-
repos[${repo}]+=${priority}
590-
unset priority
581+
declare -A repo_info
582+
repos_output=$(portageq repos_config /)
583+
repo_name=""
584+
priority=""
585+
sync_uri=""
586+
587+
while IFS= read -r line; do
588+
# if the line starts with a section header (e.g., [gentoo], [guru]), it's the repo name
589+
if [[ "${line}" =~ ^\[(.*)\] ]]; then
590+
# if we already have a repo_name, save the previous entry
591+
if [[ -n "${repo_name}" && -n "${sync_uri}" ]]; then
592+
repo_info["${repo_name}"]="${priority},${sync_uri}"
593+
fi
594+
# else start new repo parsing, resetting vars
595+
repo_name="${BASH_REMATCH[1]}"
596+
priority=""
597+
sync_uri=""
591598
fi
592-
done
593-
for r in "${!repos[@]}"; do
594-
echo "'gentoo' 'Gentoo Linux ${r} ${host_arch}' '${r}' '${repos[${r}]}'" >> "${tmpfile_rep}"
599+
600+
# if the line contains "priority", extract the value (it defaults to 0 later if it doesn't exist)
601+
if [[ "${line}" =~ "priority" ]]; then
602+
priority=$(echo "${line}" | cut -d'=' -f2 | xargs)
603+
fi
604+
605+
# if the line contains "sync-uri", extract the value
606+
if [[ "${line}" =~ "sync-uri" ]]; then
607+
sync_uri=$(echo "${line}" | cut -d'=' -f2 | xargs)
608+
fi
609+
done <<< "${repos_output}"
610+
611+
# save the last repository entry if it's available
612+
if [[ -n "${repo_name}" && -n "${sync_uri}" ]]; then
613+
repo_info["${repo_name}"]="${priority},${sync_uri}"
614+
fi
615+
616+
for repo in "${!repo_info[@]}"; do
617+
priority=$(echo ${repo_info[$repo]} | cut -d',' -f1)
618+
sync_uri=$(echo ${repo_info[$repo]} | cut -d',' -f2)
619+
if [ "${priority}" == "" ] ; then
620+
priority=0
621+
fi
622+
echo "'gentoo' 'Gentoo Linux ${repo} Repo ${host_arch}' '${repo}' '${priority}' '${sync_uri}'" >> "${tmpfile_rep}"
595623
done
596624
fi
597625

reports/utils.py

+3-17
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,6 @@
3434
def process_repos(report, host):
3535
""" Processes the quoted repos string sent with a report
3636
"""
37-
if host.osvariant.name.startswith('Gentoo'):
38-
gentoo_repo = Repository.objects.get(repo_id='gentoo')
39-
host_repos = HostRepo.objects.filter(host=host)
40-
hostrepo, c = host_repos.get_or_create(host=host, repo=gentoo_repo)
4137
if report.repos:
4238
repo_ids = []
4339
host_repos = HostRepo.objects.filter(host=host)
@@ -224,6 +220,7 @@ def process_repo(repo, arch):
224220
r_type = Repository.GENTOO
225221
r_id = repo.pop(2)
226222
r_priority = repo[2]
223+
arch = 'any'
227224

228225
if repo[1]:
229226
r_name = repo[1]
@@ -232,6 +229,8 @@ def process_repo(repo, arch):
232229

233230
unknown = []
234231
for r_url in repo[3:]:
232+
if r_type == Repository.GENTOO and r_url.startswith('rsync'):
233+
r_url = 'https://api.gentoo.org/mirrors/distfiles.xml'
235234
try:
236235
mirror = Mirror.objects.get(url=r_url.strip('/'))
237236
except Mirror.DoesNotExist:
@@ -360,19 +359,6 @@ def process_gentoo_package(package, name, category, repo):
360359
package.category = category
361360
package.save()
362361

363-
repo_arch, created = MachineArchitecture.objects.get_or_create(name='any')
364-
repo_name = 'Gentoo Linux'
365-
gentoo_repo = get_or_create_repo(repo_name, repo_arch, Repository.GENTOO, repo)
366-
367-
if repo == 'gentoo':
368-
url = 'https://api.gentoo.org/mirrors/distfiles.xml'
369-
else:
370-
# this may not be correct. the urls are hardcoded anyway in repos/utils.py
371-
# need to figure out a better way to determine which repo/repo url to use
372-
url = 'https://api.gentoo.org/overlays/repositories.xml'
373-
mirror, c = Mirror.objects.get_or_create(repo=gentoo_repo, url=url, mirrorlist=True)
374-
MirrorPackage.objects.create(mirror=mirror, package=package)
375-
376362

377363
def get_arch(arch):
378364
""" Get or create MachineArchitecture from arch

repos/repo_types/gentoo.py

+90-72
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,14 @@
1616

1717
import git
1818
import os
19-
import re
2019
import shutil
2120
import tarfile
2221
import tempfile
2322
from defusedxml import ElementTree
2423
from fnmatch import fnmatch
2524
from io import BytesIO
25+
from pathlib import Path
2626

27-
from arch.models import PackageArchitecture
2827
from packages.models import PackageString
2928
from packages.utils import find_evr
3029
from patchman.signals import info_message, warning_message, error_message, pbar_start, pbar_update
@@ -37,13 +36,70 @@ def refresh_gentoo_main_repo(repo):
3736
"""
3837
mirrors = get_gentoo_mirror_urls()
3938
add_mirrors_from_urls(repo, mirrors)
39+
ts = get_datetime_now()
40+
for mirror in repo.mirror_set.filter(mirrorlist=False, refresh=True, enabled=True):
41+
if mirror.url == 'https://api.gentoo.org/mirrors/distfiles.xml':
42+
mirror.mirrorlist = True
43+
mirror.save()
44+
continue
45+
46+
res = get_url(mirror.url + '.md5sum')
47+
data = fetch_content(res, 'Fetching Repo checksum')
48+
if data is None:
49+
mirror.fail()
50+
continue
51+
52+
checksum = data.decode().split()[0]
53+
if checksum is None:
54+
mirror.fail()
55+
continue
56+
57+
if mirror.packages_checksum == checksum:
58+
text = 'Mirror checksum has not changed, not refreshing Package metadata'
59+
warning_message.send(sender=None, text=text)
60+
continue
61+
62+
res = get_url(mirror.url)
63+
mirror.last_access_ok = response_is_valid(res)
64+
if not mirror.last_access_ok:
65+
mirror.fail()
66+
continue
67+
68+
data = fetch_content(res, 'Fetching Repo data')
69+
if data is None:
70+
mirror.fail()
71+
continue
72+
extracted = extract(data, mirror.url)
73+
info_message.send(sender=None, text=f'Found Gentoo Repo - {mirror.url}')
74+
75+
computed_checksum = get_checksum(data, Checksum.md5)
76+
if not mirror_checksum_is_valid(computed_checksum, checksum, mirror, 'package'):
77+
mirror.fail()
78+
continue
79+
else:
80+
mirror.packages_checksum = checksum
81+
82+
packages = extract_gentoo_packages(mirror, extracted)
83+
if packages:
84+
update_mirror_packages(mirror, packages)
85+
86+
mirror.timestamp = ts
87+
mirror.save()
4088

4189

4290
def refresh_gentoo_overlay_repo(repo):
    """ Refresh all mirrors of a Gentoo overlay repo

    Looks up the overlay's mirror urls from `repo.repo_id`, adds them to
    `repo`, then for every enabled, refreshable, non-mirrorlist mirror
    extracts the overlay's packages and updates the mirror with them.

    A mirror whose git clone fails is marked as failed and skipped, so one
    unreachable mirror does not abort the refresh of the remaining ones.
    """
    mirrors = get_gentoo_overlay_mirrors(repo.repo_id)
    add_mirrors_from_urls(repo, mirrors)
    ts = get_datetime_now()
    for mirror in repo.mirror_set.filter(mirrorlist=False, refresh=True, enabled=True):
        try:
            packages = extract_gentoo_overlay_packages(mirror)
        except git.exc.GitCommandError as e:
            # same failure handling as the main repo refresh: flag the mirror
            # and move on to the next one
            error_message.send(sender=None, text=f'Error cloning {mirror.url}: {e}')
            mirror.fail()
            continue
        if packages:
            update_mirror_packages(mirror, packages)
        mirror.timestamp = ts
        mirror.save()
47103

48104

49105
def get_gentoo_ebuild_keywords(content):
@@ -82,7 +138,10 @@ def get_gentoo_ebuild_keywords(content):
82138
continue
83139
keywords.add(keyword)
84140
break
85-
return keywords
141+
if keywords:
142+
return keywords
143+
else:
144+
return default_keywords
86145

87146

88147
def get_gentoo_overlay_mirrors(repo_name):
@@ -157,7 +216,25 @@ def extract_gentoo_ebuilds(data):
157216
for member in tar.getmembers():
158217
if member.isfile() and member.name.endswith('ebuild') and not member.name.endswith('skel.ebuild'):
159218
file_content = tar.extractfile(member).read()
160-
extracted_ebuilds[member.name] = file_content
219+
full_path = Path(member.name)
220+
ebuild_path = Path(*full_path.parts[1:])
221+
extracted_ebuilds[str(ebuild_path)] = file_content
222+
return extracted_ebuilds
223+
224+
225+
def extract_gentoo_overlay_ebuilds(t):
    """ Extract ebuilds from a Gentoo overlay checkout

    Walks the directory tree rooted at `t` and reads every `*.ebuild` file
    that lives exactly two directories below it (`<category>/<package>/`).

    Returns a dict mapping the ebuild path relative to `t`
    (`<category>/<package>/<file>.ebuild`) to the raw bytes of that file.
    Ebuilds found at any other depth are skipped, since their path cannot
    be split into category, package name and file name.
    """
    extracted_ebuilds = {}
    for root, dirs, files in os.walk(t):
        # relpath copes with a trailing separator on `t` and yields '.' for
        # the top-level directory itself
        rel_dir = os.path.relpath(root, t)
        depth = 0 if rel_dir == os.curdir else len(rel_dir.split(os.sep))
        if depth >= 2:
            # nothing below a package directory is of interest, so stop
            # os.walk from descending any further
            dirs[:] = []
        if depth != 2:
            continue
        for name in files:
            if not fnmatch(name, '*.ebuild'):
                continue
            with open(os.path.join(root, name), 'rb') as f:
                extracted_ebuilds[os.path.join(rel_dir, name)] = f.read()
    return extracted_ebuilds
162239

163240

@@ -175,14 +252,14 @@ def extract_gentoo_packages_from_ebuilds(extracted_ebuilds):
175252
return
176253

177254
packages = set()
178-
flen = len(extracted_ebuilds)
179-
pbar_start.send(sender=None, ptext=f'Processing {flen} ebuilds', plen=flen)
255+
elen = len(extracted_ebuilds)
256+
pbar_start.send(sender=None, ptext=f'Processing {elen} ebuilds', plen=elen)
180257
for i, (path, content) in enumerate(extracted_ebuilds.items()):
181258
pbar_update.send(sender=None, index=i + 1)
182259
components = path.split(os.sep)
183-
category = components[1]
184-
name = components[2]
185-
evr = components[3].replace(f'{name}-', '').replace('.ebuild', '')
260+
category = components[0]
261+
name = components[1]
262+
evr = components[2].replace(f'{name}-', '').replace('.ebuild', '')
186263
epoch, version, release = find_evr(evr)
187264
arches = get_gentoo_ebuild_keywords(content)
188265
for arch in arches:
@@ -205,78 +282,19 @@ def extract_gentoo_overlay_packages(mirror):
205282
""" Extract packages from gentoo overlay repo
206283
"""
207284
t = tempfile.mkdtemp()
208-
git.Repo.clone_from(mirror.url, t, branch='master', depth=1)
285+
info_message.send(sender=None, text=f'Extracting Gentoo packages from {mirror.url}')
286+
git.Repo.clone_from(mirror.url, t, depth=1)
209287
packages = set()
210-
arch, c = PackageArchitecture.objects.get_or_create(name='any')
211-
for root, dirs, files in os.walk(t):
212-
for name in files:
213-
if fnmatch(name, '*.ebuild'):
214-
full_name = root.replace(t + '/', '')
215-
p_category, p_name = full_name.split('/')
216-
m = re.match(fr'{p_name}-(.*)\.ebuild', name)
217-
if m:
218-
p_evr = m.group(1)
219-
epoch, version, release = find_evr(p_evr)
220-
package = PackageString(
221-
name=p_name.lower(),
222-
epoch=epoch,
223-
version=version,
224-
release=release,
225-
arch=arch,
226-
packagetype='G',
227-
category=p_category,
228-
)
229-
packages.add(package)
288+
extracted_ebuilds = extract_gentoo_overlay_ebuilds(t)
230289
shutil.rmtree(t)
290+
packages = extract_gentoo_packages_from_ebuilds(extracted_ebuilds)
231291
return packages
232292

233293

234294
def refresh_gentoo_repo(repo):
    """ Refresh a Gentoo repo, using the main repo refresh when the
        repo_id is 'gentoo' and the overlay refresh otherwise
    """
    is_main_repo = repo.repo_id == 'gentoo'
    refresh = refresh_gentoo_main_repo if is_main_repo else refresh_gentoo_overlay_repo
    refresh(repo)

repos/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def add_mirrors_from_urls(repo, mirror_urls):
177177
from repos.models import Mirror
178178
# FIXME: maybe we should store the mirrorlist url with full path to repomd.xml?
179179
# that is what metalink urls return now
180-
m, c = Mirror.objects.get_or_create(repo=repo, url=mirror_url.rstrip('/').rstrip('repodata/repomd.xml'))
180+
m, c = Mirror.objects.get_or_create(repo=repo, url=mirror_url.rstrip('/').replace('repodata/repomd.xml', ''))
181181
if c:
182182
text = f'Added Mirror - {mirror_url}'
183183
info_message.send(sender=None, text=text)

0 commit comments

Comments
 (0)