[Concept,5/5] buildman: Filter out doubled-prefix toolchain binaries
Commit Message
From: Simon Glass <simon.glass@canonical.com>
Some toolchain tarballs from kernel.org contain symlinks with a doubled
cross-compile prefix, e.g. 'x86_64-linux-x86_64-linux-gcc' alongside the
correct 'x86_64-linux-gcc'. This causes buildman to print a warning
about ambiguous toolchains when downloading.
Add a regex-based check to detect and filter out these malformed
binaries during toolchain scanning. When verbose output is enabled,
these files are reported as "ignoring ... (doubled prefix)" rather than
"found ...".
Co-developed-by: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: Simon Glass <simon.glass@canonical.com>
---
tools/buildman/test.py | 24 ++++++++++++++++++++++++
tools/buildman/toolchain.py | 25 +++++++++++++++++++++++++
2 files changed, 49 insertions(+)
@@ -744,6 +744,30 @@ class TestBuild(TestBuildBase):
# Verify downloaded priority beats system priority
self.assertLess(toolchain.PRIORITY_DOWNLOADED, sys_tc.priority)
+    def test_is_doubled_prefix(self):
+        """Check that is_doubled_prefix() only flags a repeated gcc prefix"""
+        is_doubled = toolchain.Toolchains.is_doubled_prefix
+        # Normal compiler names, with and without a cross-compile prefix
+        valid = (
+            'aarch64-linux-gcc',
+            'x86_64-linux-gcc',
+            'arm-linux-gnueabi-gcc',
+            'gcc',
+        )
+        for name in valid:
+            self.assertFalse(is_doubled(name))
+
+        # The prefix appears twice, so these must be detected
+        doubled = (
+            'aarch64-linux-aarch64-linux-gcc',
+            'x86_64-linux-x86_64-linux-gcc',
+        )
+        for name in doubled:
+            self.assertTrue(is_doubled(name))
+
+        # Another tool from the toolchain, rather than gcc
+        self.assertFalse(is_doubled('aarch64-linux-ld'))
+
def test_get_env_args(self):
"""Test the GetEnvArgs() function"""
tc = self.toolchains.select('arm')
@@ -30,6 +30,10 @@ from u_boot_pylib import tools
# Environment variable / argument types for get_env_args()
(VAR_CROSS_COMPILE, VAR_PATH, VAR_ARCH, VAR_MAKE_ARGS) = range(4)
+# Matches a doubled prefix ending in '-', e.g. 'arm-linux-arm-linux-gcc'
+RE_DOUBLED_PREFIX = re.compile(r'^(.+-)\1gcc$')
+
+
class MyHTMLParser(HTMLParser):
"""Simple class to collect links from a page
@@ -378,6 +382,22 @@ class Toolchains:
f"toolchain for arch '{toolchain.arch}' has priority "
f"{self.toolchains[toolchain.arch].priority}")
+    @staticmethod
+    def is_doubled_prefix(fname):
+        """Detect a gcc filename whose cross-compile prefix appears twice
+
+        Toolchain tarballs can include symlinks such as
+        'x86_64-linux-x86_64-linux-gcc', where the prefix is repeated. Such
+        files are not valid toolchains, so callers should skip them.
+
+        Args:
+            fname (str): Basename of the file to check, without any directory
+
+        Returns:
+            bool: True if fname matches RE_DOUBLED_PREFIX, else False
+        """
+        return RE_DOUBLED_PREFIX.match(fname) is not None
+
def scan_path(self, path, verbose):
"""Scan a path for a valid toolchain
@@ -394,6 +414,11 @@ class Toolchains:
if verbose:
print(f" - looking in '{dirname}'")
for fname in glob.glob(dirname + '/*gcc'):
+ basename = os.path.basename(fname)
+ if self.is_doubled_prefix(basename):
+ if verbose:
+ print(f" - ignoring '{fname}' (doubled prefix)")
+ continue
if verbose:
print(f" - found '{fname}'")
fnames.append(fname)