[Concept,5/5] buildman: Filter out doubled-prefix toolchain binaries

Message ID 20260112225406.3274105-6-sjg@u-boot.org
State New
Headers
Series buildman: Improve toolchain selection and config adjustment |

Commit Message

Simon Glass Jan. 12, 2026, 10:54 p.m. UTC
  From: Simon Glass <simon.glass@canonical.com>

Some toolchain tarballs from kernel.org contain symlinks with a doubled
cross-compile prefix, e.g. 'x86_64-linux-x86_64-linux-gcc' alongside the
correct 'x86_64-linux-gcc'. This causes buildman to print a warning
about ambiguous toolchains when downloading.

Add a regex-based check to detect and filter out these malformed
binaries during toolchain scanning. When verbose output is enabled,
these files are shown as "ignoring ... (doubled prefix)" rather than
"found ...".

Co-developed-by: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: Simon Glass <simon.glass@canonical.com>
---

 tools/buildman/test.py      | 24 ++++++++++++++++++++++++
 tools/buildman/toolchain.py | 25 +++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
  

Patch

diff --git a/tools/buildman/test.py b/tools/buildman/test.py
index da6df1f173c..0f4a5b9e543 100644
--- a/tools/buildman/test.py
+++ b/tools/buildman/test.py
@@ -744,6 +744,30 @@  class TestBuild(TestBuildBase):
             # Verify downloaded priority beats system priority
             self.assertLess(toolchain.PRIORITY_DOWNLOADED, sys_tc.priority)
 
+    def test_is_doubled_prefix(self):
+        """Test detection of doubled toolchain prefixes"""
+        # Names with a single cross-compile prefix, or none, are not doubled
+        self.assertFalse(
+            toolchain.Toolchains.is_doubled_prefix('aarch64-linux-gcc'))
+        self.assertFalse(
+            toolchain.Toolchains.is_doubled_prefix('x86_64-linux-gcc'))
+        self.assertFalse(
+            toolchain.Toolchains.is_doubled_prefix('arm-linux-gnueabi-gcc'))
+        self.assertFalse(
+            toolchain.Toolchains.is_doubled_prefix('gcc'))
+
+        # The whole prefix is repeated before 'gcc' (should be filtered out)
+        self.assertTrue(
+            toolchain.Toolchains.is_doubled_prefix(
+                'aarch64-linux-aarch64-linux-gcc'))
+        self.assertTrue(
+            toolchain.Toolchains.is_doubled_prefix(
+                'x86_64-linux-x86_64-linux-gcc'))
+
+        # Does not end in 'gcc', so it is never reported as doubled
+        self.assertFalse(
+            toolchain.Toolchains.is_doubled_prefix('aarch64-linux-ld'))
+
     def test_get_env_args(self):
         """Test the GetEnvArgs() function"""
         tc = self.toolchains.select('arm')
diff --git a/tools/buildman/toolchain.py b/tools/buildman/toolchain.py
index 8f3d3ab3b0c..27302f20d42 100644
--- a/tools/buildman/toolchain.py
+++ b/tools/buildman/toolchain.py
@@ -30,6 +30,10 @@  from u_boot_pylib import tools
 # Environment variable / argument types for get_env_args()
 (VAR_CROSS_COMPILE, VAR_PATH, VAR_ARCH, VAR_MAKE_ARGS) = range(4)
 
+# Matches a repeated 'xxx-' prefix, e.g. 'aarch64-linux-aarch64-linux-gcc'
+RE_DOUBLED_PREFIX = re.compile(r'^(.+-)\1gcc$')
+
+
 class MyHTMLParser(HTMLParser):
     """Simple class to collect links from a page
 
@@ -378,6 +382,22 @@  class Toolchains:
                       f"toolchain for arch '{toolchain.arch}' has priority "
                       f"{self.toolchains[toolchain.arch].priority}")
 
+    @staticmethod
+    def is_doubled_prefix(fname):
+        """Check if a gcc filename has a doubled prefix
+
+        Some toolchain tarballs contain symlinks with the cross-compile prefix
+        repeated, e.g. 'x86_64-linux-x86_64-linux-gcc'. These are not valid
+        toolchains and should be ignored.
+
+        Args:
+            fname (str): Filename to check (basename, not full path)
+
+        Returns:
+            bool: True only if fname is a repeated prefix followed by 'gcc'
+        """
+        return bool(RE_DOUBLED_PREFIX.match(fname))
+
     def scan_path(self, path, verbose):
         """Scan a path for a valid toolchain
 
@@ -394,6 +414,11 @@  class Toolchains:
             if verbose:
                 print(f"      - looking in '{dirname}'")
             for fname in glob.glob(dirname + '/*gcc'):
+                basename = os.path.basename(fname)
+                if self.is_doubled_prefix(basename):
+                    if verbose:
+                        print(f"         - ignoring '{fname}' (doubled prefix)")
+                    continue
                 if verbose:
                     print(f"         - found '{fname}'")
                 fnames.append(fname)