[Concept,1/2] pickman: Write pipeline-fix log tails to temp files

Message ID 20260316185102.3892597-2-sjg@u-boot.org
State New
Headers
Series pickman: Fix robustness issues with error handling and large prompts

Commit Message

Simon Glass March 16, 2026, 6:50 p.m. UTC
  From: Simon Glass <simon.glass@canonical.com>

The Claude Agent SDK passes the prompt as a command-line argument to the
claude subprocess. Linux enforces a 128 KB limit per argument
(MAX_ARG_STRLEN), which is exceeded when multiple CI jobs fail and their
log tails (up to 200 lines each) are embedded in the prompt.

Write job log tails and large MR descriptions to temporary files under
a pickman-logs-* directory, and reference the file paths in the prompt
so the agent reads them with its Read tool. The temp directory is
cleaned up after the agent finishes. When no tempdir is provided, the
original inline behaviour is preserved.

Signed-off-by: Simon Glass <simon.glass@canonical.com>
---

 tools/pickman/agent.py |  82 ++++++++++++++++++++++++---------
 tools/pickman/ftest.py | 100 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 152 insertions(+), 30 deletions(-)
  

Patch

diff --git a/tools/pickman/agent.py b/tools/pickman/agent.py
index 023b65b5d36..7e482a7b8ee 100644
--- a/tools/pickman/agent.py
+++ b/tools/pickman/agent.py
@@ -7,7 +7,9 @@ 
 
 import asyncio
 import os
+import shutil
 import sys
+import tempfile
 
 # Allow 'from pickman import xxx' to work via symlink
 our_path = os.path.dirname(os.path.realpath(__file__))
@@ -540,9 +542,15 @@  def handle_mr_comments(mr_iid, branch_name, comments, remote, target='master',
 
 # pylint: disable=too-many-arguments
 def build_pipeline_fix_prompt(mr_iid, branch_name, failed_jobs, remote,
-                               target, mr_description, attempt):
+                               target, mr_description, attempt,
+                               tempdir=None):
     """Build prompt and task description for the pipeline fix agent
 
+    The CI job log tails and MR description are written to temporary files
+    instead of being embedded in the prompt, to avoid exceeding the Linux
+    MAX_ARG_STRLEN limit (128 KB) when the Claude Agent SDK passes the
+    prompt as a command-line argument.
+
     Args:
         mr_iid (int): Merge request IID
         branch_name (str): Source branch name
@@ -551,6 +559,8 @@  def build_pipeline_fix_prompt(mr_iid, branch_name, failed_jobs, remote,
         target (str): Target branch
         mr_description (str): MR description with context
         attempt (int): Fix attempt number
+        tempdir (str or None): Directory for temporary log files; when None
+            the log tails are embedded in the prompt (legacy behaviour)
 
     Returns:
         tuple: (prompt, task_desc) where prompt is the full agent prompt and
@@ -558,20 +568,42 @@  def build_pipeline_fix_prompt(mr_iid, branch_name, failed_jobs, remote,
     """
     task_desc = f'fix {len(failed_jobs)} failed pipeline job(s) (attempt {attempt})'
 
-    # Format failed jobs
+    # Format failed jobs - write log tails to temp files when tempdir is
+    # provided, to keep the prompt small enough for execve()
     job_sections = []
     for job in failed_jobs:
-        job_sections.append(
-            f'### Job: {job.name} (stage: {job.stage})\n'
-            f'URL: {job.web_url}\n'
-            f'Log tail:\n```\n{job.log_tail}\n```'
-        )
+        if tempdir and job.log_tail:
+            safe_name = job.name.replace('/', '_').replace(':', '_')
+            log_path = os.path.join(tempdir,
+                                    f'job-{job.id}-{safe_name}.log')
+            with open(log_path, 'w', encoding='utf-8') as fout:
+                fout.write(job.log_tail)
+            job_sections.append(
+                f'### Job: {job.name} (stage: {job.stage})\n'
+                f'URL: {job.web_url}\n'
+                f'Log tail: read file {log_path}'
+            )
+        else:
+            job_sections.append(
+                f'### Job: {job.name} (stage: {job.stage})\n'
+                f'URL: {job.web_url}\n'
+                f'Log tail:\n```\n{job.log_tail}\n```'
+            )
     jobs_text = '\n\n'.join(job_sections)
 
-    # Include MR description for context
+    # Include MR description for context - write to file when tempdir
+    # is provided and the description is non-trivial
     context_section = ''
     if mr_description:
-        context_section = f'''
+        if tempdir and len(mr_description) > 1024:
+            desc_path = os.path.join(tempdir, 'mr-description.txt')
+            with open(desc_path, 'w', encoding='utf-8') as fout:
+                fout.write(mr_description)
+            context_section = f'''
+Context from MR description: read file {desc_path}
+'''
+        else:
+            context_section = f'''
 Context from MR description:
 
 {mr_description}
@@ -656,6 +688,10 @@  async def run_pipeline_fix_agent(mr_iid, branch_name, failed_jobs, remote,
                                   attempt=1, repo_path=None):
     """Run the Claude agent to fix pipeline failures
 
+    Job log tails are written to temporary files so that the prompt
+    passed to the agent stays well below the Linux MAX_ARG_STRLEN
+    limit (128 KB).
+
     Args:
         mr_iid (int): Merge request IID
         branch_name (str): Source branch name
@@ -676,20 +712,24 @@  async def run_pipeline_fix_agent(mr_iid, branch_name, failed_jobs, remote,
     if repo_path is None:
         repo_path = os.getcwd()
 
-    prompt, task_desc = build_pipeline_fix_prompt(
-        mr_iid, branch_name, failed_jobs, remote, target,
-        mr_description, attempt)
-
-    options = ClaudeAgentOptions(
-        allowed_tools=['Bash', 'Read', 'Grep', 'Edit', 'Write'],
-        cwd=repo_path,
-        max_buffer_size=MAX_BUFFER_SIZE,
-    )
+    tempdir = tempfile.mkdtemp(prefix='pickman-logs-')
+    try:
+        prompt, task_desc = build_pipeline_fix_prompt(
+            mr_iid, branch_name, failed_jobs, remote, target,
+            mr_description, attempt, tempdir=tempdir)
+
+        options = ClaudeAgentOptions(
+            allowed_tools=['Bash', 'Read', 'Grep', 'Edit', 'Write'],
+            cwd=repo_path,
+            max_buffer_size=MAX_BUFFER_SIZE,
+        )
 
-    tout.info(f'Starting Claude agent to {task_desc}...')
-    tout.info('')
+        tout.info(f'Starting Claude agent to {task_desc}...')
+        tout.info('')
 
-    return await run_agent_collect(prompt, options)
+        return await run_agent_collect(prompt, options)
+    finally:
+        shutil.rmtree(tempdir, ignore_errors=True)
 
 
 def fix_pipeline(mr_iid, branch_name, failed_jobs, remote, target='master',
diff --git a/tools/pickman/ftest.py b/tools/pickman/ftest.py
index fba10d3c4d7..52c807cf8cc 100644
--- a/tools/pickman/ftest.py
+++ b/tools/pickman/ftest.py
@@ -6005,6 +6005,16 @@  class TestGetFailedJobs(unittest.TestCase):
 class TestBuildPipelineFixPrompt(unittest.TestCase):
     """Tests for build_pipeline_fix_prompt function."""
 
+    def setUp(self):
+        """Set up temp directory for log files."""
+        self.tmp_dir = tempfile.mkdtemp(prefix='pickman-test-')
+
+    def tearDown(self):
+        """Remove temp directory."""
+        import shutil
+
+        shutil.rmtree(self.tmp_dir, ignore_errors=True)
+
     def test_single_job(self):
         """Test prompt with a single failed job"""
         failed_jobs = [
@@ -6015,16 +6025,22 @@  class TestBuildPipelineFixPrompt(unittest.TestCase):
         ]
         prompt, task_desc = agent.build_pipeline_fix_prompt(
             42, 'cherry-abc123', failed_jobs, 'ci', 'master',
-            'Test MR desc', 1)
+            'Test MR desc', 1, tempdir=self.tmp_dir)
 
         self.assertIn('!42', prompt)
         self.assertIn('cherry-abc123', prompt)
         self.assertIn('build:sandbox', prompt)
-        self.assertIn('error: undefined reference', prompt)
         self.assertIn('attempt 1', prompt)
         self.assertIn('cherry-abc123-fix1', prompt)
         self.assertIn('1 failed', task_desc)
 
+        # Log tail should be in the temp file, not in the prompt
+        log_path = os.path.join(self.tmp_dir, 'job-1-build_sandbox.log')
+        self.assertTrue(os.path.exists(log_path))
+        with open(log_path, encoding='utf-8') as inf:
+            self.assertEqual(inf.read(), 'error: undefined reference')
+        self.assertIn(log_path, prompt)
+
     def test_multiple_jobs(self):
         """Test prompt with multiple failed jobs"""
         failed_jobs = [
@@ -6038,14 +6054,19 @@  class TestBuildPipelineFixPrompt(unittest.TestCase):
                 log_tail='test failure'),
         ]
         prompt, task_desc = agent.build_pipeline_fix_prompt(
-            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1)
+            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1,
+            tempdir=self.tmp_dir)
 
         self.assertIn('build:sandbox', prompt)
         self.assertIn('test:dm', prompt)
-        self.assertIn('build error', prompt)
-        self.assertIn('test failure', prompt)
         self.assertIn('2 failed', task_desc)
 
+        # Both log files should exist
+        self.assertTrue(os.path.exists(
+            os.path.join(self.tmp_dir, 'job-1-build_sandbox.log')))
+        self.assertTrue(os.path.exists(
+            os.path.join(self.tmp_dir, 'job-2-test_dm.log')))
+
     def test_attempt_number(self):
         """Test that attempt number is reflected in prompt"""
         failed_jobs = [
@@ -6055,7 +6076,8 @@  class TestBuildPipelineFixPrompt(unittest.TestCase):
                 log_tail='error'),
         ]
         prompt, task_desc = agent.build_pipeline_fix_prompt(
-            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 3)
+            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 3,
+            tempdir=self.tmp_dir)
 
         self.assertIn('attempt 3', prompt)
         self.assertIn('cherry-abc123-fix3', prompt)
@@ -6070,7 +6092,8 @@  class TestBuildPipelineFixPrompt(unittest.TestCase):
                 log_tail='error'),
         ]
         prompt, _ = agent.build_pipeline_fix_prompt(
-            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1)
+            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1,
+            tempdir=self.tmp_dir)
 
         self.assertIn('um build sandbox', prompt)
 
@@ -6087,7 +6110,8 @@  class TestBuildPipelineFixPrompt(unittest.TestCase):
                 log_tail='error'),
         ]
         prompt, _ = agent.build_pipeline_fix_prompt(
-            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1)
+            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1,
+            tempdir=self.tmp_dir)
 
         # Should include both boards plus sandbox in the buildman command
         self.assertIn('buildman', prompt)
@@ -6104,11 +6128,69 @@  class TestBuildPipelineFixPrompt(unittest.TestCase):
                 log_tail='error'),
         ]
         prompt, _ = agent.build_pipeline_fix_prompt(
-            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1)
+            42, 'cherry-abc123', failed_jobs, 'ci', 'master', '', 1,
+            tempdir=self.tmp_dir)
 
         self.assertIn('buildman -o /tmp/pickman', prompt)
         self.assertIn('coral', prompt)
 
+    def test_large_logs_stay_under_limit(self):
+        """Test that large log tails are written to files, keeping the
+        prompt well under the Linux MAX_ARG_STRLEN limit (128 KB)."""
+        # Create 5 jobs with 200-line logs (~120 bytes per line)
+        big_log = '\n'.join(f'line {i}: ' + 'x' * 100 for i in range(200))
+        failed_jobs = [
+            gitlab.FailedJob(
+                id=i, name=f'build:board{i}', stage='build',
+                web_url=f'https://gitlab.com/job/{i}',
+                log_tail=big_log)
+            for i in range(5)
+        ]
+        prompt, _ = agent.build_pipeline_fix_prompt(
+            42, 'cherry-abc123', failed_jobs, 'ci', 'master',
+            'x' * 50000, 1, tempdir=self.tmp_dir)
+
+        # Prompt should be well under 128 KB (the logs are in files)
+        self.assertLess(len(prompt), 128 * 1024)
+
+        # All 5 log files plus mr-description should exist
+        log_files = [f for f in os.listdir(self.tmp_dir)
+                     if f.startswith('job-')]
+        self.assertEqual(len(log_files), 5)
+        self.assertTrue(os.path.exists(
+            os.path.join(self.tmp_dir, 'mr-description.txt')))
+
+    def test_no_tempdir_embeds_inline(self):
+        """Test legacy behaviour when tempdir is None"""
+        failed_jobs = [
+            gitlab.FailedJob(
+                id=1, name='build:sandbox', stage='build',
+                web_url='https://gitlab.com/job/1',
+                log_tail='error: undefined reference'),
+        ]
+        prompt, _ = agent.build_pipeline_fix_prompt(
+            42, 'cherry-abc123', failed_jobs, 'ci', 'master',
+            'Test MR desc', 1)
+
+        self.assertIn('error: undefined reference', prompt)
+        self.assertIn('Test MR desc', prompt)
+
+    def test_small_mr_desc_stays_inline(self):
+        """Test that a small MR description is kept inline"""
+        failed_jobs = [
+            gitlab.FailedJob(
+                id=1, name='build:sandbox', stage='build',
+                web_url='https://gitlab.com/job/1',
+                log_tail='error'),
+        ]
+        prompt, _ = agent.build_pipeline_fix_prompt(
+            42, 'cherry-abc123', failed_jobs, 'ci', 'master',
+            'Short desc', 1, tempdir=self.tmp_dir)
+
+        self.assertIn('Short desc', prompt)
+        self.assertFalse(os.path.exists(
+            os.path.join(self.tmp_dir, 'mr-description.txt')))
+
 
 class TestProcessPipelineFailures(unittest.TestCase):
     """Tests for process_pipeline_failures function."""