[Concept,4/9] codman: Add a new source-code analysis tool

Message ID 20251124134932.1991031-5-sjg@u-boot.org
State New
Series codman: Add a new source-code analysis tool

Commit Message

Simon Glass Nov. 24, 2025, 1:49 p.m. UTC
  From: Simon Glass <simon.glass@canonical.com>

Add a new tool called 'codman' (code manager) for analysing source code
usage in U-Boot builds. This tool determines which files and lines of
code are actually compiled based on the build configuration.
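
Example usage, assuming buildman is available in the PATH:

    ./tools/codman/codman -b sandbox stats
    ./tools/codman/codman -b sandbox dirs -s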

Co-developed-by: Claude <noreply@anthropic.com>
Signed-off-by: Simon Glass <simon.glass@canonical.com>
---

 tools/codman/analyser.py |  76 +++++
 tools/codman/codman      |   1 +
 tools/codman/codman.py   | 664 +++++++++++++++++++++++++++++++++++++++
 tools/codman/output.py   | 536 +++++++++++++++++++++++++++++++
 4 files changed, 1277 insertions(+)
 create mode 100644 tools/codman/analyser.py
 create mode 120000 tools/codman/codman
 create mode 100755 tools/codman/codman.py
 create mode 100644 tools/codman/output.py
  

Patch

diff --git a/tools/codman/analyser.py b/tools/codman/analyser.py
new file mode 100644
index 00000000000..2c0cc8b8855
--- /dev/null
+++ b/tools/codman/analyser.py
@@ -0,0 +1,76 @@ 
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2025 Canonical Ltd
+#
+"""Base classes for source code analysis.
+
+This module provides base classes and data structures for analysing which lines
+in source files are active vs inactive.
+"""
+
+import os
+from collections import namedtuple
+
+# Named tuple for file analysis results
+# Fields:
+#   total_lines: Total number of lines in the file
+#   active_lines: Number of lines that are active (not removed by
+#       preprocessor)
+#   inactive_lines: Number of lines that are inactive (removed by
+#       preprocessor)
+#   line_status: Dict mapping line numbers to status ('active',
+#       'inactive', etc.)
+FileResult = namedtuple('FileResult',
+                        ['total_lines', 'active_lines',
+                         'inactive_lines', 'line_status'])
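+
+# Illustrative example of a result:
+#   FileResult(total_lines=100, active_lines=80, inactive_lines=20,
+#              line_status={1: 'active', 2: 'inactive'})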
+
+
+class Analyser:  # pylint: disable=too-few-public-methods
+    """Base class for source code analysers.
+
+    This class provides common initialisation for analysers that determine
+    which lines in source files are active vs inactive based on various
+    methods (preprocessor analysis, debug info, etc.).
+    """
+
+    def __init__(self, srcdir, keep_temps=False):
+        """Set up the analyser.
+
+        Args:
+            srcdir (str): Path to source root directory
+            keep_temps (bool): If True, keep temporary files for debugging
+        """
+        self.srcdir = srcdir
+        self.keep_temps = keep_temps
+
+    def find_object_files(self, build_dir):
+        """Find all object files in the build directory.
+
+        Args:
+            build_dir (str): Build directory to search
+
+        Returns:
+            list: List of absolute paths to .o files
+        """
+        obj_files = []
+        for root, _, files in os.walk(build_dir):
+            for fname in files:
+                if fname.endswith('.o'):
+                    obj_files.append(os.path.join(root, fname))
+        return obj_files
+
+    @staticmethod
+    def count_lines(file_path):
+        """Count the number of lines in a file.
+
+        Args:
+            file_path (str): Path to file to count lines in
+
+        Returns:
+            int: Number of lines in the file, or 0 on error
+        """
+        try:
+            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                return sum(1 for _ in f)
+        except IOError:
+            return 0
diff --git a/tools/codman/codman b/tools/codman/codman
new file mode 120000
index 00000000000..e7e14ca1165
--- /dev/null
+++ b/tools/codman/codman
@@ -0,0 +1 @@ 
+codman.py
\ No newline at end of file
diff --git a/tools/codman/codman.py b/tools/codman/codman.py
new file mode 100755
index 00000000000..dbd72b066c1
--- /dev/null
+++ b/tools/codman/codman.py
@@ -0,0 +1,664 @@ 
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2025 Canonical Ltd
+#
+"""Analyse C source code usage in U-Boot builds.
+
+This script performs file-level and line-level analysis of U-Boot source code:
+- File level: which files are compiled vs not compiled
+- Line level: which lines within compiled files are active based on CONFIG_*
+
+It combines file-level analysis (which files are used) with line-level
+analysis showing which parts of each file are active, based on the
+preprocessor and Kconfig options.
+"""
+
+import argparse
+import fnmatch
+import multiprocessing
+import os
+import re
+import subprocess
+import sys
+
+# Allow 'from u_boot_pylib import xxx' to work
+# pylint: disable=C0413
+our_path = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(our_path, '..'))
+
+# pylint: disable=wrong-import-position
+from u_boot_pylib import terminal, tools, tout
+
+# Import analysis modules
+import dwarf
+import lsp
+import output
+import unifdef
+# pylint: enable=wrong-import-position
+
+# Pattern to match .cmd files
+RE_PATTERN = re.compile(r'^\..*\.cmd$')
+
+# Pattern to extract the source file from a .cmd file
+RE_LINE = re.compile(r'^(saved)?cmd_[^ ]*\.o := (?P<command_prefix>.* )'
+                     r'(?P<file_path>[^ ]*\.[cS]) *(;|$)')
+RE_SOURCE = re.compile(r'^source_[^ ]*\.o := (?P<file_path>[^ ]*\.[cS])')
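+# Example lines these patterns match (illustrative):
+#   savedcmd_lib/string.o := cc -c ... lib/string.c
+#   source_lib/string.o := lib/string.c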
+
+# Directories to exclude from analysis
+EXCLUDE_DIRS = ['.git', 'Documentation', 'doc', 'scripts', 'tools']
+
+# Default base directory for builds
+BUILD_BASE = '/tmp/b'
+
+
+def cmdfiles_in_dir(directory):
+    """Generate paths to all .cmd files under the directory.
+
+    Args:
+        directory (str): Directory to search recursively
+
+    Yields:
+        str: Path to each .cmd file found
+    """
+    for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
+        dirnames[:] = [d for d in dirnames if d not in EXCLUDE_DIRS]
+
+        for filename in filenames:
+            if RE_PATTERN.match(filename):
+                yield os.path.join(dirpath, filename)
+
+
+def extract_source_from_cmdfile(cmdfile_path, srcdir):
+    """Extract the source file path from a .cmd file.
+
+    Args:
+        cmdfile_path (str): Path to the .cmd file to parse.
+        srcdir (str): Root directory of the U-Boot source tree.
+    """
+    with open(cmdfile_path, 'rt', encoding='utf-8') as f:
+        for line in f:
+            result = RE_SOURCE.match(line) or RE_LINE.match(line)
+            if result:
+                file_path = result.group('file_path')
+                abs_path = os.path.realpath(os.path.join(srcdir, file_path))
+                if os.path.exists(abs_path):
+                    return abs_path
+
+    return None
+
+
+def find_all_source_files(srcdir):
+    """Find all C/assembly/header source files in the source tree.
+
+    Args:
+        srcdir (str): Root directory of the U-Boot source tree.
+
+    Returns:
+        Set of absolute paths to all source files.
+    """
+    tout.progress('Finding all source files...')
+    all_sources = set()
+    exclude_dirs = [os.path.join(srcdir, d) for d in EXCLUDE_DIRS]
+
+    for dirpath, dirnames, filenames in os.walk(srcdir, topdown=True):
+        # Skip excluded directories (match whole path components)
+        if any(dirpath == excl or dirpath.startswith(excl + os.sep)
+               for excl in exclude_dirs):
+            dirnames[:] = []
+            continue
+
+        for filename in filenames:
+            if filename.endswith(('.c', '.S', '.h')):
+                abs_path = os.path.realpath(os.path.join(dirpath, filename))
+                all_sources.add(abs_path)
+
+    tout.info(f'Found {len(all_sources)} total source files')
+
+    return all_sources
+
+
+def extract_deps_from_cmdfile(cmdfile_path):
+    """Extract all source file dependencies from a .cmd file.
+
+    This includes the main source file and all headers it depends on.
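+
+    A deps section in a .cmd file looks like this (illustrative):
+
+        deps_lib/string.o := \
+            include/linux/string.h \
+            $(wildcard include/config/foo.h) \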
+
+    Args:
+        cmdfile_path (str): Path to the .cmd file to parse.
+
+    Returns:
+        Set of absolute paths to source files (c/S/h) used.
+    """
+    deps = set()
+
+    with open(cmdfile_path, 'rt', encoding='utf-8') as f:
+        in_deps_section = False
+        for line in f:
+            # Look for deps_* := lines
+            if line.startswith('deps_'):
+                in_deps_section = True
+                continue
+
+            # If we're in the deps section, extract file paths
+            if in_deps_section:
+                # Lines look like: /path/to/file.h \
+                # or: $(wildcard include/config/foo.h) \
+                if line.strip() == '':
+                    in_deps_section = False
+                    continue
+
+                # Skip wildcard lines
+                if '$(wildcard' in line:
+                    continue
+
+                # Extract the file path
+                path = line.strip().rstrip('\\').strip()
+                if path and os.path.exists(path):
+                    abs_path = os.path.realpath(path)
+                    # Only include .c, .S, .h files
+                    if abs_path.endswith(('.c', '.S', '.h')):
+                        deps.add(abs_path)
+
+    return deps
+
+
+def resolve_wrapper_file(source_file):
+    """Check if a file is a wrapper that only includes another .c file.
+
+    For example lib/libfdt/fdt_overlay.c which holds:
+        #include <linux/libfdt_env.h>
+        #include "../../scripts/dtc/libfdt/fdt_overlay.c"
+
+    Args:
+        source_file (str): Path to the source file
+
+    Returns:
+        str: Path to the included .c file if this is a wrapper, else the
+            original file
+    """
+    lines = tools.read_file(source_file, binary=False).splitlines()
+
+    # Check if file only has #include directives (and comments/blank lines)
+    included_c_file = None
+    has_other_content = False
+
+    for line in lines:
+        stripped = line.strip()
+        # Skip blank lines and comments
+        if not stripped or stripped.startswith('//') or \
+           stripped.startswith('/*') or stripped.startswith('*'):
+            continue
+
+        # Check for #include directive
+        if stripped.startswith('#include'):
+            # Extract the included file
+            match = re.search(r'#include\s+[<"]([^>"]+)[>"]', stripped)
+            if match:
+                included = match.group(1)
+                # Only track .c file includes (the actual source)
+                if included.endswith('.c'):
+                    included_c_file = included
+            continue
+
+        # Found non-include content
+        has_other_content = True
+        break
+
+    # If we only found includes and one was a .c file, resolve it
+    if not has_other_content and included_c_file:
+        # Resolve relative to the wrapper file's directory
+        wrapper_dir = os.path.dirname(source_file)
+        resolved = os.path.realpath(
+            os.path.join(wrapper_dir, included_c_file))
+        if os.path.exists(resolved):
+            return resolved
+
+    return source_file
+
+
+def _process_cmdfile(args):
+    """Process a single .cmd file to extract source files.
+
+    This is a worker function for multiprocessing.
+
+    Args:
+        args: Tuple of (cmdfile_path, srcdir, srcdir_real)
+
+    Returns:
+        set: Set of absolute paths to source files found in this .cmd file
+    """
+    cmdfile, srcdir, srcdir_real = args
+    sources = set()
+
+    # Get the main source file (.c or .S)
+    source_file = extract_source_from_cmdfile(cmdfile, srcdir)
+    if source_file:
+        # Resolve wrapper files to their actual source
+        resolved = resolve_wrapper_file(source_file)
+        # Only include files within the source tree
+        if os.path.realpath(resolved).startswith(srcdir_real):
+            sources.add(resolved)
+
+    # Get all dependencies (headers)
+    deps = extract_deps_from_cmdfile(cmdfile)
+    # Filter to only include files within the source tree
+    for dep in deps:
+        if os.path.realpath(dep).startswith(srcdir_real):
+            sources.add(dep)
+
+    return sources
+
+
+def find_used_sources(build_dir, srcdir, jobs=None):
+    """Find all source files used in the build.
+
+    This includes both the compiled .c/.S files and all .h headers they depend
+    on. For wrapper files that only include another .c file, the included file
+    is returned instead.
+
+    Only files within the source tree are included - system headers and
+    toolchain files are excluded.
+
+    Args:
+        build_dir (str): Path to the build directory containing .cmd files
+        srcdir (str): Path to U-Boot source root directory
+        jobs (int): Number of parallel jobs (None = use all CPUs)
+
+    Returns:
+        set: Set of absolute paths to all source files used in the build
+    """
+    tout.progress('Finding used source files...')
+    srcdir_real = os.path.realpath(srcdir)
+
+    # Collect all cmdfiles first
+    cmdfiles = list(cmdfiles_in_dir(build_dir))
+    tout.progress(f'Processing {len(cmdfiles)} .cmd files...')
+
+    # Prepare arguments for each worker
+    worker_args = [(cmdfile, srcdir, srcdir_real) for cmdfile in cmdfiles]
+
+    # Use multiprocessing to process cmdfiles in parallel
+    if jobs is None:
+        jobs = multiprocessing.cpu_count()
+
+    used_sources = set()
+    with multiprocessing.Pool(processes=jobs) as pool:
+        # Process cmdfiles in parallel
+        for sources in pool.imap_unordered(_process_cmdfile, worker_args,
+                                           chunksize=100):
+            used_sources.update(sources)
+
+    tout.info(f'Found {len(used_sources)} used source files')
+
+    return used_sources
+
+
+def select_sources(srcdir, build_dir, filter_pattern, jobs=None):
+    """Find all and used source files, optionally applying a filter.
+
+    Args:
+        srcdir (str): Root directory of the source tree
+        build_dir (str): Build directory path
+        filter_pattern (str): Optional wildcard pattern to filter files
+            (None to skip)
+        jobs (int): Number of parallel jobs (None = use all CPUs)
+
+    Returns:
+        tuple: (all_sources, used_sources, skipped_sources) - sets of file paths
+    """
+    all_sources = find_all_source_files(srcdir)
+
+    # Find used source files
+    used_sources = find_used_sources(build_dir, srcdir, jobs)
+
+    # Apply filter if specified
+    if filter_pattern:
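+        # A pattern matches either the basename or the full path, e.g.
+        # '*acpi*' selects lib/acpi/acpi_table.c (illustrative)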
+        all_sources = {f for f in all_sources
+                       if fnmatch.fnmatch(os.path.basename(f),
+                                          filter_pattern) or
+                          fnmatch.fnmatch(f, filter_pattern)}
+        used_sources = {f for f in used_sources
+                        if fnmatch.fnmatch(os.path.basename(f),
+                                           filter_pattern) or
+                           fnmatch.fnmatch(f, filter_pattern)}
+        tout.progress(f'After filter: {len(all_sources)} total, '
+                      f'{len(used_sources)} used')
+
+    # Calculate unused sources
+    skipped_sources = all_sources - used_sources
+
+    return all_sources, used_sources, skipped_sources
+
+
+def do_build(args):
+    """Set up and validate source and build directories.
+
+    Args:
+        args (Namespace): Parsed command-line arguments
+
+    Returns:
+        tuple: (srcdir, build_dir) on success
+        Calls tout.fatal() on failure
+    """
+    srcdir = os.path.realpath(args.source)
+
+    if not os.path.isdir(srcdir):
+        tout.fatal(f'Source directory does not exist: {srcdir}')
+
+    # Determine build directory
+    if args.build_dir:
+        build_dir = os.path.realpath(args.build_dir)
+    else:
+        # Use default: build_base/<board>
+        build_dir = os.path.join(args.build_base, args.board)
+
+    # If not skipping build, build it
+    if not args.no_build:
+        if args.board:
+            build_board(args.board, build_dir, srcdir, args.adjust,
+                        args.use_dwarf)
+            # Note: build_board() calls tout.fatal() on failure which exits
+
+    # Verify build directory exists
+    if not os.path.isdir(build_dir):
+        tout.fatal(f'Build directory does not exist: {build_dir}')
+
+    tout.info(f'Analysing build in: {build_dir}')
+    tout.info(f'Source directory: {srcdir}')
+
+    return srcdir, build_dir
+
+
+def build_board(board, build_dir, srcdir, adjust_cfg=None, use_dwarf=False):
+    """Build a board using buildman.
+
+    Args:
+        board (str): Board name to build
+        build_dir (str): Directory to build into
+        srcdir (str): U-Boot source directory
+        adjust_cfg (list): List of CONFIG adjustments
+        use_dwarf (bool): Enable CC_OPTIMIZE_FOR_DEBUG to prevent inlining
+
+    Returns:
+        True on success (note: failures call tout.fatal() which exits)
+    """
+    tout.info(f"Building board '{board}' with buildman...")
+    tout.info(f'Build directory: {build_dir}')
+
+    # Enable CC_OPTIMIZE_FOR_DEBUG if using DWARF to prevent inlining
+    if use_dwarf:
+        adjust_cfg = list(adjust_cfg or []) + ['CC_OPTIMIZE_FOR_DEBUG']
+
+    if adjust_cfg:
+        # Count actual adjustments (handle comma-separated values)
+        num_adjustments = sum(len([x for x in item.split(',') if x.strip()])
+                              for item in adjust_cfg)
+        tout.progress(f'Building with {num_adjustments} Kconfig adjustments')
+    else:
+        tout.progress('Building')
+
+    # Run buildman to build the board
+    # -L: disable LTO, -w: enable warnings, -o: output directory,
+    # -m: mrproper (clean), -I: show errors/warnings only (incremental)
+    cmd = ['buildman', '--board', board, '-L', '-w', '-m', '-I', '-o',
+           build_dir]
+
+    # Add CONFIG adjustments if specified
+    if adjust_cfg:
+        for adj in adjust_cfg:
+            cmd.extend(['--adjust-cfg', adj])
+
+    try:
+        result = subprocess.run(cmd, cwd=srcdir, check=False,
+                                capture_output=False, text=True)
+        if result.returncode != 0:
+            tout.fatal(f'buildman exited with code {result.returncode}')
+        return True
+    except FileNotFoundError:
+        tout.fatal('buildman not found. Please ensure buildman is in '
+                   'your PATH.')
+    except OSError as e:
+        tout.fatal(f'Error running buildman: {e}')
+    return None
+
+
+def parse_args(argv=None):
+    """Parse command-line arguments.
+
+    Returns:
+        Parsed arguments object
+    """
+    parser = argparse.ArgumentParser(
+        description='Analyse C source code usage in U-Boot builds',
+        epilog='Example: %(prog)s -b sandbox stats')
+
+    parser.add_argument('-s', '--source', type=str, default='.',
+                        help='Path to U-Boot source directory '
+                             '(default: current directory)')
+    parser.add_argument('-b', '--board', type=str, default='sandbox',
+                        help='Board name to build and analyse (default: sandbox)')
+    parser.add_argument('-B', '--build-dir', type=str,
+                        help='Use existing build directory instead of building')
+    parser.add_argument('--build-base', type=str, default=BUILD_BASE,
+                        help=f'Base directory for builds (default: {BUILD_BASE})')
+    parser.add_argument('-n', '--no-build', action='store_true',
+                        help='Skip building, use existing build directory')
+    parser.add_argument('-a', '--adjust', type=str, action='append',
+                        help='Adjust CONFIG options '
+                             '(e.g., -a CONFIG_FOO, -a ~CONFIG_BAR)')
+    parser.add_argument('-w', '--dwarf', action='store_true',
+                        dest='use_dwarf',
+                        help='Use DWARF debug info '
+                             '(more accurate, requires rebuild)')
+    parser.add_argument('-l', '--lsp', action='store_true',
+                        dest='use_lsp',
+                        help='Use clangd LSP to analyse inactive regions '
+                             '(requires clangd)')
+    parser.add_argument('-u', '--unifdef', type=str, default='unifdef',
+                        help='Path to unifdef executable (default: unifdef)')
+    parser.add_argument('-j', '--jobs', type=int, metavar='N',
+                        help='Number of parallel jobs (default: all CPUs)')
+    parser.add_argument('-i', '--include-headers', action='store_true',
+                        help='Include header files in unifdef analysis')
+    parser.add_argument('-f', '--filter', type=str, metavar='PATTERN',
+                        help='Filter files by wildcard pattern (e.g., *acpi*)')
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help='Show verbose output')
+    parser.add_argument('-D', '--debug', action='store_true',
+                        help='Enable debug mode')
+
+    # Subcommands
+    subparsers = parser.add_subparsers(dest='cmd', help='Command to execute')
+
+    # stats command (default)
+    stats = subparsers.add_parser('stats',
+                                   help='Show statistics about code usage')
+    stats.add_argument('--top', type=int, metavar='N', default=20,
+                       help='Show top N files with most inactive code '
+                            '(default: 20)')
+
+    # dirs command
+    dirs = subparsers.add_parser('dirs', help='Show directory breakdown')
+    dirs.add_argument('-s', '--subdirs', action='store_true',
+                      help='Show breakdown by all subdirectories')
+    dirs.add_argument('-f', '--show-files', action='store_true',
+                      help='Show individual files within directories')
+    dirs.add_argument('-e', '--show-empty', action='store_true',
+                      help='Show directories with 0 lines used')
+
+    # detail command
+    detail = subparsers.add_parser('detail',
+                                    help='Show line-by-line analysis of files')
+    detail.add_argument('files', nargs='+', metavar='FILE',
+                        help='File(s) to analyse')
+
+    # unused command
+    subparsers.add_parser('unused', help='List all unused source files')
+
+    # used command
+    subparsers.add_parser('used', help='List all used source files')
+
+    # summary command
+    subparsers.add_parser('summary',
+                          help='Show per-file summary of active/inactive lines')
+
+    # copy-used command
+    copy = subparsers.add_parser('copy-used',
+                                  help='Copy used source files to a directory')
+    copy.add_argument('dest_dir', metavar='DIR',
+                      help='Destination directory')
+
+    args = parser.parse_args(argv)
+
+    # Default command is stats
+    if not args.cmd:
+        args.cmd = 'stats'
+        # Set default value for --top when stats is the default command
+        args.top = 20
+
+    # Map subcommand arguments to expected names, defaulting to None so
+    # both attributes always exist
+    args.detail = None
+    args.copy_used = None
+    if args.cmd == 'detail':
+        args.detail = args.files
+    elif args.cmd == 'copy-used':
+        args.copy_used = args.dest_dir
+
+    # Validation
+    if args.no_build and args.adjust:
+        tout.warning('-a/--adjust ignored when using -n/--no-build')
+
+    return args
+
+
+def do_analysis(used, build_dir, srcdir, unifdef_path, include_headers, jobs,
+                use_lsp, keep_temps=False):
+    """Perform line-level analysis if requested.
+
+    Args:
+        used (set): Set of used source files
+        build_dir (str): Build directory path
+        srcdir (str): Source directory path
+        unifdef_path (str): Path to unifdef executable (None to use DWARF/LSP)
+        include_headers (bool): Include header files in unifdef analysis
+        jobs (int): Number of parallel jobs
+        use_lsp (bool): Use LSP (clangd) instead of DWARF
+        keep_temps (bool): If True, keep temporary files for debugging
+
+    Returns:
+        dict: Line-level analysis results, or None if not requested/failed
+    """
+    if unifdef_path:
+        config_file = os.path.join(build_dir, '.config')
+        analyser = unifdef.UnifdefAnalyser(config_file, srcdir, used,
+                                            unifdef_path, include_headers,
+                                            keep_temps)
+    elif use_lsp:
+        analyser = lsp.LspAnalyser(build_dir, srcdir, used, keep_temps)
+    else:
+        analyser = dwarf.DwarfAnalyser(build_dir, srcdir, used, keep_temps)
+    return analyser.process(jobs)
+
+
+def do_output(args, all_srcs, used, skipped, results, srcdir):
+    """Perform output operation based on command.
+
+    Args:
+        args (argparse.Namespace): Parsed command-line arguments
+        all_srcs (set): All source files
+        used (set): Used source files
+        skipped (set): Unused source files
+        results (dict): Line-level analysis results (or None)
+        srcdir (str): Source directory path
+
+    Returns:
+        bool: True on success, False on failure
+    """
+    terminal.print_clear()
+
+    # Execute the command
+    if args.cmd == 'detail':
+        # Show detail for each file, collecting missing files
+        missing = []
+        shown = 0
+        for fname in args.detail:
+            if output.show_file_detail(fname, results, srcdir):
+                shown += 1
+            else:
+                missing.append(fname)
+
+        # Show summary if any files were missing
+        if missing:
+            tout.warning(f'{len(missing)} file(s) not found in analysed '
+                         f"sources: {', '.join(missing)}")
+
+        ok = shown > 0
+    elif args.cmd == 'summary':
+        ok = output.show_file_summary(results, srcdir)
+    elif args.cmd == 'unused':
+        ok = output.list_unused_files(skipped, srcdir)
+    elif args.cmd == 'used':
+        ok = output.list_used_files(used, srcdir)
+    elif args.cmd == 'copy-used':
+        ok = output.copy_used_files(used, srcdir, args.copy_used)
+    elif args.cmd == 'dirs':
+        ok = output.show_dir_breakdown(all_srcs, used, results, srcdir,
+                                        args.subdirs, args.show_files,
+                                        args.show_empty)
+    else:
+        # stats (default)
+        ok = output.show_statistics(all_srcs, used, skipped, results, srcdir,
+                                     args.top)
+
+    return ok
+
+
+def main(argv=None):
+    """Main function.
+
+    Args:
+        argv (list): Command-line arguments (default: sys.argv[1:])
+
+    Returns:
+        int: Exit code (0 for success, 1 for failure)
+    """
+    tout.init(tout.NOTICE)
+    args = parse_args(argv)
+
+    # Init tout based on verbosity flags
+    if args.debug:
+        tout.init(tout.DEBUG)
+    elif args.verbose:
+        tout.init(tout.INFO)
+
+    srcdir, build_dir = do_build(args)
+    all_srcs, used, skipped = select_sources(srcdir, build_dir, args.filter,
+                                              args.jobs)
+
+    # Determine which files to analyse
+    files_to_analyse = used
+    if args.cmd == 'detail':
+        # For detail command, only analyse the requested files
+        files_to_analyse = set()
+        for fname in args.detail:
+            abs_path = os.path.realpath(os.path.join(srcdir, fname))
+            if abs_path in used:
+                files_to_analyse.add(abs_path)
+
+    # Perform line-level analysis
+    unifdef_path = None if (args.use_dwarf or args.use_lsp) else args.unifdef
+    keep_temps = args.debug
+    results = do_analysis(files_to_analyse, build_dir, srcdir, unifdef_path,
+                          args.include_headers, args.jobs, args.use_lsp,
+                          keep_temps)
+    if results is None:
+        return 1
+
+    if not do_output(args, all_srcs, used, skipped, results, srcdir):
+        return 1
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/tools/codman/output.py b/tools/codman/output.py
new file mode 100644
index 00000000000..2b1d097fe26
--- /dev/null
+++ b/tools/codman/output.py
@@ -0,0 +1,536 @@ 
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2025 Canonical Ltd
+#
+"""Output formatting and display functions for srcman.
+
+This module provides functions for displaying analysis results in various
+formats:
+- Statistics views (file-level and line-level)
+- Directory breakdowns (top-level and subdirectories)
+- Per-file summaries
+- Detailed line-by-line views
+- File listings (used/unused)
+- File copying operations
+"""
+
+import os
+import shutil
+import sys
+from collections import defaultdict
+
+# Import from tools directory
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+from u_boot_pylib import terminal, tout  # pylint: disable=wrong-import-position
+
+
+class DirStats:  # pylint: disable=too-few-public-methods
+    """Statistics for a directory.
+
+    Attributes:
+        total: Total number of files in directory
+        used: Number of files used (compiled)
+        unused: Number of files not used
+        lines_total: Total lines of code in directory
+        lines_used: Number of active lines (after preprocessing)
+        files: List of file info dicts (for --show-files)
+    """
+    def __init__(self):
+        self.total = 0
+        self.used = 0
+        self.unused = 0
+        self.lines_total = 0
+        self.lines_used = 0
+        self.files = []
+
+
+def count_lines(file_path):
+    """Count lines in a file"""
+    try:
+        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+            return sum(1 for _ in f)
+    except IOError:
+        return 0
+
+
+def klocs(lines):
+    """Format line count in thousands, rounded to 1 decimal place.
+
+    Args:
+        lines (int): Line count (e.g., 3500)
+
+    Returns:
+        Formatted string in thousands (e.g., '3.5')
+    """
+    kloc = round(lines / 1000, 1)
+    return f'{kloc:.1f}'
+
+
+def percent(numerator, denominator):
+    """Calculate percentage, handling division by zero.
+
+    Args:
+        numerator (int/float): The numerator
+        denominator (int/float): The denominator
+
+    Returns:
+        float: Percentage (0-100), or 0 if denominator is 0
+    """
+    return 100 * numerator / denominator if denominator else 0
+
+
+def print_heading(text, width=70, char='='):
+    """Print a heading with separator lines.
+
+    Args:
+        text (str): Heading text to display (empty for separator only)
+        width (int): Width of the separator line
+        char (str): Character to use for separator
+    """
+    print(char * width)
+    if text:
+        print(text)
+        print(char * width)
+
+
+def show_file_detail(detail_file, file_results, srcdir):
+    """Show detailed line-by-line analysis for a specific file.
+
+    Args:
+        detail_file (str): Path to the file to show details for (relative or
+            absolute)
+        file_results (dict): Dictionary mapping file paths to analysis results
+        srcdir (str): Root directory of the source tree
+
+    Returns:
+        True on success, False on error
+    """
+    detail_path = os.path.realpath(detail_file)
+    if detail_path not in file_results:
+        # Try relative to source root
+        detail_path = os.path.realpath(os.path.join(srcdir, detail_file))
+
+    if detail_path in file_results:
+        result = file_results[detail_path]
+        rel_path = os.path.relpath(detail_path, srcdir)
+
+        print_heading(f'DETAIL FOR: {rel_path}', width=70)
+        print(f'Total lines:    {result.total_lines:6}')
+        pct_active = percent(result.active_lines, result.total_lines)
+        pct_inactive = percent(result.inactive_lines, result.total_lines)
+        print(f'Active lines:   {result.active_lines:6} ({pct_active:.1f}%)')
+        print(f'Inactive lines: {result.inactive_lines:6} ' +
+              f'({pct_inactive:.1f}%)')
+        print()
+
+        # Show the file with status annotations
+        with open(detail_path, 'r', encoding='utf-8', errors='ignore') as f:
+            lines = f.readlines()
+
+        col = terminal.Color()
+        for line_num, line in enumerate(lines, 1):
+            status = result.line_status.get(line_num, 'unknown')
+            marker = '-' if status == 'inactive' else ' '
+            prefix = f'{marker} {line_num:4} | '
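+            # e.g. '-   42 | #ifdef CONFIG_FOO' for an inactive line
+            # (illustrative)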
+            code = line.rstrip()
+
+            if status == 'active':
+                # Normal color for active code
+                print(prefix + code)
+            else:
+                # Non-bright cyan for inactive code
+                print(prefix + col.build(terminal.Color.CYAN, code,
+                                         bright=False))
+        return True
+
+    # File not found - caller handles errors
+    return False
+
+
+def show_file_summary(file_results, srcdir):
+    """Show per-file summary of line analysis.
+
+    Args:
+        file_results (dict): Dictionary mapping file paths to analysis results
+        srcdir (str): Root directory of the source tree
+
+    Returns:
+        bool: True on success
+    """
+    print_heading('PER-FILE SUMMARY', width=90)
+    print(f"{'File':<50} {'Total':>8} {'Active':>8} "
+          f"{'Inactive':>8} {'%Active':>8}")
+    print('-' * 90)
+
+    for source_file in sorted(file_results.keys()):
+        result = file_results[source_file]
+        rel_path = os.path.relpath(source_file, srcdir)
+        if len(rel_path) > 47:
+            rel_path = '...' + rel_path[-44:]
+
+        pct_active = percent(result.active_lines, result.total_lines)
+        print(f'{rel_path:<50} {result.total_lines:>8} '
+              f'{result.active_lines:>8} {result.inactive_lines:>8} '
+              f'{pct_active:>7.1f}%')
+
+    return True
+
+
+def list_unused_files(skipped_sources, srcdir):
+    """List unused source files.
+
+    Args:
+        skipped_sources (set of str): Set of absolute paths to unused
+            source files
+        srcdir (str): Root directory of the source tree
+
+    Returns:
+        bool: True on success
+    """
+    print(f'Unused source files ({len(skipped_sources)}):')
+    for source_file in sorted(skipped_sources):
+        try:
+            rel_path = os.path.relpath(source_file, srcdir)
+        except ValueError:
+            rel_path = source_file
+        print(f'  {rel_path}')
+
+    return True
+
+
+def list_used_files(used_sources, srcdir):
+    """List used source files.
+
+    Args:
+        used_sources (set of str): Set of absolute paths to used source
+            files
+        srcdir (str): Root directory of the source tree
+
+    Returns:
+        bool: True on success
+    """
+    print(f'Used source files ({len(used_sources)}):')
+    for source_file in sorted(used_sources):
+        try:
+            rel_path = os.path.relpath(source_file, srcdir)
+        except ValueError:
+            rel_path = source_file
+        print(f'  {rel_path}')
+
+    return True
+
+
+def copy_used_files(used_sources, srcdir, dest_dir):
+    """Copy used source files to a destination directory, preserving structure.
+
+    Args:
+        used_sources (set): Set of absolute paths to used source files
+        srcdir (str): Root directory of the source tree
+        dest_dir (str): Destination directory for the copy
+
+    Returns:
+        True on success, False if errors occurred
+    """
+    if os.path.exists(dest_dir):
+        tout.error(f'Destination directory already exists: {dest_dir}')
+        return False
+
+    tout.progress(f'Copying {len(used_sources)} used source files to ' +
+                  f'{dest_dir}')
+
+    copied_count = 0
+    error_count = 0
+
+    for source_file in sorted(used_sources):
+        # Preserve the tree structure relative to the source root
+        rel_path = os.path.relpath(source_file, srcdir)
+        src_path = source_file
+        dest_path = os.path.join(dest_dir, rel_path)
+
+        try:
+            # Create parent directory if needed
+            dest_parent = os.path.dirname(dest_path)
+            os.makedirs(dest_parent, exist_ok=True)
+
+            # Copy the file
+            shutil.copy2(src_path, dest_path)
+            copied_count += 1
+        except IOError as e:
+            error_count += 1
+            tout.error(f'Error copying {source_file}: {e}')
+
+    tout.progress(f'Copied {copied_count} files to {dest_dir}')
+    if error_count:
+        tout.error(f'Failed to copy {error_count} files')
+        return False
+
+    return True
+
+
+def collect_dir_stats(all_sources, used_sources, file_results, srcdir,
+                      by_subdirs, show_files):
+    """Collect statistics organized by directory.
+
+    Args:
+        all_sources (set): Set of all source file paths
+        used_sources (set): Set of used source file paths
+        file_results (dict): Optional dict mapping file paths to line
+            analysis results (or None)
+        srcdir (str): Root directory of the source tree
+        by_subdirs (bool): If True, use full subdirectory paths;
+            otherwise top-level only
+        show_files (bool): If True, collect individual file info within
+            each directory
+
+    Returns:
+        dict: Directory statistics keyed by directory path
+    """
+    dir_stats = defaultdict(DirStats)
+
+    for source_file in all_sources:
+        rel_path = os.path.relpath(source_file, srcdir)
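+        # e.g. 'drivers/mmc/mmc.c' maps to 'drivers' (top-level) or
+        # 'drivers/mmc' (with by_subdirs); illustrative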
+
+        if by_subdirs:
+            # Use the full directory path (not including the filename)
+            dir_path = os.path.dirname(rel_path)
+            if not dir_path:
+                dir_path = '.'
+        else:
+            # Use only the top-level directory
+            dir_path = (rel_path.split(os.sep)[0] if os.sep in rel_path
+                        else '.')
+
+        line_count = count_lines(source_file)
+        dir_stats[dir_path].total += 1
+        dir_stats[dir_path].lines_total += line_count
+
+        if source_file in used_sources:
+            dir_stats[dir_path].used += 1
+            # Use active line count if line-level analysis was performed
+            # Normalise path to match file_results keys (absolute paths)
+            abs_source = os.path.realpath(source_file)
+
+            # Try to find the file in file_results
+            result = None
+            if file_results:
+                if abs_source in file_results:
+                    result = file_results[abs_source]
+                elif source_file in file_results:
+                    result = file_results[source_file]
+
+            if result:
+                active_lines = result.active_lines
+                inactive_lines = result.inactive_lines
+                dir_stats[dir_path].lines_used += active_lines
+                # Store file info for --show-files (exclude .h files)
+                if show_files and not rel_path.endswith('.h'):
+                    dir_stats[dir_path].files.append({
+                        'path': rel_path,
+                        'total': line_count,
+                        'active': active_lines,
+                        'inactive': inactive_lines
+                    })
+            else:
+                # File not found in results - count all lines
+                tout.debug(f'File not in results (using full count): '
+                           f'{rel_path}')
+                dir_stats[dir_path].lines_used += line_count
+                if show_files and not rel_path.endswith('.h'):
+                    dir_stats[dir_path].files.append({
+                        'path': rel_path,
+                        'total': line_count,
+                        'active': line_count,
+                        'inactive': 0
+                    })
+        else:
+            dir_stats[dir_path].unused += 1
+
+    return dir_stats
+
+
+def print_dir_stats(dir_stats, file_results, by_subdirs, show_files,
+                    show_empty):
+    """Print directory statistics table.
+
+    Args:
+        dir_stats (dict): Directory statistics keyed by directory path
+        file_results (dict): Optional dict mapping file paths to line analysis
+            results (or None)
+        by_subdirs (bool): If True, show full subdirectory breakdown; otherwise
+            top-level only
+        show_files (bool): If True, show individual files within directories
+        show_empty (bool): If True, show directories with 0 lines used
+    """
+    # Sort alphabetically by directory name
+    sorted_dirs = sorted(dir_stats.items(), key=lambda x: x[0])
+
+    for dir_path, stats in sorted_dirs:
+        # Skip subdirectories with 0 lines used unless --show-empty is set
+        if by_subdirs and not show_empty and stats.lines_used == 0:
+            continue
+
+        pct_used = percent(stats.used, stats.total)
+        pct_code = percent(stats.lines_used, stats.lines_total)
+        # Truncate long paths
+        display_path = dir_path
+        if len(display_path) > 37:
+            display_path = '...' + display_path[-34:]
+        print(f'{display_path:<40} {stats.total:>7} {stats.used:>7} '
+              f'{pct_used:>6.0f} {pct_code:>6.0f} '
+              f'{klocs(stats.lines_total):>8} {klocs(stats.lines_used):>7}')
+
+        # Show individual files if requested
+        if show_files and stats.files:
+            # Sort files by inactive lines (descending) for line-level, or
+            # alphabetically otherwise
+            if file_results:
+                sorted_files = sorted(stats.files, key=lambda x: x['inactive'],
+                                      reverse=True)
+            else:
+                sorted_files = sorted(stats.files, key=lambda x: x['path'])
+
+            for info in sorted_files:
+                filename = os.path.basename(info['path'])
+                if len(filename) > 35:
+                    filename = filename[:32] + '...'
+
+                if file_results:
+                    # Show line-level details
+                    pct_active = percent(info['active'], info['total'])
+                    print(f"  {filename:<38} {info['total']:>7} "
+                          f"{info['active']:>7} {pct_active:>6.1f} "
+                          f"{info['inactive']:>7}")
+                else:
+                    # Show file-level only
+                    print(f"  {filename:<38} {info['total']:>7} lines")
+
+
+def show_dir_breakdown(all_sources, used_sources, file_results, srcdir,
+                       by_subdirs, show_files, show_empty):
+    """Show breakdown by directory (top-level or subdirectories).
+
+    Args:
+        all_sources (set): Set of all source file paths
+        used_sources (set): Set of used source file paths
+        file_results (dict): Optional dict mapping file paths to line analysis
+            results (or None)
+        srcdir (str): Root directory of the source tree
+        by_subdirs (bool): If True, show full subdirectory breakdown; otherwise
+             top-level only
+        show_files (bool): If True, show individual files within each directory
+        show_empty (bool): If True, show directories with 0 lines used
+
+    Returns:
+        bool: True on success
+    """
+    # Width of the main breakdown table
+    table_width = 87
+
+    print_heading('BREAKDOWN BY SUBDIRECTORY' if by_subdirs
+                  else 'BREAKDOWN BY TOP-LEVEL DIRECTORY',
+                  width=table_width)
+    print(f"{'Directory':<40} {'Files':>7} {'Used':>7} {'%Used':>6} " +
+          f"{'%Code':>6} {'kLOC':>8} {'Used':>7}")
+    print('-' * table_width)
+
+    # Collect directory statistics
+    dir_stats = collect_dir_stats(all_sources, used_sources, file_results,
+                                  srcdir, by_subdirs, show_files)
+
+    # Print directory statistics
+    print_dir_stats(dir_stats, file_results, by_subdirs, show_files, show_empty)
+
+    print('-' * table_width)
+    total_lines_all = sum(count_lines(f) for f in all_sources)
+    # Calculate used lines: if we have file_results, use active_lines from there
+    # Otherwise, count all lines in used files
+    if file_results:
+        total_lines_used = sum(r.active_lines for r in file_results.values())
+    else:
+        total_lines_used = sum(count_lines(f) for f in used_sources)
+    pct_files = percent(len(used_sources), len(all_sources))
+    pct_code = percent(total_lines_used, total_lines_all)
+    print(f"{'TOTAL':<40} {len(all_sources):>7} {len(used_sources):>7} "
+          f"{pct_files:>6.0f} {pct_code:>6.0f} "
+          f"{klocs(total_lines_all):>8} {klocs(total_lines_used):>7}")
+    print_heading('', width=table_width)
+    print()
+
+    return True
+
+
+def show_statistics(all_sources, used_sources, skipped_sources, file_results,
+                    srcdir, top_n):
+    """Show overall statistics about source file usage.
+
+    Args:
+        all_sources (set of str): Set of all source file paths
+        used_sources (set of str): Set of used source file paths
+        skipped_sources (set of str): Set of unused source file paths
+        file_results (dict): Optional dict mapping file paths to line analysis
+            results
+        srcdir (str): Root directory of the source tree
+        top_n (int): Number of top files with most inactive code to show
+
+    Returns:
+        bool: True on success
+    """
+    # Calculate line counts - use file_results (DWARF/unifdef) if available
+    if file_results:
+        # Use active lines from analysis results
+        used_lines = sum(r.active_lines for r in file_results.values())
+    else:
+        # Fall back to counting all lines in used files
+        used_lines = sum(count_lines(f) for f in used_sources)
+
+    unused_lines = sum(count_lines(f) for f in skipped_sources)
+    total_lines = used_lines + unused_lines
+
+    print_heading('FILE-LEVEL STATISTICS', width=70)
+    print(f'Total source files:   {len(all_sources):6}')
+    used_pct = percent(len(used_sources), len(all_sources))
+    print(f'Used source files:    {len(used_sources):6} ({used_pct:.1f}%)')
+    unused_pct = percent(len(skipped_sources), len(all_sources))
+    print(f'Unused source files:  {len(skipped_sources):6} ' +
+          f'({unused_pct:.1f}%)')
+    print()
+    print(f'Total lines of code:  {total_lines:6}')
+    used_lines_pct = percent(used_lines, total_lines)
+    print(f'Used lines of code:   {used_lines:6} ({used_lines_pct:.1f}%)')
+    unused_lines_pct = percent(unused_lines, total_lines)
+    print(f'Unused lines of code: {unused_lines:6} ' +
+          f'({unused_lines_pct:.1f}%)')
+    print_heading('', width=70)
+
+    # If line-level analysis was performed, show those stats too
+    if file_results:
+        print()
+        total_lines_analysed = sum(r.total_lines for r in file_results.values())
+        active_lines = sum(r.active_lines for r in file_results.values())
+        inactive_lines = sum(r.inactive_lines for r in file_results.values())
+
+        print_heading('LINE-LEVEL STATISTICS (within compiled files)', width=70)
+        print(f'Files analysed:           {len(file_results):6}')
+        print(f'Total lines in used files:{total_lines_analysed:6}')
+        active_pct = percent(active_lines, total_lines_analysed)
+        print(f'Active lines:             {active_lines:6} ' +
+              f'({active_pct:.1f}%)')
+        inactive_pct = percent(inactive_lines, total_lines_analysed)
+        print(f'Inactive lines:           {inactive_lines:6} ' +
+              f'({inactive_pct:.1f}%)')
+        print_heading('', width=70)
+        print()
+
+        # Show top files with most inactive code
+        files_by_inactive = sorted(
+            file_results.items(),
+            key=lambda x: x[1].inactive_lines,
+            reverse=True
+        )
+
+        print(f'TOP {top_n} FILES WITH MOST INACTIVE CODE:')
+        print('-' * 70)
+        for source_file, result in files_by_inactive[:top_n]:
+            rel_path = os.path.relpath(source_file, srcdir)
+            pct_inactive = percent(result.inactive_lines, result.total_lines)
+            print(f'  {result.inactive_lines:5} inactive lines ' +
+                  f'({pct_inactive:4.1f}%) - {rel_path}')
+
+    return True