new file mode 100644
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright 2025 Canonical Ltd
+#
+"""DWARF debug info-based line-level analysis for source code.
+
+This module provides functionality to analyse which lines in source files
+were compiled by extracting line information from DWARF debug data in
+object files.
+"""
+
+import multiprocessing
+import os
+import subprocess
+from collections import defaultdict
+
+from u_boot_pylib import tout
+from analyser import Analyser, FileResult
+
+
+def worker(args):
+ """Extract line numbers from DWARF debug info in an object file.
+
+ Uses readelf --debug-dump=decodedline to get the line table, then parses
+ section headers and line entries to determine which source lines were
+ compiled into the object.
+
+ Args:
+ args (tuple): Tuple of (obj_path, build_dir, srcdir)
+
+ Returns:
+ tuple: (source_lines_dict, error_msg) where source_lines_dict is a
+ mapping of source file paths to sets of line numbers, and
+ error_msg is None on success or an error string on failure
+ """
+ obj_path, build_dir, srcdir = args
+ source_lines = defaultdict(set)
+
+ # Get the directory of the .o file relative to build_dir
+ rel_to_build = os.path.relpath(obj_path, build_dir)
+ obj_dir = os.path.dirname(rel_to_build)
+
+ # Use readelf to extract decoded line information
+ try:
+ result = subprocess.run(
+ ['readelf', '--debug-dump=decodedline', obj_path],
+ capture_output=True, text=True, check=False,
+ encoding='utf-8', errors='ignore')
+ if result.returncode != 0:
+ error_msg = (f'readelf failed on {obj_path} with return code '
+ f'{result.returncode}\nstderr: {result.stderr}')
+ return (source_lines, error_msg)
+
+ # Parse the output
+ # Format is: Section header with full path, then data lines
+ current_file = None
+ for line in result.stdout.splitlines():
+ # Skip header lines and empty lines
+ if not line or line.startswith('Contents of') or \
+ line.startswith('File name') or line.strip() == '' or \
+ line.startswith(' '):
+ continue
+
+ # Look for section headers with full path (e.g., '/path/to/file.c:')
+ if line.endswith(':'):
+ header_path = line.rstrip(':')
+ # Try to resolve the path
+ if os.path.isabs(header_path):
+ # Absolute path in DWARF
+ abs_path = os.path.realpath(header_path)
+ else:
+ # Relative path - try relative to srcdir and obj_dir
+ abs_path = os.path.realpath(
+ os.path.join(srcdir, obj_dir, header_path))
+ if not os.path.exists(abs_path):
+ abs_path = os.path.realpath(
+ os.path.join(srcdir, header_path))
+
+ if os.path.exists(abs_path):
+ current_file = abs_path
+ continue
+
+ # Parse data lines - use current_file from section header
+ if current_file:
+ parts = line.split()
+ if len(parts) >= 2:
+ try:
+ line_num = int(parts[1])
+ # Skip special line numbers (like '-')
+ if line_num > 0:
+ source_lines[current_file].add(line_num)
+ except (ValueError, IndexError):
+ continue
+ except (OSError, subprocess.SubprocessError) as e:
+ error_msg = f'Failed to execute readelf on {obj_path}: {e}'
+ return (source_lines, error_msg)
+
+ return (source_lines, None)
+
+
+# pylint: disable=too-few-public-methods
+class DwarfAnalyser(Analyser):
+ """Analyser that uses DWARF debug info to determine active lines.
+
+ This analyser extracts line number information from DWARF debug data in
+ compiled object files to determine which source lines generated code.
+ """
+ def __init__(self, build_dir, srcdir, used_sources, keep_temps=False):
+ """Initialise the DWARF analyser.
+
+ Args:
+ build_dir (str): Build directory containing .o files
+ srcdir (str): Path to source root directory
+ used_sources (set): Set of source files that are compiled
+ keep_temps (bool): If True, keep temporary files for debugging
+ """
+ super().__init__(srcdir, keep_temps)
+ self.build_dir = build_dir
+ self.used_sources = used_sources
+
+ def extract_lines(self, jobs=None):
+ """Extract used line numbers from DWARF debug info in object files.
+
+ Args:
+ jobs (int): Number of parallel jobs (None = use all CPUs)
+
+ Returns:
+ dict: Mapping of source file paths to sets of line numbers that
+ generated code
+ """
+ # Find all .o files
+ obj_files = self.find_object_files(self.build_dir)
+
+ if not obj_files:
+ return defaultdict(set)
+
+ # Prepare arguments for parallel processing
+ args_list = [(obj_path, self.build_dir, self.srcdir)
+ for obj_path in obj_files]
+
+ # Process in parallel
+ num_jobs = jobs if jobs else multiprocessing.cpu_count()
+ with multiprocessing.Pool(num_jobs) as pool:
+ results = pool.map(worker, args_list)
+
+ # Merge results from all workers and check for errors
+ source_lines = defaultdict(set)
+ errors = []
+ for result_dict, error_msg in results:
+ if error_msg:
+ errors.append(error_msg)
+ else:
+ for source_file, lines in result_dict.items():
+ source_lines[source_file].update(lines)
+
+ # Report any errors
+ if errors:
+ for error in errors:
+ tout.error(error)
+ tout.fatal(f'readelf failed on {len(errors)} object file(s)')
+
+ return source_lines
+
+ def process(self, jobs=None):
+ """Perform line-level analysis using DWARF debug info.
+
+ Args:
+ jobs (int): Number of parallel jobs (None = use all CPUs)
+
+ Returns:
+ dict: Mapping of source file paths to FileResult named tuples
+ """
+ tout.progress('Extracting DWARF line information...')
+ dwarf_line_map = self.extract_lines(jobs)
+
+ file_results = {}
+ for source_file in self.used_sources:
+ abs_path = os.path.realpath(source_file)
+ used_lines = dwarf_line_map.get(abs_path, set())
+
+ # Count total lines in the file
+ total_lines = self.count_lines(abs_path)
+
+ active_lines = len(used_lines)
+ inactive_lines = total_lines - active_lines
+
+ # Create line status dict
+ line_status = {}
+ for i in range(1, total_lines + 1):
+ line_status[i] = 'active' if i in used_lines else 'inactive'
+
+ file_results[abs_path] = FileResult(
+ total_lines=total_lines,
+ active_lines=active_lines,
+ inactive_lines=inactive_lines,
+ line_status=line_status
+ )
+
+ tout.info(f'Analysed {len(file_results)} files using DWARF debug info')
+ return file_results