@@ -488,6 +488,14 @@ def parse_args(argv=None):
help='Show line counts in kilolines (kLOC) instead of lines')
dirs.add_argument('--html', type=str, metavar='FILE',
help='Output results as HTML to the specified file')
+ dirs.add_argument('--csv', type=str, metavar='FILE',
+ help='Output results as CSV to the specified file')
+ dirs.add_argument('-u', '--show-unmatched', action='store_true',
+ help='List all files without a category match')
+ dirs.add_argument('-F', '--files-only', action='store_true',
+ help='Only output file rows in CSV (exclude directories)')
+ dirs.add_argument('-E', '--show-empty-features', action='store_true',
+ help='List features with no files defined')
# detail command
detail = subparsers.add_parser('detail',
@@ -611,8 +619,9 @@ def do_output(args, all_srcs, used, skipped, results, srcdir, analysis_method):
elif args.cmd == 'copy-used':
ok = output.copy_used_files(used, srcdir, args.copy_used)
elif args.cmd == 'dirs':
- # Check if HTML output is requested
+ # Check if HTML or CSV output is requested
html_file = getattr(args, 'html', None)
+ csv_file = getattr(args, 'csv', None)
if html_file:
ok = output.generate_html_breakdown(all_srcs, used, results, srcdir,
args.subdirs, args.show_files,
@@ -620,6 +629,14 @@ def do_output(args, all_srcs, used, skipped, results, srcdir, analysis_method):
getattr(args, 'kloc', False),
html_file, args.board,
analysis_method)
+ elif csv_file:
+ ok = output.generate_csv(
+ all_srcs, used, results, srcdir, args.subdirs,
+ args.show_files, args.show_empty,
+ getattr(args, 'kloc', False), csv_file,
+ getattr(args, 'show_unmatched', False),
+ getattr(args, 'files_only', False),
+ getattr(args, 'show_empty_features', False))
else:
ok = output.show_dir_breakdown(all_srcs, used, results, srcdir,
args.subdirs, args.show_files,
@@ -138,6 +138,10 @@ The ``dirs command`` has a few extra options:
* ``-e, --show-empty`` - Show directories/files with 0 lines used
* ``-k, --kloc`` - Show line counts in kilolines (kLOC) instead of raw lines
* ``--html <file>`` - Generate an HTML report with collapsible drill-down
+* ``--csv <file>`` - Generate a CSV report for spreadsheet analysis
+* ``-F, --files-only`` - Only output file rows in CSV (exclude directories)
+* ``-u, --show-unmatched`` - List files without a category match
+* ``-E, --show-empty-features`` - List features with no files defined
Other:
@@ -312,6 +316,39 @@ The HTML report includes:
This is useful for sharing reports or exploring large codebases interactively
in a web browser.
+CSV Reports (``dirs --csv``)
+----------------------------
+
+Generate a CSV report for spreadsheet analysis or further processing::
+
+ codman -b qemu-x86 dirs -sf --csv report.csv
+
+The CSV includes columns for Type, Path, Category, Feature, file counts
+(Files, Used, %Used) and line statistics (%Code, Lines, Used)::
+
+ Type,Path,Category,Feature,Files,Used,%Used,%Code,Lines,Used
+ dir,arch/x86/cpu,,,20,15,75,85,3816,3227
+ file,arch/x86/cpu/call32.S,load-boot,boot-x86-bare,,,,100,61,61
+ file,arch/x86/cpu/cpu.c,load-boot,boot-x86-bare,,,,88,399,353
+ ...
+
+Use ``-F`` (``--files-only``) for a simplified output with just file rows
+(no directory summaries)::
+
+ codman -b qemu-x86 dirs -sf --csv report.csv -F
+
+This produces cleaner output with columns: Path, Category, Feature, %Code,
+Lines, Used::
+
+ Path,Category,Feature,%Code,Lines,Used
+ arch/x86/cpu/call32.S,load-boot,boot-x86-bare,100,61,61
+ arch/x86/cpu/cpu.c,load-boot,boot-x86-bare,88,399,353
+ arch/x86/cpu/cpu_x86.c,load-boot,boot-x86-bare,100,99,99
+ ...
+
+CSV reports include category information from ``category.cfg``. Other output
+formats (terminal, HTML) do not yet use categories.
+
Categories and Features
-----------------------
@@ -343,6 +380,17 @@ Example category.cfg structure::
"boot/image-board.c",
]
+When generating CSV reports, codman matches each source file to its feature
+and category, making it easy to analyse code by functional area.
+
+Use ``-u`` (``--show-unmatched``) to list files that don't match any feature::
+
+    codman -b qemu-x86 dirs -sf --csv report.csv -u
+
+Use ``-E`` (``--show-empty-features``) to list features with no files defined::
+
+    codman -b qemu-x86 dirs -sf --csv report.csv -E
+
**Ignoring External Code**
The ``[ignore]`` section in category.cfg can exclude external/vendored code
@@ -14,6 +14,7 @@ formats:
- File copying operations
"""
+import csv
import os
import shutil
import sys
@@ -23,6 +24,8 @@ from collections import defaultdict
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from u_boot_pylib import terminal, tout # pylint: disable=wrong-import-position
+import category
+
class DirStats: # pylint: disable=too-few-public-methods
"""Statistics for a directory.
@@ -507,7 +510,7 @@ def generate_html_breakdown(all_sources, used_sources, file_results, srcdir,
use_kloc (bool): If True, show line counts in kLOC
html_file (str): Path to output HTML file
board (str): Board name (optional)
- analysis_method (str): Analysis method used ('unifdef', 'lsp', or 'dwarf')
+ analysis_method (str): Analysis method ('unifdef'/'lsp'/'dwarf')
Returns:
bool: True on success
@@ -907,6 +910,210 @@ def generate_html_breakdown(all_sources, used_sources, file_results, srcdir,
return False
+def _write_file_row(writer, info, features, ignore_patterns, file_results,
+ use_kloc, files_only):
+ """Write a single file row to CSV.
+
+ Args:
+ writer: CSV writer object
+ info (dict): File info with 'path', 'total', 'active' keys
+ features (dict): Features dict from category config
+ ignore_patterns (list): List of patterns to ignore
+ file_results (dict): File analysis results (or None)
+ use_kloc (bool): If True, show line counts in kLOC
+ files_only (bool): If True, use simplified row format
+
+ Returns:
+ tuple: (wrote_row, is_matched) - whether row was written, whether file
+ matched a category
+ """
+ # Skip ignored files (external code)
+ if category.should_ignore_file(info['path'], ignore_patterns):
+ return False, True # Not written, but considered matched
+
+ # Match file to feature/category
+ feat_id, cat_id = None, None
+ if features:
+ feat_id, cat_id = category.get_file_feature(info['path'], features)
+
+ is_matched = feat_id is not None
+
+ if file_results:
+ pct_active = percent(info['active'], info['total'])
+
+ if use_kloc:
+ total_str = klocs(info['total'])
+ active_str = klocs(info['active'])
+ else:
+ total_str = info['total']
+ active_str = info['active']
+
+ if files_only:
+ writer.writerow([info['path'], cat_id or '', feat_id or '',
+ f'{pct_active:.0f}', total_str, active_str])
+ else:
+ writer.writerow(['file', info['path'], cat_id or '', feat_id or '',
+ '', '', '', f'{pct_active:.0f}',
+ total_str, active_str])
+
+ return True, is_matched
+
+
+def _report_matching_stats(features, total_files, unmatched_files,
+ show_unmatched, show_empty_features):
+ """Report category matching statistics.
+
+ Args:
+ features (dict): Features dict from category config
+ total_files (int): Total number of files processed
+ unmatched_files (list): List of file paths without category match
+ show_unmatched (bool): If True, list all unmatched files
+ show_empty_features (bool): If True, list features with no files
+ """
+ if features and total_files > 0:
+ matched = total_files - len(unmatched_files)
+ print(f'Category matching: {matched}/{total_files} files matched, '
+ f'{len(unmatched_files)} unmatched')
+ if show_unmatched and unmatched_files:
+ print('Unmatched files:')
+ for filepath in sorted(unmatched_files):
+ print(f' {filepath}')
+
+ if features and show_empty_features:
+ empty_features = [
+ feat_id for feat_id, feat_data in features.items()
+ if not feat_data.get('files', [])
+ ]
+ if empty_features:
+ print(f'Features with no files ({len(empty_features)}):')
+ for feat_id in sorted(empty_features):
+ print(f' {feat_id}')
+
+
+def generate_csv(all_sources, used_sources, file_results, srcdir,
+ by_subdirs, show_files, show_empty, use_kloc, csv_file,
+ show_unmatched=False, files_only=False,
+ show_empty_features=False):
+ """Generate CSV output with directory breakdown.
+
+ Args:
+ all_sources (set): Set of all source file paths
+ used_sources (set): Set of used source file paths
+ file_results (dict): Optional dict mapping file paths to line analysis
+ results (or None)
+ srcdir (str): Root directory of the source tree
+ by_subdirs (bool): If True, show full subdirectory breakdown
+ show_files (bool): If True, show individual files within directories
+ show_empty (bool): If True, show directories with 0 lines used
+ use_kloc (bool): If True, show line counts in kLOC
+ csv_file (str): Path to output CSV file
+ show_unmatched (bool): If True, list all unmatched files to stdout
+ files_only (bool): If True, only output file rows (exclude directories)
+ show_empty_features (bool): If True, list features with no files defined
+
+ Returns:
+ bool: True on success
+ """
+
+ # Load category configuration for file-to-feature matching
+ cfg = category.load_category_config(srcdir)
+ features = cfg.features if cfg else None
+ ignore_patterns = cfg.ignore if cfg else None
+
+ # Collect directory statistics
+ dir_stats = collect_dir_stats(all_sources, used_sources, file_results,
+ srcdir, by_subdirs, show_files)
+
+ # Calculate totals
+ total_lines_all = sum(count_lines(f) for f in all_sources)
+ if file_results:
+ total_lines_used = sum(r.active_lines for r in file_results.values())
+ else:
+ total_lines_used = sum(count_lines(f) for f in used_sources)
+
+ # Track unmatched files
+ unmatched_files = []
+ total_files = 0
+
+ try:
+ with open(csv_file, 'w', newline='', encoding='utf-8') as f:
+ writer = csv.writer(f)
+
+ # Write header
+ lines_header = 'kLOC' if use_kloc else 'Lines'
+ if files_only:
+ writer.writerow(['Path', 'Category', 'Feature', '%Code',
+ lines_header, 'Used'])
+ else:
+ writer.writerow(['Type', 'Path', 'Category', 'Feature', 'Files',
+ 'Used', '%Used', '%Code', lines_header, 'Used'])
+
+ # Sort and output directories
+ for dir_path in sorted(dir_stats.keys()):
+ stats = dir_stats[dir_path]
+
+ # Skip directories with 0 lines used unless show_empty is set
+ if not show_empty and stats.lines_used == 0:
+ continue
+
+ pct_used = percent(stats.used, stats.total)
+ pct_code = percent(stats.lines_used, stats.lines_total)
+
+ if use_kloc:
+ lines_total_str = klocs(stats.lines_total)
+ lines_used_str = klocs(stats.lines_used)
+ else:
+ lines_total_str = stats.lines_total
+ lines_used_str = stats.lines_used
+
+ if not files_only:
+ writer.writerow([
+ 'dir', dir_path, '', '', stats.total, stats.used,
+ f'{pct_used:.0f}', f'{pct_code:.0f}',
+ lines_total_str, lines_used_str])
+
+ # Output files if requested
+ if show_files and stats.files:
+ sorted_files = sorted(
+ stats.files, key=lambda x: os.path.basename(x['path']))
+
+ for info in sorted_files:
+ if not show_empty and info['active'] == 0:
+ continue
+
+ wrote, matched = _write_file_row(
+ writer, info, features, ignore_patterns,
+ file_results, use_kloc, files_only)
+ if wrote:
+ total_files += 1
+ if not matched:
+ unmatched_files.append(info['path'])
+
+ # Write totals row
+ pct_files = percent(len(used_sources), len(all_sources))
+ pct_code = percent(total_lines_used, total_lines_all)
+
+ if use_kloc:
+ total_str = klocs(total_lines_all)
+ used_str = klocs(total_lines_used)
+ else:
+ total_str = total_lines_all
+ used_str = total_lines_used
+
+ if not files_only:
+ writer.writerow(['total', 'TOTAL', '', '', len(all_sources),
+ len(used_sources), f'{pct_files:.0f}',
+ f'{pct_code:.0f}', total_str, used_str])
+
+ tout.info(f'CSV report written to: {csv_file}')
+ _report_matching_stats(features, total_files, unmatched_files,
+ show_unmatched, show_empty_features)
+ return True
+ except IOError as e:
+ tout.error(f'Failed to write CSV file: {e}')
+ return False
+
+
def show_statistics(all_sources, used_sources, skipped_sources, file_results,
srcdir, top_n):
"""Show overall statistics about source file usage.
@@ -194,6 +194,51 @@ files = []
self.assertIn('test', result.categories)
+class TestShouldIgnoreFile(unittest.TestCase):
+ """Test cases for should_ignore_file function"""
+
+ def test_ignore_directory_prefix(self):
+ """Test ignoring files by directory prefix"""
+ ignore = ['lib/external/']
+ self.assertTrue(category.should_ignore_file(
+ 'lib/external/foo.c', ignore))
+ self.assertTrue(category.should_ignore_file(
+ 'lib/external/sub/bar.c', ignore))
+ self.assertFalse(category.should_ignore_file(
+ 'lib/internal/foo.c', ignore))
+
+ def test_ignore_exact_path(self):
+ """Test ignoring files by exact path"""
+ ignore = ['lib/external/specific.c']
+ self.assertTrue(category.should_ignore_file(
+ 'lib/external/specific.c', ignore))
+ self.assertFalse(category.should_ignore_file(
+ 'lib/external/other.c', ignore))
+
+ def test_ignore_glob_pattern(self):
+ """Test ignoring files by glob pattern"""
+ ignore = ['lib/external/*.c']
+ self.assertTrue(category.should_ignore_file(
+ 'lib/external/foo.c', ignore))
+ self.assertFalse(category.should_ignore_file(
+ 'lib/external/foo.h', ignore))
+
+ def test_empty_ignore_list(self):
+ """Test with empty ignore list"""
+ self.assertFalse(category.should_ignore_file('any/file.c', []))
+ self.assertFalse(category.should_ignore_file('any/file.c', None))
+
+ def test_multiple_ignore_patterns(self):
+ """Test with multiple ignore patterns"""
+ ignore = ['lib/external/', 'vendor/*.c']
+ self.assertTrue(category.should_ignore_file(
+ 'lib/external/foo.c', ignore))
+ self.assertTrue(category.should_ignore_file(
+ 'vendor/bar.c', ignore))
+ self.assertFalse(category.should_ignore_file(
+ 'src/main.c', ignore))
+
+
class TestHelperFunctions(unittest.TestCase):
"""Test cases for helper functions"""
new file mode 100644
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright 2025 Canonical Ltd
+#
+"""Unit tests for output.py CSV generation"""
+
+import csv
+import os
+import shutil
+import sys
+import tempfile
+import unittest
+from collections import namedtuple
+
+# Test configuration
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# Import the module to test
+sys.path.insert(0, SCRIPT_DIR)
+sys.path.insert(0, os.path.join(SCRIPT_DIR, '..'))
+import output # pylint: disable=wrong-import-position
+from u_boot_pylib import tools # pylint: disable=wrong-import-position
+
+
+# Mock FileResult for testing
+FileResult = namedtuple('FileResult',
+ ['total_lines', 'active_lines', 'inactive_lines'])
+
+
+class TestGenerateCsv(unittest.TestCase):
+ """Test cases for generate_csv function"""
+
+ def setUp(self):
+ """Create temporary directory with test files"""
+ self.test_dir = tempfile.mkdtemp(prefix='test_output_')
+
+ # Create source files
+ self.src_dir = os.path.join(self.test_dir, 'src')
+ os.makedirs(os.path.join(self.src_dir, 'boot'))
+ os.makedirs(os.path.join(self.src_dir, 'drivers', 'net'))
+ os.makedirs(os.path.join(self.src_dir, 'tools', 'codman'))
+
+ # Create test source files with known content
+ self.files = {
+ 'boot/bootm.c': '// boot\n' * 100,
+ 'boot/image.c': '// image\n' * 50,
+ 'drivers/net/eth.c': '// eth\n' * 200,
+ }
+ for path, content in self.files.items():
+ full_path = os.path.join(self.src_dir, path)
+ tools.write_file(full_path, content, binary=False)
+
+ # Create category.cfg
+ cfg_content = '''
+[categories.load-boot]
+description = "Loading & Boot"
+
+[categories.drivers]
+description = "Drivers"
+
+[features.boot-core]
+category = "load-boot"
+description = "Core boot"
+files = ["boot/"]
+
+[features.ethernet]
+category = "drivers"
+description = "Ethernet"
+files = ["drivers/net/"]
+'''
+ cfg_path = os.path.join(self.src_dir, 'tools', 'codman', 'category.cfg')
+ tools.write_file(cfg_path, cfg_content, binary=False)
+
+ self.csv_file = os.path.join(self.test_dir, 'report.csv')
+
+ def tearDown(self):
+ """Clean up temporary directory"""
+ if os.path.exists(self.test_dir):
+ shutil.rmtree(self.test_dir)
+
+ def test_csv_basic(self):
+ """Test basic CSV generation"""
+ all_sources = {
+ os.path.join(self.src_dir, p) for p in self.files
+ }
+ used_sources = all_sources.copy()
+
+ result = output.generate_csv(
+ all_sources, used_sources, None, self.src_dir,
+ by_subdirs=True, show_files=True, show_empty=False,
+ use_kloc=False, csv_file=self.csv_file)
+
+ self.assertTrue(result)
+ self.assertTrue(os.path.exists(self.csv_file))
+
+ # Read and verify CSV content
+ data = tools.read_file(self.csv_file, binary=False)
+ rows = list(csv.reader(data.splitlines()))
+
+ # Check header
+ self.assertEqual(rows[0][0], 'Type')
+ self.assertEqual(rows[0][1], 'Path')
+ self.assertEqual(rows[0][2], 'Category')
+ self.assertEqual(rows[0][3], 'Feature')
+
+ def test_csv_files_only(self):
+ """Test CSV generation with files_only option"""
+ all_sources = {
+ os.path.join(self.src_dir, p) for p in self.files
+ }
+ used_sources = all_sources.copy()
+
+ result = output.generate_csv(
+ all_sources, used_sources, None, self.src_dir,
+ by_subdirs=True, show_files=True, show_empty=False,
+ use_kloc=False, csv_file=self.csv_file, files_only=True)
+
+ self.assertTrue(result)
+
+ data = tools.read_file(self.csv_file, binary=False)
+ rows = list(csv.reader(data.splitlines()))
+
+ # Check simplified header for files_only
+ self.assertEqual(rows[0][0], 'Path')
+ self.assertEqual(rows[0][1], 'Category')
+ self.assertEqual(rows[0][2], 'Feature')
+ self.assertEqual(rows[0][3], '%Code')
+
+ # No 'dir' or 'total' rows
+ for row in rows[1:]:
+ self.assertNotIn(row[0], ['dir', 'total'])
+
+ def test_csv_category_matching(self):
+ """Test that files are matched to correct categories"""
+ all_sources = {
+ os.path.join(self.src_dir, p) for p in self.files
+ }
+ used_sources = all_sources.copy()
+
+ # Create mock file results
+ file_results = {}
+ for path, content in self.files.items():
+ full_path = os.path.join(self.src_dir, path)
+ lines = len(content.split('\n'))
+ file_results[full_path] = FileResult(lines, lines, 0)
+
+ result = output.generate_csv(
+ all_sources, used_sources, file_results, self.src_dir,
+ by_subdirs=True, show_files=True, show_empty=False,
+ use_kloc=False, csv_file=self.csv_file, files_only=True)
+
+ self.assertTrue(result)
+
+ data = tools.read_file(self.csv_file, binary=False)
+ rows = list(csv.reader(data.splitlines()))
+
+ # Find boot files and verify category
+ boot_rows = [r for r in rows[1:] if 'boot/' in r[0]]
+ self.assertEqual(len(boot_rows), 2) # bootm.c and image.c
+ for row in boot_rows:
+ self.assertEqual(row[1], 'load-boot')
+ self.assertEqual(row[2], 'boot-core')
+
+ # Find driver files and verify category
+ driver_rows = [r for r in rows[1:] if 'drivers/' in r[0]]
+ self.assertEqual(len(driver_rows), 1) # eth.c
+ for row in driver_rows:
+ self.assertEqual(row[1], 'drivers')
+ self.assertEqual(row[2], 'ethernet')
+
+ def test_csv_with_ignore(self):
+ """Test CSV generation with ignored files"""
+ # Add ignore section to config
+ cfg_path = os.path.join(self.src_dir, 'tools', 'codman', 'category.cfg')
+ existing = tools.read_file(cfg_path, binary=False)
+ tools.write_file(cfg_path,
+ existing + '\n[ignore]\nfiles = ["drivers/net/"]\n',
+ binary=False)
+
+ all_sources = {
+ os.path.join(self.src_dir, p) for p in self.files
+ }
+ used_sources = all_sources.copy()
+
+ # Create mock file results
+ file_results = {}
+ for path, content in self.files.items():
+ full_path = os.path.join(self.src_dir, path)
+ lines = len(content.split('\n'))
+ file_results[full_path] = FileResult(lines, lines, 0)
+
+ result = output.generate_csv(
+ all_sources, used_sources, file_results, self.src_dir,
+ by_subdirs=True, show_files=True, show_empty=False,
+ use_kloc=False, csv_file=self.csv_file, files_only=True)
+
+ self.assertTrue(result)
+
+ data = tools.read_file(self.csv_file, binary=False)
+ rows = list(csv.reader(data.splitlines()))
+
+ # Verify ignored files are not in output
+ paths = [r[0] for r in rows[1:]]
+ self.assertFalse(any('drivers/net/' in p for p in paths))
+
+ # Boot files should still be there
+ self.assertTrue(any('boot/' in p for p in paths))
+
+
+if __name__ == '__main__':
+ unittest.main()