#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2026: Mauro Carvalho Chehab .
#
# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917

"""
docdiff - Check differences between kernel‑doc output between two
different commits.

Examples
--------

Compare the kernel‑doc output between the last two 5.15 releases::

    $ kdoc_diff v6.18..v6.19

Both outputs are cached

Force a complete documentation scan and clean any previous cache from
6.19 to the current HEAD::

    $ kdoc_diff 6.19.. --full --clean

Check differences only on a single driver since origin/main::

    $ kdoc_diff origin/main drivers/media

Generate an YAML file and use it to check for regressions::

    $ kdoc_diff HEAD~ drivers/media --regression
"""

import argparse
import os
import re
import shutil
import signal
import subprocess
import sys
from glob import iglob

# This script lives two directory levels below the kernel tree root.
SRC_DIR = os.path.dirname(os.path.realpath(__file__))
WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))

KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")

CACHE_DIR = ".doc_diff_cache"
YAML_NAME = "out.yaml"

# Cache sub-directory per scan mode. "tmp" is the scratch area that gets
# atomically renamed into the per-commit cache once a run succeeds.
DIR_NAME = {
    "full": os.path.join(CACHE_DIR, "full"),
    "partial": os.path.join(CACHE_DIR, "partial"),
    "no-cache": os.path.join(CACHE_DIR, "no_cache"),
    "tmp": os.path.join(CACHE_DIR, "__tmp__"),
}


class GitHelper:
    """Handles all Git operations"""

    def __init__(self, work_dir=None):
        self.work_dir = work_dir

    def is_inside_repository(self):
        """Check if we're inside a Git repository"""
        try:
            output = subprocess.check_output(
                ["git", "rev-parse", "--is-inside-work-tree"],
                cwd=self.work_dir,
                stderr=subprocess.STDOUT,
                universal_newlines=True)
            return output.strip() == "true"
        except subprocess.CalledProcessError:
            return False

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        try:
            subprocess.check_output(["git", "rev-parse", commit_hash],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return True
        except subprocess.CalledProcessError:
            return False

    def get_short_hash(self, commit_hash):
        """Get short commit hash. Returns "" if the ref can't be resolved."""
        try:
            return subprocess.check_output(
                ["git", "rev-parse", "--short", commit_hash],
                cwd=self.work_dir,
                stderr=subprocess.STDOUT,
                universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def has_uncommitted_changes(self):
        """Check for uncommitted changes"""
        try:
            # diff-index exits non-zero when the work tree differs from HEAD
            subprocess.check_output(
                ["git", "diff-index", "--quiet", "HEAD", "--"],
                cwd=self.work_dir,
                stderr=subprocess.STDOUT)
            return False
        except subprocess.CalledProcessError:
            return True

    def get_current_branch(self):
        """Get current branch name"""
        return subprocess.check_output(["git", "branch", "--show-current"],
                                       cwd=self.work_dir,
                                       universal_newlines=True).strip()

    def checkout_commit(self, commit_hash, quiet=True):
        """
        Checkout a commit safely.

        NOTE: the post-checkout sanity check compares against the SHORT
        hash of HEAD, so callers are expected to pass short hashes
        (see main(), which normalizes refs via get_short_hash()).
        """
        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)

        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)

            # Double-check if branch actually switched
            branch = self.get_short_hash("HEAD")
            if commit_hash != branch:
                raise RuntimeError(f"Branch changed to '{branch}' instead of '{commit_hash}'")

            return True
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            return False


class CacheManager:
    """Manages persistent cache directories"""

    def __init__(self, work_dir):
        self.work_dir = work_dir

    def initialize(self):
        """Create cache directories if they don't exist"""
        for dir_path in DIR_NAME.values():
            abs_path = os.path.join(self.work_dir, dir_path)
            if not os.path.exists(abs_path):
                os.makedirs(abs_path, exist_ok=True, mode=0o755)

    def get_commit_cache(self, commit_hash, path):
        """Generate cache path for a commit"""
        hash_short = GitHelper(self.work_dir).get_short_hash(commit_hash)
        if not hash_short:
            # Unresolvable ref: fall back to the name the caller gave us
            hash_short = commit_hash
        return os.path.join(path, hash_short)


class KernelDocRunner:
    """Runs kernel-doc documentation generator"""

    def __init__(self, work_dir, kdoc_binary):
        self.work_dir = work_dir
        self.kdoc_binary = kdoc_binary
        # Memoized list of files referenced by kernel-doc:: directives
        self.kdoc_files = None

    def find_kdoc_references(self):
        """Find all files marked with kernel-doc:: directives"""
        if self.kdoc_files:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")

        kdoc_files = set()
        for file_path in iglob(files, recursive=True):
            try:
                with open(file_path, 'r', encoding='utf-8') as fp:
                    for line in fp:
                        match = pattern.match(line.strip())
                        if match:
                            kdoc_files.add(match.group(1))
            except OSError:
                # Best-effort scan: skip unreadable files
                continue

        self.kdoc_files = list(kdoc_files)
        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """Runs kernel-doc to generate a yaml file with man and rst."""
        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
        cmd += kdoc_files

        print(f"YAML regression test file will be stored at: {yaml_file}")

        try:
            subprocess.check_call(cmd, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False

        return True

    def run_unittest(self, yaml_file):
        """
        Run unit tests with the generated yaml file.

        Returns True even when the test suite reports failures: this is a
        report-only step, and the hints printed below tell the user how to
        re-run the tests by hand.
        """
        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        result = subprocess.run(cmd, cwd=self.work_dir, check=False)

        if result.returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k to filter results\n\n\t$", end="")
            print(" ".join(cmd) + "\n")

        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """Generate man, rst and errors, storing them at tmp_dir."""
        os.makedirs(tmp_dir, exist_ok=True)

        man_log = os.path.join(tmp_dir, "man.log")
        rst_log = os.path.join(tmp_dir, "rst.log")
        err_log = os.path.join(tmp_dir, "err.log")

        try:
            with open(man_log, "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir, stdout=out,
                                      stderr=subprocess.DEVNULL)

            with open(rst_log, "w", encoding="utf-8") as out, \
                 open(err_log, "w", encoding="utf-8") as err:
                subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                      cwd=self.work_dir, stdout=out,
                                      stderr=err)
        except subprocess.CalledProcessError:
            return False

        if output_dir:
            # Atomically promote the scratch dir to the per-commit cache
            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files,
            is_regression, is_end):
        """Run kernel-doc on its several ways"""
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        git_helper = GitHelper(self.work_dir)
        if not git_helper.checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)
        if not is_end:
            # First commit of the range: produce the reference YAML
            return self.gen_yaml(yaml_file, kdoc_files)

        # Last commit: check the parser output against the reference YAML
        return self.run_unittest(yaml_file)


class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        self.diff_tool = diff_tool

        # default: unified, no context, ignore whitespace changes
        self.diff_args = diff_args or ["-u0", "-w"]

    @staticmethod
    def _relative_files(base_dir):
        """Collect all file paths under base_dir, relative to it."""
        rel_files = set()
        for root, _, files in os.walk(base_dir):
            for file in files:
                rel_files.add(os.path.relpath(os.path.join(root, file),
                                              base_dir))
        return rel_files

    def diff_directories(self, dir1, dir2):
        """Compare two directories using an external diff."""
        print(f"\nDiffing {dir1} and {dir2}:")

        dir1_files = self._relative_files(dir1)
        dir2_files = self._relative_files(dir2)
        has_diff = False

        common_files = sorted(dir1_files & dir2_files)

        for file in common_files:
            f1 = os.path.join(dir1, file)
            f2 = os.path.join(dir2, file)

            cmd = [self.diff_tool] + self.diff_args + [f1, f2]
            try:
                result = subprocess.run(
                    cmd, capture_output=True, text=True, check=False
                )
                if result.stdout:
                    has_diff = True
                    print(f"\n{file}")
                    print(result.stdout, end="")
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files

        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for f in sorted(only_in_dir1):
                print(f" - {f} (only in {dir1})")
            for f in sorted(only_in_dir2):
                print(f" + {f} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")


class SignalHandler:
    """
    Context manager that restores the original git HEAD if the script is
    interrupted (SIGINT/SIGTERM) or when the with-block exits.
    """

    def __init__(self, git_helper, original_head):
        self.git_helper = git_helper
        self.original_head = original_head
        self.old_handler = {}
        self.restored = False

    def restore(self, force_exit=False):
        """Restore original HEAD state."""
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")
        try:
            subprocess.check_call(
                ["git", "checkout", "-f", self.original_head],
                cwd=self.git_helper.work_dir,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            print(f"Failed to restore: {e}", file=sys.stderr)

        # Put back the signal handlers we replaced in __enter__()
        for sig, handler in self.old_handler.items():
            signal.signal(sig, handler)

        self.restored = True

        if force_exit:
            sys.exit(1)

    def signal_handler(self, sig, _):
        """Handle interrupt signals."""
        print(f"\nSignal {sig} received. Restoring original state...")
        self.restore(force_exit=True)

    def __enter__(self):
        """Allow using it via with command."""
        for sig in [signal.SIGINT, signal.SIGTERM]:
            self.old_handler[sig] = signal.getsignal(sig)
            signal.signal(sig, self.signal_handler)
        return self

    def __exit__(self, *args):
        """Restore signals at the end of with block."""
        self.restore()


def parse_commit_range(value):
    """
    Handle a commit range.

    Accepts "old..new", "old.." (new defaults to HEAD) or a bare "old"
    (also meaning old..HEAD). Returns an (old, new) tuple.

    Raises argparse.ArgumentTypeError when the begin ref is missing.
    """
    if ".." not in value:
        begin = value
        end = "HEAD"
    else:
        begin, _, end = value.partition("..")
        if not end:
            end = "HEAD"

    if not begin:
        # FIX: typo in the original message ("begginning")
        raise argparse.ArgumentTypeError("Need a commit beginning")

    print(f"Range: {begin} to {end}")
    return begin, end


def main():
    """Main code"""
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process – if supplied the --full flag is ignored")
    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")
    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")
    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")
    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    if args.files and args.full:
        # FIX: the original raised argparse.ArgumentError(args.full, ...),
        # which requires an argparse Action, not a bool - that crashed with
        # AttributeError. parser.error() reports cleanly and exits.
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate git repository
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    old_commit, new_commit = args.commits

    # Validate commits BEFORE shortening them: get_short_hash() returns ""
    # for an unresolvable ref, and the original order produced the useless
    # message "Commit '' does not exist".
    for commit in [old_commit, new_commit]:
        if not git_helper.is_valid_commit(commit):
            raise RuntimeError(f"Commit '{commit}' does not exist")

    # checkout_commit() verifies HEAD against short hashes, so normalize
    old_commit = git_helper.get_short_hash(old_commit)
    new_commit = git_helper.get_short_hash(new_commit)

    # Check for uncommitted changes
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed
    cache_msg = " (results will be cached)"
    if args.full:
        kdoc_files = ["."]
        diff_type = "full"
        print(f"Parsing all files at {work_dir}")
    elif not args.files:
        diff_type = "partial"
        kdoc_files = runner.find_kdoc_references()
        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
    else:
        diff_type = "no-cache"
        cache_msg = ""
        kdoc_files = args.files

    tmp_dir = DIR_NAME["tmp"]
    out_path = DIR_NAME[diff_type]

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice
    if not kdoc_files:
        # FIX: same argparse.ArgumentError misuse as above
        raise RuntimeError("No kernel-doc references found")

    original_head = git_helper.get_current_branch()

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    with SignalHandler(git_helper, original_head):
        if args.clean or diff_type == "no-cache":
            for cache_dir in [old_cache, new_cache]:
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache,
                                     kdoc_files, args.regression, False)
        else:
            old_success = True

        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache,
                                     kdoc_files, args.regression, True)
        else:
            new_success = True

        if not (old_success and new_success):
            raise RuntimeError("Failed to generate documentation")

        if not args.regression:
            diff_manager = DiffManager()
            diff_manager.diff_directories(old_cache, new_cache)


if __name__ == "__main__":
    main()