From 2cdd27a70887f8206809fae5c2c08c768fd8daba Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab
Date: Wed, 1 Oct 2025 16:49:29 +0200
Subject: tools: docs: parse_data_structs.py: get rid of process_exceptions()

Add an extra parameter to parse_file to make it handle exceptions
internally, cleaning up the API.

Signed-off-by: Mauro Carvalho Chehab
Signed-off-by: Jonathan Corbet
Message-ID: <8575bbc94ff706aa7e7cc3a188399ca17a3169e6.1759329363.git.mchehab+huawei@kernel.org>
---
 tools/docs/parse-headers.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'tools/docs/parse-headers.py')

diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py
index bfa4e46a53e3..6716c7300258 100755
--- a/tools/docs/parse-headers.py
+++ b/tools/docs/parse-headers.py
@@ -47,10 +47,7 @@ def main():
     args = parser.parse_args()
 
     parser = ParseDataStructs(debug=args.debug)
-    parser.parse_file(args.file_in)
-
-    if args.file_rules:
-        parser.process_exceptions(args.file_rules)
+    parser.parse_file(args.file_in, args.file_rules)
 
     parser.debug_print()
     parser.write_output(args.file_in, args.file_out, args.toc)
--
cgit v1.2.3
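
For illustration only (not part of the patch), a minimal sketch of what
this cleanup means for a caller of ParseDataStructs; the header and
exceptions-file paths here are hypothetical:

    from parse_data_structs import ParseDataStructs

    parser = ParseDataStructs(debug=False)

    # Before this patch, exception rules were applied in a separate step:
    #   parser.parse_file("some/header.h")
    #   parser.process_exceptions("some/header.h.rst.exceptions")
    # After it, parse_file() applies the (possibly None) rules internally:
    parser.parse_file("some/header.h", "some/header.h.rst.exceptions")
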
From 778b8ebe5192e7a7f00563a7456517dfa63e1d90 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet
Date: Mon, 10 Nov 2025 15:04:29 -0700
Subject: docs: Move the python libraries to tools/lib/python

"scripts/lib" was always a bit of an awkward place for Python modules.
We already have tools/lib; create a tools/lib/python, move the
libraries there, and update the users accordingly.

While at it, move the contents of tools/docs/lib.  Rather than make
another directory, just put these documentation-oriented modules under
"kdoc".

Signed-off-by: Jonathan Corbet
Message-ID: <20251110220430.726665-2-corbet@lwn.net>
---
 .pylintrc                                   |    2 +-
 Documentation/Makefile                      |    2 +-
 Documentation/sphinx/kernel_abi.py          |    2 +-
 Documentation/sphinx/kernel_include.py      |    2 +-
 Documentation/sphinx/kerneldoc.py           |    2 +-
 MAINTAINERS                                 |    3 +-
 scripts/jobserver-exec                      |    2 +-
 scripts/kernel-doc.py                       |    2 +-
 scripts/lib/abi/abi_parser.py               |  628 ----------
 scripts/lib/abi/abi_regex.py                |  234 ----
 scripts/lib/abi/helpers.py                  |   38 -
 scripts/lib/abi/system_symbols.py           |  378 ------
 scripts/lib/jobserver.py                    |  149 ---
 scripts/lib/kdoc/kdoc_files.py              |  294 -----
 scripts/lib/kdoc/kdoc_item.py               |   43 -
 scripts/lib/kdoc/kdoc_output.py             |  824 ------------
 scripts/lib/kdoc/kdoc_parser.py             | 1667 ---------------------------
 scripts/lib/kdoc/kdoc_re.py                 |  270 -----
 tools/docs/check-variable-fonts.py          |    8 +-
 tools/docs/get_abi.py                       |    2 +-
 tools/docs/lib/__init__.py                  |    0
 tools/docs/lib/enrich_formatter.py          |   70 --
 tools/docs/lib/latex_fonts.py               |  167 ---
 tools/docs/lib/parse_data_structs.py        |  482 --------
 tools/docs/lib/python_version.py            |  178 ---
 tools/docs/parse-headers.py                 |    9 +-
 tools/docs/sphinx-build-wrapper             |    7 +-
 tools/docs/sphinx-pre-install               |    5 +-
 tools/lib/python/abi/abi_parser.py          |  628 ++++++++++
 tools/lib/python/abi/abi_regex.py           |  234 ++++
 tools/lib/python/abi/helpers.py             |   38 +
 tools/lib/python/abi/system_symbols.py      |  378 ++++++
 tools/lib/python/jobserver.py               |  149 +++
 tools/lib/python/kdoc/enrich_formatter.py   |   70 ++
 tools/lib/python/kdoc/kdoc_files.py         |  294 +++++
 tools/lib/python/kdoc/kdoc_item.py          |   43 +
 tools/lib/python/kdoc/kdoc_output.py        |  824 +++++++++++
 tools/lib/python/kdoc/kdoc_parser.py        | 1667 +++++++++++++++++++++++++++
 tools/lib/python/kdoc/kdoc_re.py            |  270 +++++
 tools/lib/python/kdoc/latex_fonts.py        |  167 +++
 tools/lib/python/kdoc/parse_data_structs.py |  482 ++++++++
 tools/lib/python/kdoc/python_version.py     |  178 +++
 42 files changed, 5451 insertions(+), 5441 deletions(-)
 delete mode 100644 scripts/lib/abi/abi_parser.py
 delete mode 100644 scripts/lib/abi/abi_regex.py
 delete mode 100644 scripts/lib/abi/helpers.py
 delete mode 100644 scripts/lib/abi/system_symbols.py
 delete mode 100755 scripts/lib/jobserver.py
 delete mode 100644 scripts/lib/kdoc/kdoc_files.py
 delete mode 100644 scripts/lib/kdoc/kdoc_item.py
 delete mode 100644 scripts/lib/kdoc/kdoc_output.py
 delete mode 100644 scripts/lib/kdoc/kdoc_parser.py
 delete mode 100644 scripts/lib/kdoc/kdoc_re.py
 delete mode 100644 tools/docs/lib/__init__.py
 delete mode 100644 tools/docs/lib/enrich_formatter.py
 delete mode 100755 tools/docs/lib/latex_fonts.py
 delete mode 100755 tools/docs/lib/parse_data_structs.py
 delete mode 100644 tools/docs/lib/python_version.py
 create mode 100644 tools/lib/python/abi/abi_parser.py
 create mode 100644 tools/lib/python/abi/abi_regex.py
 create mode 100644 tools/lib/python/abi/helpers.py
 create mode 100644 tools/lib/python/abi/system_symbols.py
 create mode 100755 tools/lib/python/jobserver.py
 create mode 100644 tools/lib/python/kdoc/enrich_formatter.py
 create mode 100644 tools/lib/python/kdoc/kdoc_files.py
 create mode 100644 tools/lib/python/kdoc/kdoc_item.py
 create mode 100644 tools/lib/python/kdoc/kdoc_output.py
 create mode 100644 tools/lib/python/kdoc/kdoc_parser.py
 create mode 100644 tools/lib/python/kdoc/kdoc_re.py
 create mode 100755 tools/lib/python/kdoc/latex_fonts.py
 create mode 100755 tools/lib/python/kdoc/parse_data_structs.py
 create mode 100644 tools/lib/python/kdoc/python_version.py

(limited to 'tools/docs/parse-headers.py')
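
As a sketch of the pattern the updated users follow (assuming a script
living under scripts/, as scripts/kernel-doc.py does in the hunks
below), the new library location is put on sys.path before importing:

    import os
    import sys

    # The modules now live in tools/lib/python, with the documentation
    # helpers under its kdoc/ subdirectory; compute the path relative
    # to the script's own location.
    LIB_DIR = "../tools/lib/python/kdoc"
    SRC_DIR = os.path.dirname(os.path.realpath(__file__))
    sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))

    from kdoc_files import KernelFiles
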
diff --git a/.pylintrc b/.pylintrc
index 89eaf2100edd..8c6fc2b628b3 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,2 +1,2 @@
 [MASTER]
-init-hook='import sys; sys.path += ["scripts/lib/kdoc", "scripts/lib/abi", "tools/docs/lib"]'
+init-hook='import sys; sys.path += ["tools/lib/python"]'
diff --git a/Documentation/Makefile b/Documentation/Makefile
index c66df29cf0a3..fda2bef8d9d8 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -115,6 +115,6 @@ dochelp:
 	@echo '  make PAPER={a4|letter}     Specifies the paper size used for LaTeX/PDF output.'
 	@echo
 	@echo '  make FONTS_CONF_DENY_VF={path} sets a deny list to block variable Noto CJK fonts'
-	@echo '                             for PDF build. See tools/docs/lib/latex_fonts.py for more details'
+	@echo '                             for PDF build. See tools/lib/python/kdoc/latex_fonts.py for more details'
 	@echo
 	@echo '  Default location for the generated documents is Documentation/output'
diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py
index 32e39fb8bc3b..7ec832da8444 100644
--- a/Documentation/sphinx/kernel_abi.py
+++ b/Documentation/sphinx/kernel_abi.py
@@ -43,7 +43,7 @@ from sphinx.util.docutils import switch_source_input
 from sphinx.util import logging
 
 srctree = os.path.abspath(os.environ["srctree"])
-sys.path.insert(0, os.path.join(srctree, "scripts/lib/abi"))
+sys.path.insert(0, os.path.join(srctree, "tools/lib/python/abi"))
 
 from abi_parser import AbiParser
diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py
index 75e139287d50..a12455daa6d7 100755
--- a/Documentation/sphinx/kernel_include.py
+++ b/Documentation/sphinx/kernel_include.py
@@ -97,7 +97,7 @@ from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
 from sphinx.util import logging
 
 srctree = os.path.abspath(os.environ["srctree"])
-sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))
+sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc"))
 
 from parse_data_structs import ParseDataStructs
diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py
index 2586b4d4e494..56f382a6bdf1 100644
--- a/Documentation/sphinx/kerneldoc.py
+++ b/Documentation/sphinx/kerneldoc.py
@@ -42,7 +42,7 @@ from sphinx.util import logging
 from pprint import pformat
 
 srctree = os.path.abspath(os.environ["srctree"])
-sys.path.insert(0, os.path.join(srctree, "scripts/lib/kdoc"))
+sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc"))
 
 from kdoc_files import KernelFiles
 from kdoc_output import RestFormat
diff --git a/MAINTAINERS b/MAINTAINERS
index 8a9411e5c1e1..efe98e680c14 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7412,8 +7412,7 @@ P:	Documentation/doc-guide/maintainer-profile.rst
 T:	git git://git.lwn.net/linux.git docs-next
 F:	Documentation/
 F:	scripts/kernel-doc*
-F:	scripts/lib/abi/*
-F:	scripts/lib/kdoc/*
+F:	tools/lib/python/*
 F:	tools/docs/
 F:	tools/net/ynl/pyynl/lib/doc_generator.py
 X:	Documentation/ABI/
diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec
index ae23afd344ec..758e947a6fb9 100755
--- a/scripts/jobserver-exec
+++ b/scripts/jobserver-exec
@@ -13,7 +13,7 @@ See:
 import os
 import sys
 
-LIB_DIR = "lib"
+LIB_DIR = "../tools/lib/python"
 SRC_DIR = os.path.dirname(os.path.realpath(__file__))
 
 sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py
index d9fe2bcbd39c..bb24bbf73167 100755
--- a/scripts/kernel-doc.py
+++ b/scripts/kernel-doc.py
@@ -111,7 +111,7 @@ import sys
 
 # Import Python modules
 
-LIB_DIR = "lib/kdoc"
+LIB_DIR = "../tools/lib/python/kdoc"
 SRC_DIR = os.path.dirname(os.path.realpath(__file__))
 
 sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
diff --git a/scripts/lib/abi/abi_parser.py b/scripts/lib/abi/abi_parser.py
deleted file mode 100644
index 66a738013ce1..000000000000
--- a/scripts/lib/abi/abi_parser.py
+++ /dev/null
@@ -1,628 +0,0 @@
-#!/usr/bin/env python3
-# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
-# Copyright(c) 2025: Mauro Carvalho Chehab .
-# SPDX-License-Identifier: GPL-2.0
-
-"""
-Parse ABI documentation and produce results from it.
-""" - -from argparse import Namespace -import logging -import os -import re - -from pprint import pformat -from random import randrange, seed - -# Import Python modules - -from helpers import AbiDebug, ABI_DIR - - -class AbiParser: - """Main class to parse ABI files""" - - TAGS = r"(what|where|date|kernelversion|contact|description|users)" - XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" - - def __init__(self, directory, logger=None, - enable_lineno=False, show_warnings=True, debug=0): - """Stores arguments for the class and initialize class vars""" - - self.directory = directory - self.enable_lineno = enable_lineno - self.show_warnings = show_warnings - self.debug = debug - - if not logger: - self.log = logging.getLogger("get_abi") - else: - self.log = logger - - self.data = {} - self.what_symbols = {} - self.file_refs = {} - self.what_refs = {} - - # Ignore files that contain such suffixes - self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") - - # Regular expressions used on parser - self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) - self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) - self.re_valid = re.compile(self.TAGS) - self.re_start_spc = re.compile(r"(\s*)(\S.*)") - self.re_whitespace = re.compile(r"^\s+") - - # Regular used on print - self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") - self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") - self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") - self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") - self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") - self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") - self.re_xref_node = re.compile(self.XREF) - - def warn(self, fdata, msg, extra=None): - """Displays a parse error if warning is enabled""" - - if not self.show_warnings: - return - - msg = f"{fdata.fname}:{fdata.ln}: {msg}" - if extra: - msg += "\n\t\t" + extra - - self.log.warning(msg) - - def add_symbol(self, what, fname, ln=None, xref=None): - """Create a reference table describing where each 'what' is located""" - - if what not in self.what_symbols: - self.what_symbols[what] = {"file": {}} - - if fname not in self.what_symbols[what]["file"]: - self.what_symbols[what]["file"][fname] = [] - - if ln and ln not in self.what_symbols[what]["file"][fname]: - self.what_symbols[what]["file"][fname].append(ln) - - if xref: - self.what_symbols[what]["xref"] = xref - - def _parse_line(self, fdata, line): - """Parse a single line of an ABI file""" - - new_what = False - new_tag = False - content = None - - match = self.re_tag.match(line) - if match: - new = match.group(1).lower() - sep = match.group(2) - content = match.group(3) - - match = self.re_valid.search(new) - if match: - new_tag = match.group(1) - else: - if fdata.tag == "description": - # New "tag" is actually part of description. - # Don't consider it a tag - new_tag = False - elif fdata.tag != "": - self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) - - if new_tag: - # "where" is Invalid, but was a common mistake. Warn if found - if new_tag == "where": - self.warn(fdata, "tag 'Where' is invalid. 
Should be 'What:' instead") - new_tag = "what" - - if new_tag == "what": - fdata.space = None - - if content not in self.what_symbols: - self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) - - if fdata.tag == "what": - fdata.what.append(content.strip("\n")) - else: - if fdata.key: - if "description" not in self.data.get(fdata.key, {}): - self.warn(fdata, f"{fdata.key} doesn't have a description") - - for w in fdata.what: - self.add_symbol(what=w, fname=fdata.fname, - ln=fdata.what_ln, xref=fdata.key) - - fdata.label = content - new_what = True - - key = "abi_" + content.lower() - fdata.key = self.re_unprintable.sub("_", key).strip("_") - - # Avoid duplicated keys but using a defined seed, to make - # the namespace identical if there aren't changes at the - # ABI symbols - seed(42) - - while fdata.key in self.data: - char = randrange(0, 51) + ord("A") - if char > ord("Z"): - char += ord("a") - ord("Z") - 1 - - fdata.key += chr(char) - - if fdata.key and fdata.key not in self.data: - self.data[fdata.key] = { - "what": [content], - "file": [fdata.file_ref], - "path": fdata.ftype, - "line_no": fdata.ln, - } - - fdata.what = self.data[fdata.key]["what"] - - self.what_refs[content] = fdata.key - fdata.tag = new_tag - fdata.what_ln = fdata.ln - - if fdata.nametag["what"]: - t = (content, fdata.key) - if t not in fdata.nametag["symbols"]: - fdata.nametag["symbols"].append(t) - - return - - if fdata.tag and new_tag: - fdata.tag = new_tag - - if new_what: - fdata.label = "" - - if "description" in self.data[fdata.key]: - self.data[fdata.key]["description"] += "\n\n" - - if fdata.file_ref not in self.data[fdata.key]["file"]: - self.data[fdata.key]["file"].append(fdata.file_ref) - - if self.debug == AbiDebug.WHAT_PARSING: - self.log.debug("what: %s", fdata.what) - - if not fdata.what: - self.warn(fdata, "'What:' should come first:", line) - return - - if new_tag == "description": - fdata.space = None - - if content: - sep = sep.replace(":", " ") - - c = " " * len(new_tag) + sep + content - c = c.expandtabs() - - match = self.re_start_spc.match(c) - if match: - # Preserve initial spaces for the first line - fdata.space = match.group(1) - content = match.group(2) + "\n" - - self.data[fdata.key][fdata.tag] = content - - return - - # Store any contents before tags at the database - if not fdata.tag and "what" in fdata.nametag: - fdata.nametag["description"] += line - return - - if fdata.tag == "description": - content = line.expandtabs() - - if self.re_whitespace.sub("", content) == "": - self.data[fdata.key][fdata.tag] += "\n" - return - - if fdata.space is None: - match = self.re_start_spc.match(content) - if match: - # Preserve initial spaces for the first line - fdata.space = match.group(1) - - content = match.group(2) + "\n" - else: - if content.startswith(fdata.space): - content = content[len(fdata.space):] - - else: - fdata.space = "" - - if fdata.tag == "what": - w = content.strip("\n") - if w: - self.data[fdata.key][fdata.tag].append(w) - else: - self.data[fdata.key][fdata.tag] += content - return - - content = line.strip() - if fdata.tag: - if fdata.tag == "what": - w = content.strip("\n") - if w: - self.data[fdata.key][fdata.tag].append(w) - else: - self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") - return - - # Everything else is error - if content: - self.warn(fdata, "Unexpected content", line) - - def parse_readme(self, nametag, fname): - """Parse ABI README file""" - - nametag["what"] = ["Introduction"] - nametag["path"] = "README" - with open(fname, "r", 
encoding="utf8", errors="backslashreplace") as fp: - for line in fp: - match = self.re_tag.match(line) - if match: - new = match.group(1).lower() - - match = self.re_valid.search(new) - if match: - nametag["description"] += "\n:" + line - continue - - nametag["description"] += line - - def parse_file(self, fname, path, basename): - """Parse a single file""" - - ref = f"abi_file_{path}_{basename}" - ref = self.re_unprintable.sub("_", ref).strip("_") - - # Store per-file state into a namespace variable. This will be used - # by the per-line parser state machine and by the warning function. - fdata = Namespace - - fdata.fname = fname - fdata.name = basename - - pos = fname.find(ABI_DIR) - if pos > 0: - f = fname[pos:] - else: - f = fname - - fdata.file_ref = (f, ref) - self.file_refs[f] = ref - - fdata.ln = 0 - fdata.what_ln = 0 - fdata.tag = "" - fdata.label = "" - fdata.what = [] - fdata.key = None - fdata.xrefs = None - fdata.space = None - fdata.ftype = path.split("/")[0] - - fdata.nametag = {} - fdata.nametag["what"] = [f"ABI file {path}/{basename}"] - fdata.nametag["type"] = "File" - fdata.nametag["path"] = fdata.ftype - fdata.nametag["file"] = [fdata.file_ref] - fdata.nametag["line_no"] = 1 - fdata.nametag["description"] = "" - fdata.nametag["symbols"] = [] - - self.data[ref] = fdata.nametag - - if self.debug & AbiDebug.WHAT_OPEN: - self.log.debug("Opening file %s", fname) - - if basename == "README": - self.parse_readme(fdata.nametag, fname) - return - - with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: - for line in fp: - fdata.ln += 1 - - self._parse_line(fdata, line) - - if "description" in fdata.nametag: - fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") - - if fdata.key: - if "description" not in self.data.get(fdata.key, {}): - self.warn(fdata, f"{fdata.key} doesn't have a description") - - for w in fdata.what: - self.add_symbol(what=w, fname=fname, xref=fdata.key) - - def _parse_abi(self, root=None): - """Internal function to parse documentation ABI recursively""" - - if not root: - root = self.directory - - with os.scandir(root) as obj: - for entry in obj: - name = os.path.join(root, entry.name) - - if entry.is_dir(): - self._parse_abi(name) - continue - - if not entry.is_file(): - continue - - basename = os.path.basename(name) - - if basename.startswith("."): - continue - - if basename.endswith(self.ignore_suffixes): - continue - - path = self.re_abi_dir.sub("", os.path.dirname(name)) - - self.parse_file(name, path, basename) - - def parse_abi(self, root=None): - """Parse documentation ABI""" - - self._parse_abi(root) - - if self.debug & AbiDebug.DUMP_ABI_STRUCTS: - self.log.debug(pformat(self.data)) - - def desc_txt(self, desc): - """Print description as found inside ABI files""" - - desc = desc.strip(" \t\n") - - return desc + "\n\n" - - def xref(self, fname): - """ - Converts a Documentation/ABI + basename into a ReST cross-reference - """ - - xref = self.file_refs.get(fname) - if not xref: - return None - else: - return xref - - def desc_rst(self, desc): - """Enrich ReST output by creating cross-references""" - - # Remove title markups from the description - # Having titles inside ABI files will only work if extra - # care would be taken in order to strictly follow the same - # level order for each markup. 
- desc = self.re_title_mark.sub("\n\n", "\n" + desc) - desc = desc.rstrip(" \t\n").lstrip("\n") - - # Python's regex performance for non-compiled expressions is a lot - # than Perl, as Perl automatically caches them at their - # first usage. Here, we'll need to do the same, as otherwise the - # performance penalty is be high - - new_desc = "" - for d in desc.split("\n"): - if d == "": - new_desc += "\n" - continue - - # Use cross-references for doc files where needed - d = self.re_doc.sub(r":doc:`/\1`", d) - - # Use cross-references for ABI generated docs where needed - matches = self.re_abi.findall(d) - for m in matches: - abi = m[0] + m[1] - - xref = self.file_refs.get(abi) - if not xref: - # This may happen if ABI is on a separate directory, - # like parsing ABI testing and symbol is at stable. - # The proper solution is to move this part of the code - # for it to be inside sphinx/kernel_abi.py - self.log.info("Didn't find ABI reference for '%s'", abi) - else: - new = self.re_escape.sub(r"\\\1", m[1]) - d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) - - # Seek for cross reference symbols like /sys/... - # Need to be careful to avoid doing it on a code block - if d[0] not in [" ", "\t"]: - matches = self.re_xref_node.findall(d) - for m in matches: - # Finding ABI here is more complex due to wildcards - xref = self.what_refs.get(m) - if xref: - new = self.re_escape.sub(r"\\\1", m) - d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) - - new_desc += d + "\n" - - return new_desc + "\n\n" - - def doc(self, output_in_txt=False, show_symbols=True, show_file=True, - filter_path=None): - """Print ABI at stdout""" - - part = None - for key, v in sorted(self.data.items(), - key=lambda x: (x[1].get("type", ""), - x[1].get("what"))): - - wtype = v.get("type", "Symbol") - file_ref = v.get("file") - names = v.get("what", [""]) - - if wtype == "File": - if not show_file: - continue - else: - if not show_symbols: - continue - - if filter_path: - if v.get("path") != filter_path: - continue - - msg = "" - - if wtype != "File": - cur_part = names[0] - if cur_part.find("/") >= 0: - match = self.re_what.match(cur_part) - if match: - symbol = match.group(1).rstrip("/") - cur_part = "Symbols under " + symbol - - if cur_part and cur_part != part: - part = cur_part - msg += part + "\n"+ "-" * len(part) +"\n\n" - - msg += f".. _{key}:\n\n" - - max_len = 0 - for i in range(0, len(names)): # pylint: disable=C0200 - names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" - - max_len = max(max_len, len(names[i])) - - msg += "+-" + "-" * max_len + "-+\n" - for name in names: - msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" - msg += "+-" + "-" * max_len + "-+\n" - msg += "\n" - - for ref in file_ref: - if wtype == "File": - msg += f".. 
_{ref[1]}:\n\n" - else: - base = os.path.basename(ref[0]) - msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n" - - if wtype == "File": - msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n" - - desc = v.get("description") - if not desc and wtype != "File": - msg += f"DESCRIPTION MISSING for {names[0]}\n\n" - - if desc: - if output_in_txt: - msg += self.desc_txt(desc) - else: - msg += self.desc_rst(desc) - - symbols = v.get("symbols") - if symbols: - msg += "Has the following ABI:\n\n" - - for w, label in symbols: - # Escape special chars from content - content = self.re_escape.sub(r"\\\1", w) - - msg += f"- :ref:`{content} <{label}>`\n\n" - - users = v.get("users") - if users and users.strip(" \t\n"): - users = users.strip("\n").replace('\n', '\n\t') - msg += f"Users:\n\t{users}\n\n" - - ln = v.get("line_no", 1) - - yield (msg, file_ref[0][0], ln) - - def check_issues(self): - """Warn about duplicated ABI entries""" - - for what, v in self.what_symbols.items(): - files = v.get("file") - if not files: - # Should never happen if the parser works properly - self.log.warning("%s doesn't have a file associated", what) - continue - - if len(files) == 1: - continue - - f = [] - for fname, lines in sorted(files.items()): - if not lines: - f.append(f"{fname}") - elif len(lines) == 1: - f.append(f"{fname}:{lines[0]}") - else: - m = fname + "lines " - m += ", ".join(str(x) for x in lines) - f.append(m) - - self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f)) - - def search_symbols(self, expr): - """ Searches for ABI symbols """ - - regex = re.compile(expr, re.I) - - found_keys = 0 - for t in sorted(self.data.items(), key=lambda x: [0]): - v = t[1] - - wtype = v.get("type", "") - if wtype == "File": - continue - - for what in v.get("what", [""]): - if regex.search(what): - found_keys += 1 - - kernelversion = v.get("kernelversion", "").strip(" \t\n") - date = v.get("date", "").strip(" \t\n") - contact = v.get("contact", "").strip(" \t\n") - users = v.get("users", "").strip(" \t\n") - desc = v.get("description", "").strip(" \t\n") - - files = [] - for f in v.get("file", ()): - files.append(f[0]) - - what = str(found_keys) + ". " + what - title_tag = "-" * len(what) - - print(f"\n{what}\n{title_tag}\n") - - if kernelversion: - print(f"Kernel version:\t\t{kernelversion}") - - if date: - print(f"Date:\t\t\t{date}") - - if contact: - print(f"Contact:\t\t{contact}") - - if users: - print(f"Users:\t\t\t{users}") - - print("Defined on file(s):\t" + ", ".join(files)) - - if desc: - desc = desc.strip("\n") - print(f"\n{desc}\n") - - if not found_keys: - print(f"Regular expression /{expr}/ not found.") diff --git a/scripts/lib/abi/abi_regex.py b/scripts/lib/abi/abi_regex.py deleted file mode 100644 index 8a57846cbc69..000000000000 --- a/scripts/lib/abi/abi_regex.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python3 -# xxpylint: disable=R0903 -# Copyright(c) 2025: Mauro Carvalho Chehab . 
-# SPDX-License-Identifier: GPL-2.0 - -""" -Convert ABI what into regular expressions -""" - -import re -import sys - -from pprint import pformat - -from abi_parser import AbiParser -from helpers import AbiDebug - -class AbiRegex(AbiParser): - """Extends AbiParser to search ABI nodes with regular expressions""" - - # Escape only ASCII visible characters - escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])" - leave_others = "others" - - # Tuples with regular expressions to be compiled and replacement data - re_whats = [ - # Drop escape characters that might exist - (re.compile("\\\\"), ""), - - # Temporarily escape dot characters - (re.compile(r"\."), "\xf6"), - - # Temporarily change [0-9]+ type of patterns - (re.compile(r"\[0\-9\]\+"), "\xff"), - - # Temporarily change [\d+-\d+] type of patterns - (re.compile(r"\[0\-\d+\]"), "\xff"), - (re.compile(r"\[0:\d+\]"), "\xff"), - (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"), - - # Temporarily change [0-9] type of patterns - (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"), - - # Handle multiple option patterns - (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"), - - # Handle wildcards - (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"), - (re.compile(r"/\*/"), "/.*/"), - (re.compile(r"/\xf6\xf6\xf6"), "/.*"), - (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"), - (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"), - (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"), - - (re.compile(r"XX+"), "\\\\w\xf7"), - (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"), - (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"), - (re.compile(r"_[AB]_"), "_\\\\w\xf7_"), - - # Recover [0-9] type of patterns - (re.compile(r"\xf4"), "["), - (re.compile(r"\xf5"), "]"), - - # Remove duplicated spaces - (re.compile(r"\s+"), r" "), - - # Special case: drop comparison as in: - # What: foo = - # (this happens on a few IIO definitions) - (re.compile(r"\s*\=.*$"), ""), - - # Escape all other symbols - (re.compile(escape_symbols), r"\\\1"), - (re.compile(r"\\\\"), r"\\"), - (re.compile(r"\\([\[\]\(\)\|])"), r"\1"), - (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"), - - (re.compile(r"\xff"), r"\\d+"), - - # Special case: IIO ABI which a parenthesis. - (re.compile(r"sqrt(.*)"), r"sqrt(.*)"), - - # Simplify regexes with multiple .* - (re.compile(r"(?:\.\*){2,}"), ""), - - # Recover dot characters - (re.compile(r"\xf6"), "\\."), - # Recover plus characters - (re.compile(r"\xf7"), "+"), - ] - re_has_num = re.compile(r"\\d") - - # Symbol name after escape_chars that are considered a devnode basename - re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$") - - # List of popular group names to be skipped to minimize regex group size - # Use AbiDebug.SUBGROUP_SIZE to detect those - skip_names = set(["devices", "hwmon"]) - - def regex_append(self, what, new): - """ - Get a search group for a subset of regular expressions. - - As ABI may have thousands of symbols, using a for to search all - regular expressions is at least O(n^2). When there are wildcards, - the complexity increases substantially, eventually becoming exponential. - - To avoid spending too much time on them, use a logic to split - them into groups. The smaller the group, the better, as it would - mean that searches will be confined to a small number of regular - expressions. - - The conversion to a regex subset is tricky, as we need something - that can be easily obtained from the sysfs symbol and from the - regular expression. So, we need to discard nodes that have - wildcards. 
- - If it can't obtain a subgroup, place the regular expression inside - a special group (self.leave_others). - """ - - search_group = None - - for search_group in reversed(new.split("/")): - if not search_group or search_group in self.skip_names: - continue - if self.re_symbol_name.match(search_group): - break - - if not search_group: - search_group = self.leave_others - - if self.debug & AbiDebug.SUBGROUP_MAP: - self.log.debug("%s: mapped as %s", what, search_group) - - try: - if search_group not in self.regex_group: - self.regex_group[search_group] = [] - - self.regex_group[search_group].append(re.compile(new)) - if self.search_string: - if what.find(self.search_string) >= 0: - print(f"What: {what}") - except re.PatternError: - self.log.warning("Ignoring '%s' as it produced an invalid regex:\n" - " '%s'", what, new) - - def get_regexes(self, what): - """ - Given an ABI devnode, return a list of all regular expressions that - may match it, based on the sub-groups created by regex_append() - """ - - re_list = [] - - patches = what.split("/") - patches.reverse() - patches.append(self.leave_others) - - for search_group in patches: - if search_group in self.regex_group: - re_list += self.regex_group[search_group] - - return re_list - - def __init__(self, *args, **kwargs): - """ - Override init method to get verbose argument - """ - - self.regex_group = None - self.search_string = None - self.re_string = None - - if "search_string" in kwargs: - self.search_string = kwargs.get("search_string") - del kwargs["search_string"] - - if self.search_string: - - try: - self.re_string = re.compile(self.search_string) - except re.PatternError as e: - msg = f"{self.search_string} is not a valid regular expression" - raise ValueError(msg) from e - - super().__init__(*args, **kwargs) - - def parse_abi(self, *args, **kwargs): - - super().parse_abi(*args, **kwargs) - - self.regex_group = {} - - print("Converting ABI What fields into regexes...", file=sys.stderr) - - for t in sorted(self.data.items(), key=lambda x: x[0]): - v = t[1] - if v.get("type") == "File": - continue - - v["regex"] = [] - - for what in v.get("what", []): - if not what.startswith("/sys"): - continue - - new = what - for r, s in self.re_whats: - try: - new = r.sub(s, new) - except re.PatternError as e: - # Help debugging troubles with new regexes - raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e - - v["regex"].append(new) - - if self.debug & AbiDebug.REGEX: - self.log.debug("%-90s <== %s", new, what) - - # Store regex into a subgroup to speedup searches - self.regex_append(what, new) - - if self.debug & AbiDebug.SUBGROUP_DICT: - self.log.debug("%s", pformat(self.regex_group)) - - if self.debug & AbiDebug.SUBGROUP_SIZE: - biggestd_keys = sorted(self.regex_group.keys(), - key= lambda k: len(self.regex_group[k]), - reverse=True) - - print("Top regex subgroups:", file=sys.stderr) - for k in biggestd_keys[:10]: - print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr) diff --git a/scripts/lib/abi/helpers.py b/scripts/lib/abi/helpers.py deleted file mode 100644 index 639b23e4ca33..000000000000 --- a/scripts/lib/abi/helpers.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -# Copyright(c) 2025: Mauro Carvalho Chehab . 
-# pylint: disable=R0903 -# SPDX-License-Identifier: GPL-2.0 - -""" -Helper classes for ABI parser -""" - -ABI_DIR = "Documentation/ABI/" - - -class AbiDebug: - """Debug levels""" - - WHAT_PARSING = 1 - WHAT_OPEN = 2 - DUMP_ABI_STRUCTS = 4 - UNDEFINED = 8 - REGEX = 16 - SUBGROUP_MAP = 32 - SUBGROUP_DICT = 64 - SUBGROUP_SIZE = 128 - GRAPH = 256 - - -DEBUG_HELP = """ -1 - enable debug parsing logic -2 - enable debug messages on file open -4 - enable debug for ABI parse data -8 - enable extra debug information to identify troubles - with ABI symbols found at the local machine that - weren't found on ABI documentation (used only for - undefined subcommand) -16 - enable debug for what to regex conversion -32 - enable debug for symbol regex subgroups -64 - enable debug for sysfs graph tree variable -""" diff --git a/scripts/lib/abi/system_symbols.py b/scripts/lib/abi/system_symbols.py deleted file mode 100644 index f15c94a6e33c..000000000000 --- a/scripts/lib/abi/system_symbols.py +++ /dev/null @@ -1,378 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0902,R0912,R0914,R0915,R1702 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. -""" - -import os -import re -import sys - -from concurrent import futures -from datetime import datetime -from random import shuffle - -from helpers import AbiDebug - -class SystemSymbols: - """Stores arguments for the class and initialize class vars""" - - def graph_add_file(self, path, link=None): - """ - add a file path to the sysfs graph stored at self.root - """ - - if path in self.files: - return - - name = "" - ref = self.root - for edge in path.split("/"): - name += edge + "/" - if edge not in ref: - ref[edge] = {"__name": [name.rstrip("/")]} - - ref = ref[edge] - - if link and link not in ref["__name"]: - ref["__name"].append(link.rstrip("/")) - - self.files.add(path) - - def print_graph(self, root_prefix="", root=None, level=0): - """Prints a reference tree graph using UTF-8 characters""" - - if not root: - root = self.root - level = 0 - - # Prevent endless traverse - if level > 5: - return - - if level > 0: - prefix = "├──" - last_prefix = "└──" - else: - prefix = "" - last_prefix = "" - - items = list(root.items()) - - names = root.get("__name", []) - for k, edge in items: - if k == "__name": - continue - - if not k: - k = "/" - - if len(names) > 1: - k += " links: " + ",".join(names[1:]) - - if edge == items[-1][1]: - print(root_prefix + last_prefix + k) - p = root_prefix - if level > 0: - p += " " - self.print_graph(p, edge, level + 1) - else: - print(root_prefix + prefix + k) - p = root_prefix + "│ " - self.print_graph(p, edge, level + 1) - - def _walk(self, root): - """ - Walk through sysfs to get all devnodes that aren't ignored. - - By default, uses /sys as sysfs mounting point. If another - directory is used, it replaces them to /sys at the patches. 
- """ - - with os.scandir(root) as obj: - for entry in obj: - path = os.path.join(root, entry.name) - if self.sysfs: - p = path.replace(self.sysfs, "/sys", count=1) - else: - p = path - - if self.re_ignore.search(p): - return - - # Handle link first to avoid directory recursion - if entry.is_symlink(): - real = os.path.realpath(path) - if not self.sysfs: - self.aliases[path] = real - else: - real = real.replace(self.sysfs, "/sys", count=1) - - # Add absfile location to graph if it doesn't exist - if not self.re_ignore.search(real): - # Add link to the graph - self.graph_add_file(real, p) - - elif entry.is_file(): - self.graph_add_file(p) - - elif entry.is_dir(): - self._walk(path) - - def __init__(self, abi, sysfs="/sys", hints=False): - """ - Initialize internal variables and get a list of all files inside - sysfs that can currently be parsed. - - Please notice that there are several entries on sysfs that aren't - documented as ABI. Ignore those. - - The real paths will be stored under self.files. Aliases will be - stored in separate, as self.aliases. - """ - - self.abi = abi - self.log = abi.log - - if sysfs != "/sys": - self.sysfs = sysfs.rstrip("/") - else: - self.sysfs = None - - self.hints = hints - - self.root = {} - self.aliases = {} - self.files = set() - - dont_walk = [ - # Those require root access and aren't documented at ABI - f"^{sysfs}/kernel/debug", - f"^{sysfs}/kernel/tracing", - f"^{sysfs}/fs/pstore", - f"^{sysfs}/fs/bpf", - f"^{sysfs}/fs/fuse", - - # This is not documented at ABI - f"^{sysfs}/module", - - f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI - f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings - "sections|notes", # aren't actually part of ABI - - # kernel-parameters.txt - not easy to parse - "parameters", - ] - - self.re_ignore = re.compile("|".join(dont_walk)) - - print(f"Reading {sysfs} directory contents...", file=sys.stderr) - self._walk(sysfs) - - def check_file(self, refs, found): - """Check missing ABI symbols for a given sysfs file""" - - res_list = [] - - try: - for names in refs: - fname = names[0] - - res = { - "found": False, - "fname": fname, - "msg": "", - } - res_list.append(res) - - re_what = self.abi.get_regexes(fname) - if not re_what: - self.abi.log.warning(f"missing rules for {fname}") - continue - - for name in names: - for r in re_what: - if self.abi.debug & AbiDebug.UNDEFINED: - self.log.debug("check if %s matches '%s'", name, r.pattern) - if r.match(name): - res["found"] = True - if found: - res["msg"] += f" {fname}: regex:\n\t" - continue - - if self.hints and not res["found"]: - res["msg"] += f" {fname} not found. 
Tested regexes:\n" - for r in re_what: - res["msg"] += " " + r.pattern + "\n" - - except KeyboardInterrupt: - pass - - return res_list - - def _ref_interactor(self, root): - """Recursive function to interact over the sysfs tree""" - - for k, v in root.items(): - if isinstance(v, dict): - yield from self._ref_interactor(v) - - if root == self.root or k == "__name": - continue - - if self.abi.re_string: - fname = v["__name"][0] - if self.abi.re_string.search(fname): - yield v - else: - yield v - - - def get_fileref(self, all_refs, chunk_size): - """Interactor to group refs into chunks""" - - n = 0 - refs = [] - - for ref in all_refs: - refs.append(ref) - - n += 1 - if n >= chunk_size: - yield refs - n = 0 - refs = [] - - yield refs - - def check_undefined_symbols(self, max_workers=None, chunk_size=50, - found=None, dry_run=None): - """Seach ABI for sysfs symbols missing documentation""" - - self.abi.parse_abi() - - if self.abi.debug & AbiDebug.GRAPH: - self.print_graph() - - all_refs = [] - for ref in self._ref_interactor(self.root): - all_refs.append(ref["__name"]) - - if dry_run: - print("Would check", file=sys.stderr) - for ref in all_refs: - print(", ".join(ref)) - - return - - print("Starting to search symbols (it may take several minutes):", - file=sys.stderr) - start = datetime.now() - old_elapsed = None - - # Python doesn't support multithreading due to limitations on its - # global lock (GIL). While Python 3.13 finally made GIL optional, - # there are still issues related to it. Also, we want to have - # backward compatibility with older versions of Python. - # - # So, use instead multiprocess. However, Python is very slow passing - # data from/to multiple processes. Also, it may consume lots of memory - # if the data to be shared is not small. So, we need to group workload - # in chunks that are big enough to generate performance gains while - # not being so big that would cause out-of-memory. - - num_refs = len(all_refs) - print(f"Number of references to parse: {num_refs}", file=sys.stderr) - - if not max_workers: - max_workers = os.cpu_count() - elif max_workers > os.cpu_count(): - max_workers = os.cpu_count() - - max_workers = max(max_workers, 1) - - max_chunk_size = int((num_refs + max_workers - 1) / max_workers) - chunk_size = min(chunk_size, max_chunk_size) - chunk_size = max(1, chunk_size) - - if max_workers > 1: - executor = futures.ProcessPoolExecutor - - # Place references in a random order. This may help improving - # performance, by mixing complex/simple expressions when creating - # chunks - shuffle(all_refs) - else: - # Python has a high overhead with processes. When there's just - # one worker, it is faster to not create a new process. - # Yet, User still deserves to have a progress print. So, use - # python's "thread", which is actually a single process, using - # an internal schedule to switch between tasks. No performance - # gains for non-IO tasks, but still it can be quickly interrupted - # from time to time to display progress. 
- executor = futures.ThreadPoolExecutor - - not_found = [] - f_list = [] - with executor(max_workers=max_workers) as exe: - for refs in self.get_fileref(all_refs, chunk_size): - if refs: - try: - f_list.append(exe.submit(self.check_file, refs, found)) - - except KeyboardInterrupt: - return - - total = len(f_list) - - if not total: - if self.abi.re_string: - print(f"No ABI symbol matches {self.abi.search_string}") - else: - self.abi.log.warning("No ABI symbols found") - return - - print(f"{len(f_list):6d} jobs queued on {max_workers} workers", - file=sys.stderr) - - while f_list: - try: - t = futures.wait(f_list, timeout=1, - return_when=futures.FIRST_COMPLETED) - - done = t[0] - - for fut in done: - res_list = fut.result() - - for res in res_list: - if not res["found"]: - not_found.append(res["fname"]) - if res["msg"]: - print(res["msg"]) - - f_list.remove(fut) - except KeyboardInterrupt: - return - - except RuntimeError as e: - self.abi.log.warning(f"Future: {e}") - break - - if sys.stderr.isatty(): - elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] - if len(f_list) < total: - elapsed += f" ({total - len(f_list)}/{total} jobs completed). " - if elapsed != old_elapsed: - print(elapsed + "\r", end="", flush=True, - file=sys.stderr) - old_elapsed = elapsed - - elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] - print(elapsed, file=sys.stderr) - - for f in sorted(not_found): - print(f"{f} not found.") diff --git a/scripts/lib/jobserver.py b/scripts/lib/jobserver.py deleted file mode 100755 index a24f30ef4fa8..000000000000 --- a/scripts/lib/jobserver.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0+ -# -# pylint: disable=C0103,C0209 -# -# - -""" -Interacts with the POSIX jobserver during the Kernel build time. - -A "normal" jobserver task, like the one initiated by a make subrocess would do: - - - open read/write file descriptors to communicate with the job server; - - ask for one slot by calling: - claim = os.read(reader, 1) - - when the job finshes, call: - os.write(writer, b"+") # os.write(writer, claim) - -Here, the goal is different: This script aims to get the remaining number -of slots available, using all of them to run a command which handle tasks in -parallel. To to that, it has a loop that ends only after there are no -slots left. It then increments the number by one, in order to allow a -call equivalent to make -j$((claim+1)), e.g. having a parent make creating -$claim child to do the actual work. - -The end goal here is to keep the total number of build tasks under the -limit established by the initial make -j$n_proc call. - -See: - https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -""" - -import errno -import os -import subprocess -import sys - -class JobserverExec: - """ - Claim all slots from make using POSIX Jobserver. - - The main methods here are: - - open(): reserves all slots; - - close(): method returns all used slots back to make; - - run(): executes a command setting PARALLELISM= - """ - - def __init__(self): - """Initialize internal vars""" - self.claim = 0 - self.jobs = b"" - self.reader = None - self.writer = None - self.is_open = False - - def open(self): - """Reserve all available slots to be claimed later on""" - - if self.is_open: - return - - try: - # Fetch the make environment options. - flags = os.environ["MAKEFLAGS"] - # Look for "--jobserver=R,W" - # Note that GNU Make has used --jobserver-fds and --jobserver-auth - # so this handles all of them. 
- opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] - - # Parse out R,W file descriptor numbers and set them nonblocking. - # If the MAKEFLAGS variable contains multiple instances of the - # --jobserver-auth= option, the last one is relevant. - fds = opts[-1].split("=", 1)[1] - - # Starting with GNU Make 4.4, named pipes are used for reader - # and writer. - # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 - _, _, path = fds.partition("fifo:") - - if path: - self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) - self.writer = os.open(path, os.O_WRONLY) - else: - self.reader, self.writer = [int(x) for x in fds.split(",", 1)] - # Open a private copy of reader to avoid setting nonblocking - # on an unexpecting process with the same reader fd. - self.reader = os.open("/proc/self/fd/%d" % (self.reader), - os.O_RDONLY | os.O_NONBLOCK) - - # Read out as many jobserver slots as possible - while True: - try: - slot = os.read(self.reader, 8) - self.jobs += slot - except (OSError, IOError) as e: - if e.errno == errno.EWOULDBLOCK: - # Stop at the end of the jobserver queue. - break - # If something went wrong, give back the jobs. - if self.jobs: - os.write(self.writer, self.jobs) - raise e - - # Add a bump for our caller's reserveration, since we're just going - # to sit here blocked on our child. - self.claim = len(self.jobs) + 1 - - except (KeyError, IndexError, ValueError, OSError, IOError): - # Any missing environment strings or bad fds should result in just - # not being parallel. - self.claim = None - - self.is_open = True - - def close(self): - """Return all reserved slots to Jobserver""" - - if not self.is_open: - return - - # Return all the reserved slots. - if len(self.jobs): - os.write(self.writer, self.jobs) - - self.is_open = False - - def __enter__(self): - self.open() - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - self.close() - - def run(self, cmd, *args, **pwargs): - """ - Run a command setting PARALLELISM env variable to the number of - available job slots (claim) + 1, e.g. it will reserve claim slots - to do the actual build work, plus one to monitor its children. - """ - self.open() # Ensure that self.claim is set - - # We can only claim parallelism if there was a jobserver (i.e. a - # top-level "-jN" argument) and there were no other failures. Otherwise - # leave out the environment variable and let the child figure out what - # is best. - if self.claim: - os.environ["PARALLELISM"] = str(self.claim) - - return subprocess.call(cmd, *args, **pwargs) diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py deleted file mode 100644 index 1fd8d17edb32..000000000000 --- a/scripts/lib/kdoc/kdoc_files.py +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=R0903,R0913,R0914,R0917 - -""" -Parse lernel-doc tags on multiple kernel source files. -""" - -import argparse -import logging -import os -import re - -from kdoc_parser import KernelDoc -from kdoc_output import OutputFormat - - -class GlobSourceFiles: - """ - Parse C source code file names and directories via an Interactor. - """ - - def __init__(self, srctree=None, valid_extensions=None): - """ - Initialize valid extensions with a tuple. - - If not defined, assume default C extensions (.c and .h) - - It would be possible to use python's glob function, but it is - very slow, and it is not interactive. 
So, it would wait to read all - directories before actually do something. - - So, let's use our own implementation. - """ - - if not valid_extensions: - self.extensions = (".c", ".h") - else: - self.extensions = valid_extensions - - self.srctree = srctree - - def _parse_dir(self, dirname): - """Internal function to parse files recursively""" - - with os.scandir(dirname) as obj: - for entry in obj: - name = os.path.join(dirname, entry.name) - - if entry.is_dir(follow_symlinks=False): - yield from self._parse_dir(name) - - if not entry.is_file(): - continue - - basename = os.path.basename(name) - - if not basename.endswith(self.extensions): - continue - - yield name - - def parse_files(self, file_list, file_not_found_cb): - """ - Define an interator to parse all source files from file_list, - handling directories if any - """ - - if not file_list: - return - - for fname in file_list: - if self.srctree: - f = os.path.join(self.srctree, fname) - else: - f = fname - - if os.path.isdir(f): - yield from self._parse_dir(f) - elif os.path.isfile(f): - yield f - elif file_not_found_cb: - file_not_found_cb(fname) - - -class KernelFiles(): - """ - Parse kernel-doc tags on multiple kernel source files. - - There are two type of parsers defined here: - - self.parse_file(): parses both kernel-doc markups and - EXPORT_SYMBOL* macros; - - self.process_export_file(): parses only EXPORT_SYMBOL* macros. - """ - - def warning(self, msg): - """Ancillary routine to output a warning and increment error count""" - - self.config.log.warning(msg) - self.errors += 1 - - def error(self, msg): - """Ancillary routine to output an error and increment error count""" - - self.config.log.error(msg) - self.errors += 1 - - def parse_file(self, fname): - """ - Parse a single Kernel source. - """ - - # Prevent parsing the same file twice if results are cached - if fname in self.files: - return - - doc = KernelDoc(self.config, fname) - export_table, entries = doc.parse_kdoc() - - self.export_table[fname] = export_table - - self.files.add(fname) - self.export_files.add(fname) # parse_kdoc() already check exports - - self.results[fname] = entries - - def process_export_file(self, fname): - """ - Parses EXPORT_SYMBOL* macros from a single Kernel source file. - """ - - # Prevent parsing the same file twice if results are cached - if fname in self.export_files: - return - - doc = KernelDoc(self.config, fname) - export_table = doc.parse_export() - - if not export_table: - self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") - export_table = set() - - self.export_table[fname] = export_table - self.export_files.add(fname) - - def file_not_found_cb(self, fname): - """ - Callback to warn if a file was not found. 
- """ - - self.error(f"Cannot find file {fname}") - - def __init__(self, verbose=False, out_style=None, - werror=False, wreturn=False, wshort_desc=False, - wcontents_before_sections=False, - logger=None): - """ - Initialize startup variables and parse all files - """ - - if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) - - if out_style is None: - out_style = OutputFormat() - - if not werror: - kcflags = os.environ.get("KCFLAGS", None) - if kcflags: - match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) - if match: - werror = True - - # reading this variable is for backwards compat just in case - # someone was calling it with the variable from outside the - # kernel's build system - kdoc_werror = os.environ.get("KDOC_WERROR", None) - if kdoc_werror: - werror = kdoc_werror - - # Some variables are global to the parser logic as a whole as they are - # used to send control configuration to KernelDoc class. As such, - # those variables are read-only inside the KernelDoc. - self.config = argparse.Namespace - - self.config.verbose = verbose - self.config.werror = werror - self.config.wreturn = wreturn - self.config.wshort_desc = wshort_desc - self.config.wcontents_before_sections = wcontents_before_sections - - if not logger: - self.config.log = logging.getLogger("kernel-doc") - else: - self.config.log = logger - - self.config.warning = self.warning - - self.config.src_tree = os.environ.get("SRCTREE", None) - - # Initialize variables that are internal to KernelFiles - - self.out_style = out_style - - self.errors = 0 - self.results = {} - - self.files = set() - self.export_files = set() - self.export_table = {} - - def parse(self, file_list, export_file=None): - """ - Parse all files - """ - - glob = GlobSourceFiles(srctree=self.config.src_tree) - - for fname in glob.parse_files(file_list, self.file_not_found_cb): - self.parse_file(fname) - - for fname in glob.parse_files(export_file, self.file_not_found_cb): - self.process_export_file(fname) - - def out_msg(self, fname, name, arg): - """ - Return output messages from a file name using the output style - filtering. - - If output type was not handled by the syler, return None. - """ - - # NOTE: we can add rules here to filter out unwanted parts, - # although OutputFormat.msg already does that. 
- - return self.out_style.msg(fname, name, arg) - - def msg(self, enable_lineno=False, export=False, internal=False, - symbol=None, nosymbol=None, no_doc_sections=False, - filenames=None, export_file=None): - """ - Interacts over the kernel-doc results and output messages, - returning kernel-doc markups on each interaction - """ - - self.out_style.set_config(self.config) - - if not filenames: - filenames = sorted(self.results.keys()) - - glob = GlobSourceFiles(srctree=self.config.src_tree) - - for fname in filenames: - function_table = set() - - if internal or export: - if not export_file: - export_file = [fname] - - for f in glob.parse_files(export_file, self.file_not_found_cb): - function_table |= self.export_table[f] - - if symbol: - for s in symbol: - function_table.add(s) - - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - - msg = "" - if fname not in self.results: - self.config.log.warning("No kernel-doc for file %s", fname) - continue - - symbols = self.results[fname] - self.out_style.set_symbols(symbols) - - for arg in symbols: - m = self.out_msg(fname, arg.name, arg) - - if m is None: - ln = arg.get("ln", 0) - dtype = arg.get('type', "") - - self.config.log.warning("%s:%d Can't handle %s", - fname, ln, dtype) - else: - msg += m - - if msg: - yield fname, msg diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py deleted file mode 100644 index 19805301cb2c..000000000000 --- a/scripts/lib/kdoc/kdoc_item.py +++ /dev/null @@ -1,43 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# A class that will, eventually, encapsulate all of the parsed data that we -# then pass into the output modules. -# - -class KdocItem: - def __init__(self, name, fname, type, start_line, **other_stuff): - self.name = name - self.fname = fname - self.type = type - self.declaration_start_line = start_line - self.sections = {} - self.sections_start_lines = {} - self.parameterlist = [] - self.parameterdesc_start_lines = [] - self.parameterdescs = {} - self.parametertypes = {} - # - # Just save everything else into our own dict so that the output - # side can grab it directly as before. As we move things into more - # structured data, this will, hopefully, fade away. - # - self.other_stuff = other_stuff - - def get(self, key, default = None): - return self.other_stuff.get(key, default) - - def __getitem__(self, key): - return self.get(key) - - # - # Tracking of section and parameter information. - # - def set_sections(self, sections, start_lines): - self.sections = sections - self.section_start_lines = start_lines - - def set_params(self, names, descs, types, starts): - self.parameterlist = names - self.parameterdescs = descs - self.parametertypes = types - self.parameterdesc_start_lines = starts diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py deleted file mode 100644 index 58f115059e93..000000000000 --- a/scripts/lib/kdoc/kdoc_output.py +++ /dev/null @@ -1,824 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 - -""" -Implement output filters to print kernel-doc documentation. - -The implementation uses a virtual base class (OutputFormat) which -contains a dispatches to virtual methods, and some code to filter -out output messages. - -The actual implementation is done on one separate class per each type -of output. 
Currently, there are output classes for ReST and man/troff. -""" - -import os -import re -from datetime import datetime - -from kdoc_parser import KernelDoc, type_param -from kdoc_re import KernRe - - -function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) - -# match expressions used to find embedded type information -type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) -type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) -type_func = KernRe(r"(\w+)\(\)", cache=False) -type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -# Special RST handling for func ptr params -type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) - -# Special RST handling for structs with func ptr params -type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) - -type_env = KernRe(r"(\$\w+)", cache=False) -type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) -type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) -type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) -type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) -type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) -type_fallback = KernRe(r"\&([_\w]+)", cache=False) -type_member_func = type_member + KernRe(r"\(\)", cache=False) - - -class OutputFormat: - """ - Base class for OutputFormat. If used as-is, it means that only - warnings will be displayed. - """ - - # output mode. - OUTPUT_ALL = 0 # output all symbols and doc sections - OUTPUT_INCLUDE = 1 # output only specified symbols - OUTPUT_EXPORTED = 2 # output exported symbols - OUTPUT_INTERNAL = 3 # output non-exported symbols - - # Virtual member to be overriden at the inherited classes - highlights = [] - - def __init__(self): - """Declare internal vars and set mode to OUTPUT_ALL""" - - self.out_mode = self.OUTPUT_ALL - self.enable_lineno = None - self.nosymbol = {} - self.symbol = None - self.function_table = None - self.config = None - self.no_doc_sections = False - - self.data = "" - - def set_config(self, config): - """ - Setup global config variables used by both parser and output. - """ - - self.config = config - - def set_filter(self, export, internal, symbol, nosymbol, function_table, - enable_lineno, no_doc_sections): - """ - Initialize filter variables according with the requested mode. - - Only one choice is valid between export, internal and symbol. - - The nosymbol filter can be used on all modes. - """ - - self.enable_lineno = enable_lineno - self.no_doc_sections = no_doc_sections - self.function_table = function_table - - if symbol: - self.out_mode = self.OUTPUT_INCLUDE - elif export: - self.out_mode = self.OUTPUT_EXPORTED - elif internal: - self.out_mode = self.OUTPUT_INTERNAL - else: - self.out_mode = self.OUTPUT_ALL - - if nosymbol: - self.nosymbol = set(nosymbol) - - - def highlight_block(self, block): - """ - Apply the RST highlights to a sub-block of text. - """ - - for r, sub in self.highlights: - block = r.sub(sub, block) - - return block - - def out_warnings(self, args): - """ - Output warnings for identifiers that will be displayed. 
- """ - - for log_msg in args.warnings: - self.config.warning(log_msg) - - def check_doc(self, name, args): - """Check if DOC should be output""" - - if self.no_doc_sections: - return False - - if name in self.nosymbol: - return False - - if self.out_mode == self.OUTPUT_ALL: - self.out_warnings(args) - return True - - if self.out_mode == self.OUTPUT_INCLUDE: - if name in self.function_table: - self.out_warnings(args) - return True - - return False - - def check_declaration(self, dtype, name, args): - """ - Checks if a declaration should be output or not based on the - filtering criteria. - """ - - if name in self.nosymbol: - return False - - if self.out_mode == self.OUTPUT_ALL: - self.out_warnings(args) - return True - - if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: - if name in self.function_table: - return True - - if self.out_mode == self.OUTPUT_INTERNAL: - if dtype != "function": - self.out_warnings(args) - return True - - if name not in self.function_table: - self.out_warnings(args) - return True - - return False - - def msg(self, fname, name, args): - """ - Handles a single entry from kernel-doc parser - """ - - self.data = "" - - dtype = args.type - - if dtype == "doc": - self.out_doc(fname, name, args) - return self.data - - if not self.check_declaration(dtype, name, args): - return self.data - - if dtype == "function": - self.out_function(fname, name, args) - return self.data - - if dtype == "enum": - self.out_enum(fname, name, args) - return self.data - - if dtype == "typedef": - self.out_typedef(fname, name, args) - return self.data - - if dtype in ["struct", "union"]: - self.out_struct(fname, name, args) - return self.data - - # Warn if some type requires an output logic - self.config.log.warning("doesn't now how to output '%s' block", - dtype) - - return None - - # Virtual methods to be overridden by inherited classes - # At the base class, those do nothing. - def set_symbols(self, symbols): - """Get a list of all symbols from kernel_doc""" - - def out_doc(self, fname, name, args): - """Outputs a DOC block""" - - def out_function(self, fname, name, args): - """Outputs a function""" - - def out_enum(self, fname, name, args): - """Outputs an enum""" - - def out_typedef(self, fname, name, args): - """Outputs a typedef""" - - def out_struct(self, fname, name, args): - """Outputs a struct""" - - -class RestFormat(OutputFormat): - """Consts and functions used by ReST output""" - - highlights = [ - (type_constant, r"``\1``"), - (type_constant2, r"``\1``"), - - # Note: need to escape () to avoid func matching later - (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), - (type_member, r":c:type:`\1\2\3 <\1>`"), - (type_fp_param, r"**\1\\(\\)**"), - (type_fp_param2, r"**\1\\(\\)**"), - (type_func, r"\1()"), - (type_enum, r":c:type:`\1 <\2>`"), - (type_struct, r":c:type:`\1 <\2>`"), - (type_typedef, r":c:type:`\1 <\2>`"), - (type_union, r":c:type:`\1 <\2>`"), - - # in rst this can refer to any type - (type_fallback, r":c:type:`\1`"), - (type_param_ref, r"**\1\2**") - ] - blankline = "\n" - - sphinx_literal = KernRe(r'^[^.].*::$', cache=False) - sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) - - def __init__(self): - """ - Creates class variables. - - Not really mandatory, but it is a good coding style and makes - pylint happy. - """ - - super().__init__() - self.lineprefix = "" - - def print_lineno(self, ln): - """Outputs a line number""" - - if self.enable_lineno and ln is not None: - ln += 1 - self.data += f".. 
LINENO {ln}\n" - - def output_highlight(self, args): - """ - Outputs a C symbol that may require being converted to ReST using - the self.highlights variable - """ - - input_text = args - output = "" - in_literal = False - litprefix = "" - block = "" - - for line in input_text.strip("\n").split("\n"): - - # If we're in a literal block, see if we should drop out of it. - # Otherwise, pass the line straight through unmunged. - if in_literal: - if line.strip(): # If the line is not blank - # If this is the first non-blank line in a literal block, - # figure out the proper indent. - if not litprefix: - r = KernRe(r'^(\s*)') - if r.match(line): - litprefix = '^' + r.group(1) - else: - litprefix = "" - - output += line + "\n" - elif not KernRe(litprefix).match(line): - in_literal = False - else: - output += line + "\n" - else: - output += line + "\n" - - # Not in a literal block (or just dropped out) - if not in_literal: - block += line + "\n" - if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): - in_literal = True - litprefix = "" - output += self.highlight_block(block) - block = "" - - # Handle any remaining block - if block: - output += self.highlight_block(block) - - # Print the output with the line prefix - for line in output.strip("\n").split("\n"): - self.data += self.lineprefix + line + "\n" - - def out_section(self, args, out_docblock=False): - """ - Outputs a block section. - - This could use some work; it's used to output the DOC: sections, and - starts by putting out the name of the doc section itself, but that - tends to duplicate a header already in the template file. - """ - for section, text in args.sections.items(): - # Skip sections that are in the nosymbol_table - if section in self.nosymbol: - continue - - if out_docblock: - if not self.out_mode == self.OUTPUT_INCLUDE: - self.data += f".. _{section}:\n\n" - self.data += f'{self.lineprefix}**{section}**\n\n' - else: - self.data += f'{self.lineprefix}**{section}**\n\n' - - self.print_lineno(args.section_start_lines.get(section, 0)) - self.output_highlight(text) - self.data += "\n" - self.data += "\n" - - def out_doc(self, fname, name, args): - if not self.check_doc(name, args): - return - self.out_section(args, out_docblock=True) - - def out_function(self, fname, name, args): - - oldprefix = self.lineprefix - signature = "" - - func_macro = args.get('func_macro', False) - if func_macro: - signature = name - else: - if args.get('functiontype'): - signature = args['functiontype'] + " " - signature += name + " (" - - ln = args.declaration_start_line - count = 0 - for parameter in args.parameterlist: - if count != 0: - signature += ", " - count += 1 - dtype = args.parametertypes.get(parameter, "") - - if function_pointer.search(dtype): - signature += function_pointer.group(1) + parameter + function_pointer.group(3) - else: - signature += dtype - - if not func_macro: - signature += ")" - - self.print_lineno(ln) - if args.get('typedef') or not args.get('functiontype'): - self.data += f".. c:macro:: {name}\n\n" - - if args.get('typedef'): - self.data += " **Typedef**: " - self.lineprefix = "" - self.output_highlight(args.get('purpose', "")) - self.data += "\n\n**Syntax**\n\n" - self.data += f" ``{signature}``\n\n" - else: - self.data += f"``{signature}``\n\n" - else: - self.data += f".. c:function:: {signature}\n\n" - - if not args.get('typedef'): - self.print_lineno(ln) - self.lineprefix = " " - self.output_highlight(args.get('purpose', "")) - self.data += "\n" - - # Put descriptive text into a container (HTML
) to help set - # function prototypes apart - self.lineprefix = " " - - if args.parameterlist: - self.data += ".. container:: kernelindent\n\n" - self.data += f"{self.lineprefix}**Parameters**\n\n" - - for parameter in args.parameterlist: - parameter_name = KernRe(r'\[.*').sub('', parameter) - dtype = args.parametertypes.get(parameter, "") - - if dtype: - self.data += f"{self.lineprefix}``{dtype}``\n" - else: - self.data += f"{self.lineprefix}``{parameter}``\n" - - self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) - - self.lineprefix = " " - if parameter_name in args.parameterdescs and \ - args.parameterdescs[parameter_name] != KernelDoc.undescribed: - - self.output_highlight(args.parameterdescs[parameter_name]) - self.data += "\n" - else: - self.data += f"{self.lineprefix}*undescribed*\n\n" - self.lineprefix = " " - - self.out_section(args) - self.lineprefix = oldprefix - - def out_enum(self, fname, name, args): - - oldprefix = self.lineprefix - ln = args.declaration_start_line - - self.data += f"\n\n.. c:enum:: {name}\n\n" - - self.print_lineno(ln) - self.lineprefix = " " - self.output_highlight(args.get('purpose', '')) - self.data += "\n" - - self.data += ".. container:: kernelindent\n\n" - outer = self.lineprefix + " " - self.lineprefix = outer + " " - self.data += f"{outer}**Constants**\n\n" - - for parameter in args.parameterlist: - self.data += f"{outer}``{parameter}``\n" - - if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed: - self.output_highlight(args.parameterdescs[parameter]) - else: - self.data += f"{self.lineprefix}*undescribed*\n\n" - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - def out_typedef(self, fname, name, args): - - oldprefix = self.lineprefix - ln = args.declaration_start_line - - self.data += f"\n\n.. c:type:: {name}\n\n" - - self.print_lineno(ln) - self.lineprefix = " " - - self.output_highlight(args.get('purpose', '')) - - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - def out_struct(self, fname, name, args): - - purpose = args.get('purpose', "") - declaration = args.get('definition', "") - dtype = args.type - ln = args.declaration_start_line - - self.data += f"\n\n.. c:{dtype}:: {name}\n\n" - - self.print_lineno(ln) - - oldprefix = self.lineprefix - self.lineprefix += " " - - self.output_highlight(purpose) - self.data += "\n" - - self.data += ".. 
container:: kernelindent\n\n" - self.data += f"{self.lineprefix}**Definition**::\n\n" - - self.lineprefix = self.lineprefix + " " - - declaration = declaration.replace("\t", self.lineprefix) - - self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" - self.data += f"{declaration}{self.lineprefix}" + "};\n\n" - - self.lineprefix = " " - self.data += f"{self.lineprefix}**Members**\n\n" - for parameter in args.parameterlist: - if not parameter or parameter.startswith("#"): - continue - - parameter_name = parameter.split("[", maxsplit=1)[0] - - if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: - continue - - self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) - - self.data += f"{self.lineprefix}``{parameter}``\n" - - self.lineprefix = " " - self.output_highlight(args.parameterdescs[parameter_name]) - self.lineprefix = " " - - self.data += "\n" - - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - -class ManFormat(OutputFormat): - """Consts and functions used by man pages output""" - - highlights = ( - (type_constant, r"\1"), - (type_constant2, r"\1"), - (type_func, r"\\fB\1\\fP"), - (type_enum, r"\\fI\1\\fP"), - (type_struct, r"\\fI\1\\fP"), - (type_typedef, r"\\fI\1\\fP"), - (type_union, r"\\fI\1\\fP"), - (type_param, r"\\fI\1\\fP"), - (type_param_ref, r"\\fI\1\2\\fP"), - (type_member, r"\\fI\1\2\3\\fP"), - (type_fallback, r"\\fI\1\\fP") - ) - blankline = "" - - date_formats = [ - "%a %b %d %H:%M:%S %Z %Y", - "%a %b %d %H:%M:%S %Y", - "%Y-%m-%d", - "%b %d %Y", - "%B %d %Y", - "%m %d %Y", - ] - - def __init__(self, modulename): - """ - Creates class variables. - - Not really mandatory, but it is a good coding style and makes - pylint happy. - """ - - super().__init__() - self.modulename = modulename - self.symbols = [] - - dt = None - tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") - if tstamp: - for fmt in self.date_formats: - try: - dt = datetime.strptime(tstamp, fmt) - break - except ValueError: - pass - - if not dt: - dt = datetime.now() - - self.man_date = dt.strftime("%B %Y") - - def arg_name(self, args, name): - """ - Return the name that will be used for the man page. - - As we may have the same name on different namespaces, - prepend the data type for all types except functions and typedefs. - - The doc section is special: it uses the modulename. - """ - - dtype = args.type - - if dtype == "doc": - return self.modulename - - if dtype in ["function", "typedef"]: - return name - - return f"{dtype} {name}" - - def set_symbols(self, symbols): - """ - Get a list of all symbols from kernel_doc. - - Man pages will uses it to add a SEE ALSO section with other - symbols at the same file. - """ - self.symbols = symbols - - def out_tail(self, fname, name, args): - """Adds a tail for all man pages""" - - # SEE ALSO section - self.data += f'.SH "SEE ALSO"' + "\n.PP\n" - self.data += (f"Kernel file \\fB{args.fname}\\fR\n") - if len(self.symbols) >= 2: - cur_name = self.arg_name(args, name) - - related = [] - for arg in self.symbols: - out_name = self.arg_name(arg, arg.name) - - if cur_name == out_name: - continue - - related.append(f"\\fB{out_name}\\fR(9)") - - self.data += ",\n".join(related) + "\n" - - # TODO: does it make sense to add other sections? Maybe - # REPORTING ISSUES? LICENSE? - - def msg(self, fname, name, args): - """ - Handles a single entry from kernel-doc parser. - - Add a tail at the end of man pages output. 
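The .TH header lines rely on self.man_date, whose format-fallback loop from __init__ above is worth seeing on its own. A self-contained sketch of the same logic (abridged format list)::

    import os
    from datetime import datetime

    date_formats = ["%a %b %d %H:%M:%S %Y", "%Y-%m-%d"]

    def man_date():
        # Honor KBUILD_BUILD_TIMESTAMP so man pages are reproducible,
        # trying each accepted format in turn; fall back to "now".
        tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
        if tstamp:
            for fmt in date_formats:
                try:
                    return datetime.strptime(tstamp, fmt).strftime("%B %Y")
                except ValueError:
                    pass
        return datetime.now().strftime("%B %Y")
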
- """ - super().msg(fname, name, args) - self.out_tail(fname, name, args) - - return self.data - - def output_highlight(self, block): - """ - Outputs a C symbol that may require being highlighted with - self.highlights variable using troff syntax - """ - - contents = self.highlight_block(block) - - if isinstance(contents, list): - contents = "\n".join(contents) - - for line in contents.strip("\n").split("\n"): - line = KernRe(r"^\s*").sub("", line) - if not line: - continue - - if line[0] == ".": - self.data += "\\&" + line + "\n" - else: - self.data += line + "\n" - - def out_doc(self, fname, name, args): - if not self.check_doc(name, args): - return - - out_name = self.arg_name(args, name) - - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_function(self, fname, name, args): - """output function in man""" - - out_name = self.arg_name(args, name) - - self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"{name} \\- {args['purpose']}\n" - - self.data += ".SH SYNOPSIS\n" - if args.get('functiontype', ''): - self.data += f'.B "{args["functiontype"]}" {name}' + "\n" - else: - self.data += f'.B "{name}' + "\n" - - count = 0 - parenth = "(" - post = "," - - for parameter in args.parameterlist: - if count == len(args.parameterlist) - 1: - post = ");" - - dtype = args.parametertypes.get(parameter, "") - if function_pointer.match(dtype): - # Pointer-to-function - self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" - else: - dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) - - self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" - count += 1 - parenth = "" - - if args.parameterlist: - self.data += ".SH ARGUMENTS\n" - - for parameter in args.parameterlist: - parameter_name = re.sub(r'\[.*', '', parameter) - - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name, "")) - - for section, text in args.sections.items(): - self.data += f'.SH "{section.upper()}"' + "\n" - self.output_highlight(text) - - def out_enum(self, fname, name, args): - out_name = self.arg_name(args, name) - - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"enum {name} \\- {args['purpose']}\n" - - self.data += ".SH SYNOPSIS\n" - self.data += f"enum {name}" + " {\n" - - count = 0 - for parameter in args.parameterlist: - self.data += f'.br\n.BI " {parameter}"' + "\n" - if count == len(args.parameterlist) - 1: - self.data += "\n};\n" - else: - self.data += ", \n.br\n" - - count += 1 - - self.data += ".SH Constants\n" - - for parameter in args.parameterlist: - parameter_name = KernRe(r'\[.*').sub('', parameter) - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name, "")) - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_typedef(self, fname, name, args): - module = self.modulename - purpose = args.get('purpose') - out_name = self.arg_name(args, name) - - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"typedef {name} \\- {purpose}\n" - - for 
section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_struct(self, fname, name, args): - module = self.modulename - purpose = args.get('purpose') - definition = args.get('definition') - out_name = self.arg_name(args, name) - - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"{args.type} {name} \\- {purpose}\n" - - # Replace tabs with two spaces and handle newlines - declaration = definition.replace("\t", " ") - declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) - - self.data += ".SH SYNOPSIS\n" - self.data += f"{args.type} {name} " + "{" + "\n.br\n" - self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" - - self.data += ".SH Members\n" - for parameter in args.parameterlist: - if parameter.startswith("#"): - continue - - parameter_name = re.sub(r"\[.*", "", parameter) - - if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: - continue - - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name)) - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py deleted file mode 100644 index f7dbb0868367..000000000000 --- a/scripts/lib/kdoc/kdoc_parser.py +++ /dev/null @@ -1,1667 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 - -""" -kdoc_parser -=========== - -Read a C language source or header FILE and extract embedded -documentation comments -""" - -import sys -import re -from pprint import pformat - -from kdoc_re import NestedMatch, KernRe -from kdoc_item import KdocItem - -# -# Regular expressions used to parse kernel-doc markups at KernelDoc class. -# -# Let's declare them in lowercase outside any class to make easier to -# convert from the python script. -# -# As those are evaluated at the beginning, no need to cache them -# - -# Allow whitespace at end of comment start. -doc_start = KernRe(r'^/\*\*\s*$', cache=False) - -doc_end = KernRe(r'\*/', cache=False) -doc_com = KernRe(r'\s*\*\s*', cache=False) -doc_com_body = KernRe(r'\s*\* ?', cache=False) -doc_decl = doc_com + KernRe(r'(\w+)', cache=False) - -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -known_section_names = 'description|context|returns?|notes?|examples?' 
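To see what this section matching accepts and rejects, here is a small self-contained check with plain re (same pattern shape as doc_sect below, with the doc_com prefix folded in)::

    import re

    known_section_names = r'description|context|returns?|notes?|examples?'
    doc_sect = re.compile(
        r'\s*\*\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
        flags=re.I)

    for line in (" * @index: current position",
                 " * Return: 0 on success",
                 " * example:: a literal block, not a section"):
        m = doc_sect.match(line)
        print(m.groups() if m else "no match")
    # ('@index', ' current position')
    # ('Return', ' 0 on success')
    # no match  -- the "[^:]" rejects the "::" of a literal block
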
-known_sections = KernRe(known_section_names, flags = re.I) -doc_sect = doc_com + \ - KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', - flags=re.I, cache=False) - -doc_content = doc_com_body + KernRe(r'(.*)', cache=False) -doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) -doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) -doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) -doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) - -export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) -export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) - -type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -# -# Tests for the beginning of a kerneldoc block in its various forms. -# -doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) -doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) -doc_begin_func = KernRe(str(doc_com) + # initial " * ' - r"(?:\w+\s*\*\s*)?" + # type (not captured) - r'(?:define\s+)?' + # possible "define" (not captured) - r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" - r'(?:[-:].*)?$', # description (not captured) - cache = False) - -# -# Here begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. -# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. 
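For reference, balanced-delimiter matching needs no recursive regex at all; a depth counter does the job. A minimal self-contained sketch of the idea behind NestedMatch (not its actual implementation)::

    import re

    def drop_wrapper(text, macro="STRUCT_GROUP"):
        # Replace macro(<balanced args>) with the args alone, tracking
        # parenthesis depth instead of using a recursive pattern.
        out, pos = [], 0
        for m in re.finditer(re.escape(macro) + r"\(", text):
            if m.start() < pos:      # inside an already-consumed region
                continue
            depth, i = 1, m.end()
            while i < len(text) and depth:
                depth += {"(": 1, ")": -1}.get(text[i], 0)
                i += 1
            out.append(text[pos:m.start()])
            out.append(text[m.end():i - 1])   # keep the inner arguments
            pos = i
        out.append(text[pos:])
        return "".join(out)

    print(drop_wrapper("STRUCT_GROUP(u8 mac[6]; u16 proto;) int mtu;"))
    # -> u8 mac[6]; u16 proto; int mtu;
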
- # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. - # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), -] -# -# Regexes here are guaranteed to have the end limiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), -] - -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# -# Apply a set of transforms to a block of text. -# -def apply_transforms(xforms, text): - for search, subst in xforms: - text = search.sub(subst, text) - return text - -# -# A little helper to get rid of excess white space -# -multi_space = KernRe(r'\s\s+') -def trim_whitespace(s): - return multi_space.sub(' ', s.strip()) - -# -# Remove struct/enum members that have been marked "private". -# -def trim_private_members(text): - # - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. 
- # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. - # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() - -class state: - """ - State machine enums - """ - - # Parser states - NORMAL = 0 # normal code - NAME = 1 # looking for function name - DECLARATION = 2 # We have seen a declaration which might not be done - BODY = 3 # the body of the comment - SPECIAL_SECTION = 4 # doc section ending with a blank line - PROTO = 5 # scanning prototype - DOCBLOCK = 6 # documentation block - INLINE_NAME = 7 # gathering doc outside main block - INLINE_TEXT = 8 # reading the body of inline docs - - name = [ - "NORMAL", - "NAME", - "DECLARATION", - "BODY", - "SPECIAL_SECTION", - "PROTO", - "DOCBLOCK", - "INLINE_NAME", - "INLINE_TEXT", - ] - - -SECTION_DEFAULT = "Description" # default section - -class KernelEntry: - - def __init__(self, config, fname, ln): - self.config = config - self.fname = fname - - self._contents = [] - self.prototype = "" - - self.warnings = [] - - self.parameterlist = [] - self.parameterdescs = {} - self.parametertypes = {} - self.parameterdesc_start_lines = {} - - self.section_start_lines = {} - self.sections = {} - - self.anon_struct_union = False - - self.leading_space = None - - self.fname = fname - - # State flags - self.brcount = 0 - self.declaration_start_line = ln + 1 - - # - # Management of section contents - # - def add_text(self, text): - self._contents.append(text) - - def contents(self): - return '\n'.join(self._contents) + '\n' - - # TODO: rename to emit_message after removal of kernel-doc.pl - def emit_msg(self, ln, msg, *, warning=True): - """Emit a message""" - - log_msg = f"{self.fname}:{ln} {msg}" - - if not warning: - self.config.log.info(log_msg) - return - - # Delegate warning output to output logic, as this way it - # will report warnings/info only for symbols that are output - - self.warnings.append(log_msg) - return - - # - # Begin a new section. - # - def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): - if dump: - self.dump_section(start_new = True) - self.section = title - self.new_start_line = line_no - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - # - # If we have accumulated no contents in the default ("description") - # section, don't bother. 
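As a hypothetical miniature of the accumulation contract used throughout this class (not the real KernelEntry)::

    class MiniEntry:
        def __init__(self):
            self.section = "Description"      # SECTION_DEFAULT
            self._contents = []
            self.sections = {}

        def begin_section(self, title):
            self.section = title

        def add_text(self, text):
            self._contents.append(text)

        def dump_section(self):
            # An empty default section is silently dropped.
            if self.section == "Description" and not self._contents:
                return
            self.sections[self.section] = "\n".join(self._contents) + "\n"
            self.section, self._contents = "Description", []

    entry = MiniEntry()
    entry.begin_section("Return")
    entry.add_text("0 on success, negative errno on failure")
    entry.dump_section()
    print(entry.sections)
    # {'Return': '0 on success, negative errno on failure\n'}
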
- # - if self.section == SECTION_DEFAULT and not self._contents: - return - name = self.section - contents = self.contents() - - if type_param.match(name): - name = type_param.group(1) - - self.parameterdescs[name] = contents - self.parameterdesc_start_lines[name] = self.new_start_line - - self.new_start_line = 0 - - else: - if name in self.sections and self.sections[name] != "": - # Only warn on user-specified duplicate section names - if name != SECTION_DEFAULT: - self.emit_msg(self.new_start_line, - f"duplicate section name '{name}'") - # Treat as a new paragraph - add a blank line - self.sections[name] += '\n' + contents - else: - self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line - self.new_start_line = 0 - -# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) - - if start_new: - self.section = SECTION_DEFAULT - self._contents = [] - -python_warning = False - -class KernelDoc: - """ - Read a C language source or header FILE and extract embedded - documentation comments. - """ - - # Section names - - section_context = "Context" - section_return = "Return" - - undescribed = "-- undescribed --" - - def __init__(self, config, fname): - """Initialize internal variables""" - - self.fname = fname - self.config = config - - # Initial state for the state machines - self.state = state.NORMAL - - # Store entry currently being processed - self.entry = None - - # Place all potential outputs into an array - self.entries = [] - - # - # We need Python 3.7 for its "dicts remember the insertion - # order" guarantee - # - global python_warning - if (not python_warning and - sys.version_info.major == 3 and sys.version_info.minor < 7): - - self.emit_msg(0, - 'Python 3.7 or later is required for correct results') - python_warning = True - - def emit_msg(self, ln, msg, *, warning=True): - """Emit a message""" - - if self.entry: - self.entry.emit_msg(ln, msg, warning=warning) - return - - log_msg = f"{self.fname}:{ln} {msg}" - - if warning: - self.config.log.warning(log_msg) - else: - self.config.log.info(log_msg) - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - - if self.entry: - self.entry.dump_section(start_new) - - # TODO: rename it to store_declaration after removal of kernel-doc.pl - def output_declaration(self, dtype, name, **args): - """ - Stores the entry into an entry array. - - The actual output and output filters will be handled elsewhere - """ - - item = KdocItem(name, self.fname, dtype, - self.entry.declaration_start_line, **args) - item.warnings = self.entry.warnings - - # Drop empty sections - # TODO: improve empty sections logic to emit warnings - sections = self.entry.sections - for section in ["Description", "Return"]: - if section in sections and not sections[section].rstrip(): - del sections[section] - item.set_sections(sections, self.entry.section_start_lines) - item.set_params(self.entry.parameterlist, self.entry.parameterdescs, - self.entry.parametertypes, - self.entry.parameterdesc_start_lines) - self.entries.append(item) - - self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) - - def reset_state(self, ln): - """ - Ancillary routine to create a new entry. It initializes all - variables used by the state machine. 
- """ - - # - # Flush the warnings out before we proceed further - # - if self.entry and self.entry not in self.entries: - for log_msg in self.entry.warnings: - self.config.log.warning(log_msg) - - self.entry = KernelEntry(self.config, self.fname, ln) - - # State flags - self.state = state.NORMAL - - def push_parameter(self, ln, decl_type, param, dtype, - org_arg, declaration_name): - """ - Store parameters and their descriptions at self.entry. - """ - - if self.entry.anon_struct_union and dtype == "" and param == "}": - return # Ignore the ending }; from anonymous struct/union - - self.entry.anon_struct_union = False - - param = KernRe(r'[\[\)].*').sub('', param, count=1) - - # - # Look at various "anonymous type" cases. - # - if dtype == '': - if param.endswith("..."): - if len(param) > 3: # there is a name provided, use that - param = param[:-3] - if not self.entry.parameterdescs.get(param): - self.entry.parameterdescs[param] = "variable arguments" - - elif (not param) or param == "void": - param = "void" - self.entry.parameterdescs[param] = "no arguments" - - elif param in ["struct", "union"]: - # Handle unnamed (anonymous) union or struct - dtype = param - param = "{unnamed_" + param + "}" - self.entry.parameterdescs[param] = "anonymous\n" - self.entry.anon_struct_union = True - - # Warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements) - if param not in self.entry.parameterdescs and not param.startswith("#"): - self.entry.parameterdescs[param] = self.undescribed - - if "." not in param: - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - - self.emit_msg(ln, - f"{dname} '{param}' not described in '{declaration_name}'") - - # Strip spaces from param so that it is one continuous string on - # parameterlist. This fixes a problem where check_sections() - # cannot find a parameter like "addr[6 + 2]" because it actually - # appears as "addr[6", "+", "2]" on the parameter list. - # However, it's better to maintain the param string unchanged for - # output, so just weaken the string compare in check_sections() - # to ignore "[blah" in a parameter string. - - self.entry.parameterlist.append(param) - org_arg = KernRe(r'\s\s+').sub(' ', org_arg) - self.entry.parametertypes[param] = org_arg - - - def create_parameter_list(self, ln, decl_type, args, - splitter, declaration_name): - """ - Creates a list of parameters, storing them at self.entry. - """ - - # temporarily replace all commas inside function pointer definition - arg_expr = KernRe(r'(\([^\),]+),') - while arg_expr.search(args): - args = arg_expr.sub(r"\1#", args) - - for arg in args.split(splitter): - # Ignore argument attributes - arg = KernRe(r'\sPOS0?\s').sub(' ', arg) - - # Strip leading/trailing spaces - arg = arg.strip() - arg = KernRe(r'\s+').sub(' ', arg, count=1) - - if arg.startswith('#'): - # Treat preprocessor directive as a typeless variable just to fill - # corresponding data structures "correctly". Catch it later in - # output_* subs. - - # Treat preprocessor directive as a typeless variable - self.push_parameter(ln, decl_type, arg, "", - "", declaration_name) - # - # The pointer-to-function case. 
- # - elif KernRe(r'\(.+\)\s*\(').search(arg): - arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" - r'([\w\[\].]*)' # Capture the name and possible [array] - r'\s*\)') # Make sure the trailing ")" is there - if r.match(arg): - param = r.group(1) - else: - self.emit_msg(ln, f"Invalid param: {arg}") - param = arg - dtype = arg.replace(param, '') - self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - # - # The array-of-pointers case. Dig the parameter name out from the middle - # of the declaration. - # - elif KernRe(r'\(.+\)\s*\[').search(arg): - r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" - r'([\w.]*?)' # The actual pointer name - r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] - if r.match(arg): - param = r.group(1) - else: - self.emit_msg(ln, f"Invalid param: {arg}") - param = arg - dtype = arg.replace(param, '') - self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - elif arg: - # - # Clean up extraneous spaces and split the string at commas; the first - # element of the resulting list will also include the type information. - # - arg = KernRe(r'\s*:\s*').sub(":", arg) - arg = KernRe(r'\s*\[').sub('[', arg) - args = KernRe(r'\s*,\s*').split(arg) - args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) - # - # args[0] has a string of "type a". If "a" includes an [array] - # declaration, we want to not be fooled by any white space inside - # the brackets, so detect and handle that case specially. - # - r = KernRe(r'^([^[\]]*\s+)(.*)$') - if r.match(args[0]): - args[0] = r.group(2) - dtype = r.group(1) - else: - # No space in args[0]; this seems wrong but preserves previous behavior - dtype = '' - - bitfield_re = KernRe(r'(.*?):(\w+)') - for param in args: - # - # For pointers, shift the star(s) from the variable name to the - # type declaration. - # - r = KernRe(r'^(\*+)\s*(.*)') - if r.match(param): - self.push_parameter(ln, decl_type, r.group(2), - f"{dtype} {r.group(1)}", - arg, declaration_name) - # - # Perform a similar shift for bitfields. - # - elif bitfield_re.search(param): - if dtype != "": # Skip unnamed bit-fields - self.push_parameter(ln, decl_type, bitfield_re.group(1), - f"{dtype}:{bitfield_re.group(2)}", - arg, declaration_name) - else: - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - def check_sections(self, ln, decl_name, decl_type): - """ - Check for errors inside sections, emitting warnings if not found - parameters are described. - """ - for section in self.entry.sections: - if section not in self.entry.parameterlist and \ - not known_sections.search(section): - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - self.emit_msg(ln, - f"Excess {dname} '{section}' description in '{decl_name}'") - - def check_return_section(self, ln, declaration_name, return_type): - """ - If the function doesn't return void, warns about the lack of a - return description. 
- """ - - if not self.config.wreturn: - return - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type (but not "void *") - if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): - return - - if not self.entry.sections.get("Return", None): - self.emit_msg(ln, - f"No description found for return value of '{declaration_name}'") - - # - # Split apart a structure prototype; returns (struct|union, name, members) or None - # - def split_struct_proto(self, proto): - type_pattern = r'(struct|union)' - qualifiers = [ - "__attribute__", - "__packed", - "__aligned", - "____cacheline_aligned_in_smp", - "____cacheline_aligned", - ] - definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - - r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) - if r.search(proto): - return (r.group(1), r.group(2), r.group(3)) - else: - r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') - if r.search(proto): - return (r.group(1), r.group(3), r.group(2)) - return None - # - # Rewrite the members of a structure or union for easier formatting later on. - # Among other things, this function will turn a member like: - # - # struct { inner_members; } foo; - # - # into: - # - # struct foo; inner_members; - # - def rewrite_struct_members(self, members): - # - # Process struct/union members from the most deeply nested outward. The - # trick is in the ^{ below - it prevents a match of an outer struct/union - # until the inner one has been munged (removing the "{" in the process). - # - struct_members = KernRe(r'(struct|union)' # 0: declaration type - r'([^\{\};]+)' # 1: possible name - r'(\{)' - r'([^\{\}]*)' # 3: Contents of declaration - r'(\})' - r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration - tuples = struct_members.findall(members) - while tuples: - for t in tuples: - newmember = "" - oldmember = "".join(t) # Reconstruct the original formatting - dtype, name, lbr, content, rbr, rest, semi = t - # - # Pass through each field name, normalizing the form and formatting. - # - for s_id in rest.split(','): - s_id = s_id.strip() - newmember += f"{dtype} {s_id}; " - # - # Remove bitfield/array/pointer info, getting the bare name. - # - s_id = KernRe(r'[:\[].*').sub('', s_id) - s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - # - # Pass through the members of this inner structure/union. - # - for arg in content.split(';'): - arg = arg.strip() - # - # Look for (type)(*name)(args) - pointer to function - # - r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') - if r.match(arg): - dtype, name, extra = r.group(1), r.group(2), r.group(3) - # Pointer-to-function - if not s_id: - # Anonymous struct/union - newmember += f"{dtype}{name}{extra}; " - else: - newmember += f"{dtype}{s_id}.{name}{extra}; " - # - # Otherwise a non-function member. - # - else: - # - # Remove bitmap and array portions and spaces around commas - # - arg = KernRe(r':\s*\d+\s*').sub('', arg) - arg = KernRe(r'\[.*\]').sub('', arg) - arg = KernRe(r'\s*,\s*').sub(',', arg) - # - # Look for a normal decl - "type name[,name...]" - # - r = KernRe(r'(.*)\s+([\S+,]+)') - if r.search(arg): - for name in r.group(2).split(','): - name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) - if not s_id: - # Anonymous struct/union - newmember += f"{r.group(1)} {name}; " - else: - newmember += f"{r.group(1)} {s_id}.{name}; " - else: - newmember += f"{arg}; " - # - # At the end of the s_id loop, replace the original declaration with - # the munged version. 
- # - members = members.replace(oldmember, newmember) - # - # End of the tuple loop - search again and see if there are outer members - # that now turn up. - # - tuples = struct_members.findall(members) - return members - - # - # Format the struct declaration into a standard form for inclusion in the - # resulting docs. - # - def format_struct_decl(self, declaration): - # - # Insert newlines, get rid of extra spaces. - # - declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) - declaration = KernRe(r'\}\s+;').sub('};', declaration) - # - # Format inline enums with each member on its own line. - # - r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') - while r.search(declaration): - declaration = r.sub(r'\1,\n\2', declaration) - # - # Now go through and supply the right number of tabs - # for each line. - # - def_args = declaration.split('\n') - level = 1 - declaration = "" - for clause in def_args: - clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) - if clause: - if '}' in clause and level > 1: - level -= 1 - if not clause.startswith('#'): - declaration += "\t" * level - declaration += "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level += 1 - return declaration - - - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - # - # Do the basic parse to get the pieces of the declaration. - # - struct_parts = self.split_struct_proto(proto) - if not struct_parts: - self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") - return - decl_type, declaration_name, members = struct_parts - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " - f"Prototype was for {decl_type} {declaration_name} instead\n") - return - # - # Go through the list of members applying all of our transformations. - # - members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) - - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) - # - # Deal with embedded struct and union members, and drop enums entirely. - # - declaration = members - members = self.rewrite_struct_members(members) - members = re.sub(r'(\{[^\{\}]*\})', '', members) - # - # Output the result and we are done. - # - self.create_parameter_list(ln, decl_type, members, ';', - declaration_name) - self.check_sections(ln, declaration_name, decl_type) - self.output_declaration(decl_type, declaration_name, - definition=self.format_struct_decl(declaration), - purpose=self.entry.declaration_purpose) - - def dump_enum(self, ln, proto): - """ - Stores an enum inside self.entries array. - """ - # - # Strip preprocessor directives. Note that this depends on the - # trailing semicolon we added in process_proto_type(). - # - proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - # - # Parse out the name and members of the enum. Typedef form first. - # - r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') - if r.search(proto): - declaration_name = r.group(2) - members = trim_private_members(r.group(1)) - # - # Failing that, look for a straight enum - # - else: - r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') - if r.match(proto): - declaration_name = r.group(1) - members = trim_private_members(r.group(2)) - # - # OK, this isn't going to work. - # - else: - self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") - return - # - # Make sure we found what we were expecting. 
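The enum recognition above reduces to two regex forms; a quick self-contained check of the plain (non-typedef) one, simplified::

    import re

    proto = "enum pin_config { PIN_BIAS_DISABLE, PIN_BIAS_HIGH }"

    r = re.match(r'enum\s+(\w*)\s*\{(.*)\}', proto)
    name, members = r.group(1), r.group(2)
    names = [re.sub(r'^\s*(\w+).*', r'\1', m)
             for m in members.split(',') if m.strip()]
    print(name, names)
    # pin_config ['PIN_BIAS_DISABLE', 'PIN_BIAS_HIGH']
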
- # - if self.entry.identifier != declaration_name: - if self.entry.identifier == "": - self.emit_msg(ln, - f"{proto}: wrong kernel-doc identifier on prototype") - else: - self.emit_msg(ln, - f"expecting prototype for enum {self.entry.identifier}. " - f"Prototype was for enum {declaration_name} instead") - return - - if not declaration_name: - declaration_name = "(anonymous)" - # - # Parse out the name of each enum member, and verify that we - # have a description for it. - # - member_set = set() - members = KernRe(r'\([^;)]*\)').sub('', members) - for arg in members.split(','): - if not arg: - continue - arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) - self.entry.parameterlist.append(arg) - if arg not in self.entry.parameterdescs: - self.entry.parameterdescs[arg] = self.undescribed - self.emit_msg(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") - member_set.add(arg) - # - # Ensure that every described member actually exists in the enum. - # - for k in self.entry.parameterdescs: - if k not in member_set: - self.emit_msg(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") - - self.output_declaration('enum', declaration_name, - purpose=self.entry.declaration_purpose) - - def dump_declaration(self, ln, prototype): - """ - Stores a data declaration inside self.entries array. - """ - - if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) - elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) - elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) - else: - # This would be a bug - self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - - def dump_function(self, ln, prototype): - """ - Stores a function of function macro inside self.entries array. - """ - - found = func_macro = False - return_type = '' - decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) - # - # If we have a macro, remove the "#define" at the front. - # - new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) - if new_proto != prototype: - prototype = new_proto - # - # Dispense with the simple "#define A B" case here; the key - # is the space after the name of the symbol being defined. - # NOTE that the seemingly misnamed "func_macro" indicates a - # macro *without* arguments. - # - r = KernRe(r'^(\w+)\s+') - if r.search(prototype): - return_type = '' - declaration_name = r.group(1) - func_macro = True - found = True - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2. Function name - # 3. Function parameters. - # - # All the while we have to watch out for function pointer parameters - # (which IIRC is what the two sections are for), C types (these - # regexps don't even start to express all the possibilities), and - # so on. - # - # If you mess with these regexps, it's a good idea to check that - # the following functions' documentation still comes out right: - # - parport_register_device (function pointer parameters) - # - atomic_set (macro) - # - pci_match_device, __copy_to_user (long return type) - - name = r'\w+' - type1 = r'(?:[\w\s]+)?' - type2 = r'(?:[\w\s]+\*+)+' - # - # Attempt to match first on (args) with no internal parentheses; this - # lets us easily filter out __acquires() and other post-args stuff. If - # that fails, just grab the rest of the line to the last closing - # parenthesis. 
- # - proto_args = r'\(([^\(]*|.*)\)' - # - # (Except for the simple macro case) attempt to split up the prototype - # in the various ways we understand. - # - if not found: - patterns = [ - rf'^()({name})\s*{proto_args}', - rf'^({type1})\s+({name})\s*{proto_args}', - rf'^({type2})\s*({name})\s*{proto_args}', - ] - - for p in patterns: - r = KernRe(p) - if r.match(prototype): - return_type = r.group(1) - declaration_name = r.group(2) - args = r.group(3) - self.create_parameter_list(ln, decl_type, args, ',', - declaration_name) - found = True - break - # - # Parsing done; make sure that things are as we expect. - # - if not found: - self.emit_msg(ln, - f"cannot understand function prototype: '{prototype}'") - return - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " - f"Prototype was for {declaration_name}() instead") - return - self.check_sections(ln, declaration_name, "function") - self.check_return_section(ln, declaration_name, return_type) - # - # Store the result. - # - self.output_declaration(decl_type, declaration_name, - typedef=('typedef' in return_type), - functiontype=return_type, - purpose=self.entry.declaration_purpose, - func_macro=func_macro) - - - def dump_typedef(self, ln, proto): - """ - Stores a typedef inside self.entries array. - """ - # - # We start by looking for function typedefs. - # - typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' - typedef_ident = r'\*?\s*(\w\S+)\s*' - typedef_args = r'\s*\((.*)\);' - - typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) - - # Parse function typedef prototypes - for r in [typedef1, typedef2]: - if not r.match(proto): - continue - - return_type = r.group(1).strip() - declaration_name = r.group(2) - args = r.group(3) - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") - return - - self.create_parameter_list(ln, 'function', args, ',', declaration_name) - - self.output_declaration('function', declaration_name, - typedef=True, - functiontype=return_type, - purpose=self.entry.declaration_purpose) - return - # - # Not a function, try to parse a simple typedef. - # - r = KernRe(r'typedef.*\s+(\w+)\s*;') - if r.match(proto): - declaration_name = r.group(1) - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") - return - - self.output_declaration('typedef', declaration_name, - purpose=self.entry.declaration_purpose) - return - - self.emit_msg(ln, "error: Cannot parse typedef!") - - @staticmethod - def process_export(function_set, line): - """ - process EXPORT_SYMBOL* tags - - This method doesn't use any variable from the class, so declare it - with a staticmethod decorator. - """ - - # We support documenting some exported symbols with different - # names. A horrible hack. - suffixes = [ '_noprof' ] - - # Note: it accepts only one EXPORT_SYMBOL* per line, as having - # multiple export lines would violate Kernel coding style. 
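In isolation, the export matching that follows behaves like this simplified self-contained sketch (only the plain EXPORT_SYMBOL form shown)::

    import re

    export_symbol = re.compile(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)')

    def scan_exports(lines):
        exports = set()
        for line in lines:
            m = export_symbol.search(line)
            if m:
                sym = m.group(2)
                if sym.endswith('_noprof'):   # documented as the base name
                    sym = sym[:-len('_noprof')]
                exports.add(sym)
        return exports

    print(scan_exports(["EXPORT_SYMBOL_GPL(kmalloc_noprof);",
                        "EXPORT_SYMBOL(vfs_read);"]))
    # -> {'kmalloc', 'vfs_read'}  (set order may vary)
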
- - if export_symbol.search(line): - symbol = export_symbol.group(2) - elif export_symbol_ns.search(line): - symbol = export_symbol_ns.group(2) - else: - return False - # - # Found an export, trim out any special suffixes - # - for suffix in suffixes: - # Be backward compatible with Python < 3.9 - if symbol.endswith(suffix): - symbol = symbol[:-len(suffix)] - function_set.add(symbol) - return True - - def process_normal(self, ln, line): - """ - STATE_NORMAL: looking for the /** to begin everything. - """ - - if not doc_start.match(line): - return - - # start a new entry - self.reset_state(ln) - - # next line is always the function name - self.state = state.NAME - - def process_name(self, ln, line): - """ - STATE_NAME: Looking for the "name - description" line - """ - # - # Check for a DOC: block and handle them specially. - # - if doc_block.search(line): - - if not doc_block.group(1): - self.entry.begin_section(ln, "Introduction") - else: - self.entry.begin_section(ln, doc_block.group(1)) - - self.entry.identifier = self.entry.section - self.state = state.DOCBLOCK - # - # Otherwise we're looking for a normal kerneldoc declaration line. - # - elif doc_decl.search(line): - self.entry.identifier = doc_decl.group(1) - - # Test for data declaration - if doc_begin_data.search(line): - self.entry.decl_type = doc_begin_data.group(1) - self.entry.identifier = doc_begin_data.group(2) - # - # Look for a function description - # - elif doc_begin_func.search(line): - self.entry.identifier = doc_begin_func.group(1) - self.entry.decl_type = "function" - # - # We struck out. - # - else: - self.emit_msg(ln, - f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") - self.state = state.NORMAL - return - # - # OK, set up for a new kerneldoc entry. - # - self.state = state.BODY - self.entry.identifier = self.entry.identifier.strip(" ") - # if there's no @param blocks need to set up default section here - self.entry.begin_section(ln + 1) - # - # Find the description portion, which *should* be there but - # isn't always. - # (We should be able to capture this from the previous parsing - someday) - # - r = KernRe("[-:](.*)") - if r.search(line): - self.entry.declaration_purpose = trim_whitespace(r.group(1)) - self.state = state.DECLARATION - else: - self.entry.declaration_purpose = "" - - if not self.entry.declaration_purpose and self.config.wshort_desc: - self.emit_msg(ln, - f"missing initial short description on line:\n{line}") - - if not self.entry.identifier and self.entry.decl_type != "enum": - self.emit_msg(ln, - f"wrong kernel-doc identifier on line:\n{line}") - self.state = state.NORMAL - - if self.config.verbose: - self.emit_msg(ln, - f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", - warning=False) - # - # Failed to find an identifier. Emit a warning - # - else: - self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") - - # - # Helper function to determine if a new section is being started. - # - def is_new_section(self, ln, line): - if doc_sect.search(line): - self.state = state.BODY - # - # Pick out the name of our new section, tweaking it if need be. 
- # - newsection = doc_sect.group(1) - if newsection.lower() == 'description': - newsection = 'Description' - elif newsection.lower() == 'context': - newsection = 'Context' - self.state = state.SPECIAL_SECTION - elif newsection.lower() in ["@return", "@returns", - "return", "returns"]: - newsection = "Return" - self.state = state.SPECIAL_SECTION - elif newsection[0] == '@': - self.state = state.SPECIAL_SECTION - # - # Initialize the contents, and get the new section going. - # - newcontents = doc_sect.group(2) - if not newcontents: - newcontents = "" - self.dump_section() - self.entry.begin_section(ln, newsection) - self.entry.leading_space = None - - self.entry.add_text(newcontents.lstrip()) - return True - return False - - # - # Helper function to detect (and effect) the end of a kerneldoc comment. - # - def is_comment_end(self, ln, line): - if doc_end.search(line): - self.dump_section() - - # Look for doc_com + + doc_end: - r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') - if r.match(line): - self.emit_msg(ln, f"suspicious ending line: {line}") - - self.entry.prototype = "" - self.entry.new_start_line = ln + 1 - - self.state = state.PROTO - return True - return False - - - def process_decl(self, ln, line): - """ - STATE_DECLARATION: We've seen the beginning of a declaration - """ - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - # - # Look for anything with the " * " line beginning. - # - if doc_content.search(line): - cont = doc_content.group(1) - # - # A blank line means that we have moved out of the declaration - # part of the comment (without any "special section" parameter - # descriptions). - # - if cont == "": - self.state = state.BODY - # - # Otherwise we have more of the declaration section to soak up. - # - else: - self.entry.declaration_purpose = \ - trim_whitespace(self.entry.declaration_purpose + ' ' + cont) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - - def process_special(self, ln, line): - """ - STATE_SPECIAL_SECTION: a section ending with a blank line - """ - # - # If we have hit a blank line (only the " * " marker), then this - # section is done. - # - if KernRe(r"\s*\*\s*$").match(line): - self.entry.begin_section(ln, dump = True) - self.state = state.BODY - return - # - # Not a blank line, look for the other ways to end the section. - # - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - # - # OK, we should have a continuation of the text for this section. - # - if doc_content.search(line): - cont = doc_content.group(1) - # - # If the lines of text after the first in a special section have - # leading white space, we need to trim it out or Sphinx will get - # confused. For the second line (the None case), see what we - # find there and remember it. - # - if self.entry.leading_space is None: - r = KernRe(r'^(\s+)') - if r.match(cont): - self.entry.leading_space = len(r.group(1)) - else: - self.entry.leading_space = 0 - # - # Otherwise, before trimming any leading chars, be *sure* - # that they are white space. We should maybe warn if this - # isn't the case. - # - for i in range(0, self.entry.leading_space): - if cont[i] != " ": - self.entry.leading_space = i - break - # - # Add the trimmed result to the section and we're done. - # - self.entry.add_text(cont[self.entry.leading_space:]) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - def process_body(self, ln, line): - """ - STATE_BODY: the bulk of a kerneldoc comment. 
- """ - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - - if doc_content.search(line): - cont = doc_content.group(1) - self.entry.add_text(cont) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - def process_inline_name(self, ln, line): - """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" - - if doc_inline_sect.search(line): - self.entry.begin_section(ln, doc_inline_sect.group(1)) - self.entry.add_text(doc_inline_sect.group(2).lstrip()) - self.state = state.INLINE_TEXT - elif doc_inline_end.search(line): - self.dump_section() - self.state = state.PROTO - elif doc_content.search(line): - self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") - self.state = state.PROTO - # else ... ?? - - def process_inline_text(self, ln, line): - """STATE_INLINE_TEXT: docbook comments within a prototype.""" - - if doc_inline_end.search(line): - self.dump_section() - self.state = state.PROTO - elif doc_content.search(line): - self.entry.add_text(doc_content.group(1)) - # else ... ?? - - def syscall_munge(self, ln, proto): # pylint: disable=W0613 - """ - Handle syscall definitions - """ - - is_void = False - - # Strip newlines/CR's - proto = re.sub(r'[\r\n]+', ' ', proto) - - # Check if it's a SYSCALL_DEFINE0 - if 'SYSCALL_DEFINE0' in proto: - is_void = True - - # Replace SYSCALL_DEFINE with correct return type & function name - proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) - - r = KernRe(r'long\s+(sys_.*?),') - if r.search(proto): - proto = KernRe(',').sub('(', proto, count=1) - elif is_void: - proto = KernRe(r'\)').sub('(void)', proto, count=1) - - # Now delete all of the odd-numbered commas in the proto - # so that argument types & names don't have a comma between them - count = 0 - length = len(proto) - - if is_void: - length = 0 # skip the loop if is_void - - for ix in range(length): - if proto[ix] == ',': - count += 1 - if count % 2 == 1: - proto = proto[:ix] + ' ' + proto[ix + 1:] - - return proto - - def tracepoint_munge(self, ln, proto): - """ - Handle tracepoint definitions - """ - - tracepointname = None - tracepointargs = None - - # Match tracepoint name based on different patterns - r = KernRe(r'TRACE_EVENT\((.*?),') - if r.search(proto): - tracepointname = r.group(1) - - r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') - if r.search(proto): - tracepointname = r.group(1) - - r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') - if r.search(proto): - tracepointname = r.group(2) - - if tracepointname: - tracepointname = tracepointname.lstrip() - - r = KernRe(r'TP_PROTO\((.*?)\)') - if r.search(proto): - tracepointargs = r.group(1) - - if not tracepointname or not tracepointargs: - self.emit_msg(ln, - f"Unrecognized tracepoint format:\n{proto}\n") - else: - proto = f"static inline void trace_{tracepointname}({tracepointargs})" - self.entry.identifier = f"trace_{self.entry.identifier}" - - return proto - - def process_proto_function(self, ln, line): - """Ancillary routine to process a function prototype""" - - # strip C99-style comments to end of line - line = KernRe(r"//.*$", re.S).sub('', line) - # - # Soak up the line's worth of prototype text, stopping at { or ; if present. - # - if KernRe(r'\s*#\s*define').match(line): - self.entry.prototype = line - elif not line.startswith('#'): # skip other preprocessor stuff - r = KernRe(r'([^\{]*)') - if r.match(line): - self.entry.prototype += r.group(1) + " " - # - # If we now have the whole prototype, clean it up and declare victory. 
- # - if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): - # strip comments and surrounding spaces - self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() - # - # Handle self.entry.prototypes for function pointers like: - # int (*pcs_config)(struct foo) - # by turning it into - # int pcs_config(struct foo) - # - r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') - self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) - # - # Handle special declaration syntaxes - # - if 'SYSCALL_DEFINE' in self.entry.prototype: - self.entry.prototype = self.syscall_munge(ln, - self.entry.prototype) - else: - r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') - if r.search(self.entry.prototype): - self.entry.prototype = self.tracepoint_munge(ln, - self.entry.prototype) - # - # ... and we're done - # - self.dump_function(ln, self.entry.prototype) - self.reset_state(ln) - - def process_proto_type(self, ln, line): - """Ancillary routine to process a type""" - - # Strip C99-style comments and surrounding whitespace - line = KernRe(r"//.*$", re.S).sub('', line).strip() - if not line: - return # nothing to see here - - # To distinguish preprocessor directive from regular declaration later. - if line.startswith('#'): - line += ";" - # - # Split the declaration on any of { } or ;, and accumulate pieces - # until we hit a semicolon while not inside {brackets} - # - r = KernRe(r'(.*?)([{};])') - for chunk in r.split(line): - if chunk: # Ignore empty matches - self.entry.prototype += chunk - # - # This cries out for a match statement ... someday after we can - # drop Python 3.9 ... - # - if chunk == '{': - self.entry.brcount += 1 - elif chunk == '}': - self.entry.brcount -= 1 - elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) - self.reset_state(ln) - return - # - # We hit the end of the line while still in the declaration; put - # in a space to represent the newline. - # - self.entry.prototype += ' ' - - def process_proto(self, ln, line): - """STATE_PROTO: reading a function/whatever prototype.""" - - if doc_inline_oneline.search(line): - self.entry.begin_section(ln, doc_inline_oneline.group(1)) - self.entry.add_text(doc_inline_oneline.group(2)) - self.dump_section() - - elif doc_inline_start.search(line): - self.state = state.INLINE_NAME - - elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) - - else: - self.process_proto_type(ln, line) - - def process_docblock(self, ln, line): - """STATE_DOCBLOCK: within a DOC: block.""" - - if doc_end.search(line): - self.dump_section() - self.output_declaration("doc", self.entry.identifier) - self.reset_state(ln) - - elif doc_content.search(line): - self.entry.add_text(doc_content.group(1)) - - def parse_export(self): - """ - Parses EXPORT_SYMBOL* macros from a single Kernel source file. - """ - - export_table = set() - - try: - with open(self.fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - - for line in fp: - self.process_export(export_table, line) - - except IOError: - return None - - return export_table - - # - # The state/action table telling us which function to invoke in - # each state. 
- # - state_actions = { - state.NORMAL: process_normal, - state.NAME: process_name, - state.BODY: process_body, - state.DECLARATION: process_decl, - state.SPECIAL_SECTION: process_special, - state.INLINE_NAME: process_inline_name, - state.INLINE_TEXT: process_inline_text, - state.PROTO: process_proto, - state.DOCBLOCK: process_docblock, - } - - def parse_kdoc(self): - """ - Open and process each line of a C source file. - The parsing is controlled via a state machine, and the line is passed - to a different process function depending on the state. The process - function may update the state as needed. - - Besides parsing kernel-doc tags, it also parses export symbols. - """ - - prev = "" - prev_ln = None - export_table = set() - - try: - with open(self.fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - for ln, line in enumerate(fp): - - line = line.expandtabs().strip("\n") - - # Group continuation lines on prototypes - if self.state == state.PROTO: - if line.endswith("\\"): - prev += line.rstrip("\\") - if not prev_ln: - prev_ln = ln - continue - - if prev: - ln = prev_ln - line = prev + line - prev = "" - prev_ln = None - - self.config.log.debug("%d %s: %s", - ln, state.name[self.state], - line) - - # This is an optimization over the original script. - # There, when export_file was used for the same file, - # it was read twice. Here, we use the already-existing - # loop to parse exported symbols as well. - # - if (self.state != state.NORMAL) or \ - not self.process_export(export_table, line): - # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) - - except OSError: - self.config.log.error(f"Error: Cannot open file {self.fname}") - - return export_table, self.entries diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py deleted file mode 100644 index 612223e1e723..000000000000 --- a/scripts/lib/kdoc/kdoc_re.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . - -""" -Regular expression ancillary classes. - -Those help caching regular expressions and do matching for kernel-doc. -""" - -import re - -# Local cache for regular expressions -re_cache = {} - - -class KernRe: - """ - Helper class to simplify regex declaration and usage, - - It calls re.compile for a given pattern. It also allows adding - regular expressions and define sub at class init time. - - Regular expressions can be cached via an argument, helping to speedup - searches. - """ - - def _add_regex(self, string, flags): - """ - Adds a new regex or re-use it from the cache. - """ - self.regex = re_cache.get(string, None) - if not self.regex: - self.regex = re.compile(string, flags=flags) - if self.cache: - re_cache[string] = self.regex - - def __init__(self, string, cache=True, flags=0): - """ - Compile a regular expression and initialize internal vars. - """ - - self.cache = cache - self.last_match = None - - self._add_regex(string, flags) - - def __str__(self): - """ - Return the regular expression pattern. - """ - return self.regex.pattern - - def __add__(self, other): - """ - Allows adding two regular expressions into one. 
-        """
-
-        return KernRe(str(self) + str(other), cache=self.cache or other.cache,
-                      flags=self.regex.flags | other.regex.flags)
-
-    def match(self, string):
-        """
-        Handles a re.match storing its results
-        """
-
-        self.last_match = self.regex.match(string)
-        return self.last_match
-
-    def search(self, string):
-        """
-        Handles a re.search storing its results
-        """
-
-        self.last_match = self.regex.search(string)
-        return self.last_match
-
-    def findall(self, string):
-        """
-        Alias to re.findall
-        """
-
-        return self.regex.findall(string)
-
-    def split(self, string):
-        """
-        Alias to re.split
-        """
-
-        return self.regex.split(string)
-
-    def sub(self, sub, string, count=0):
-        """
-        Alias to re.sub
-        """
-
-        return self.regex.sub(sub, string, count=count)
-
-    def group(self, num):
-        """
-        Returns the group results of the last match
-        """
-
-        return self.last_match.group(num)
-
-
-class NestedMatch:
-    """
-    Finding nested delimiters is hard with regular expressions. It is
-    even harder on Python with its normal re module, as there are several
-    advanced regular expressions that are missing.
-
-    This is the case of this pattern:
-
-            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
-
-    which is used to properly match the open/close parentheses of the
-    search string STRUCT_GROUP().
-
-    Add a class that counts pairs of delimiters, using it to match and
-    replace nested expressions.
-
-    The original approach was suggested by:
-        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
-
-    Although I re-implemented it to make it more generic and match 3 types
-    of delimiters. The logic checks if delimiters are paired. If not, it
-    will ignore the search string.
-    """
-
-    # TODO: make NestedMatch handle multiple match groups
-    #
-    # Right now, regular expressions to match it are defined only up to
-    #       the start delimiter, e.g.:
-    #
-    #       \bSTRUCT_GROUP\(
-    #
-    # is similar to: STRUCT_GROUP\((.*)\)
-    # except that the content inside the match group is delimiter-aligned.
-    #
-    # The content inside parentheses is converted into a single replace
-    # group (e.g. r`\1').
-    #
-    # It would be nice to change such definition to support multiple
-    # match groups, allowing a regex equivalent to:
-    #
-    #   FOO\((.*), (.*), (.*)\)
-    #
-    # it is probably easier to define it not as a regular expression, but
-    # with some lexical definition like:
-    #
-    #   FOO(arg1, arg2, arg3)
-
-    DELIMITER_PAIRS = {
-        '{': '}',
-        '(': ')',
-        '[': ']',
-    }
-
-    RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')
-
-    def _search(self, regex, line):
-        """
-        Finds paired blocks for a regex that ends with a delimiter.
-
-        The suggestion of using finditer to match pairs came from:
-        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
-        but I ended up using a different implementation to align all three
-        types of delimiters and seek for an initial regular expression.
-
-        The algorithm seeks for open/close paired delimiters and places them
-        on a stack, yielding a start/stop position of each match when the
-        stack is zeroed.
-
-        The algorithm should work fine for properly paired lines, but will
-        silently ignore end delimiters that precede a start delimiter.
-        This should be OK for the kernel-doc parser, as unaligned delimiters
-        would cause compilation errors. So, we don't need to raise exceptions
-        to cover such issues.
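
A usage sketch (editorial illustration, not part of the patch; it assumes the
NestedMatch class as defined in this file and mirrors the STRUCT_GROUP()
pattern quoted in the class docstring):

    import re

    nested = NestedMatch()
    line = "struct foo { STRUCT_GROUP(int a; int b;); int c; };"
    # sub() pairs the '(' that terminates the regex with its closing ')',
    # then substitutes the whole span; r'\1' receives the inner text with
    # the delimiters stripped, and the trailing ';' is dropped.
    out = nested.sub(re.compile(r'\bSTRUCT_GROUP\('), r'\1', line)
    # out == 'struct foo { int a; int b; int c; };'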
-        """
-
-        stack = []
-
-        for match_re in regex.finditer(line):
-            start = match_re.start()
-            offset = match_re.end()
-
-            d = line[offset - 1]
-            if d not in self.DELIMITER_PAIRS:
-                continue
-
-            end = self.DELIMITER_PAIRS[d]
-            stack.append(end)
-
-            for match in self.RE_DELIM.finditer(line[offset:]):
-                pos = match.start() + offset
-
-                d = line[pos]
-
-                if d in self.DELIMITER_PAIRS:
-                    end = self.DELIMITER_PAIRS[d]
-
-                    stack.append(end)
-                    continue
-
-                # Does the end delimiter match what is expected?
-                if stack and d == stack[-1]:
-                    stack.pop()
-
-                    if not stack:
-                        yield start, offset, pos + 1
-                        break
-
-    def search(self, regex, line):
-        """
-        This is similar to re.search:
-
-        It matches a regex that is followed by a delimiter,
-        returning occurrences only if all delimiters are paired.
-        """
-
-        for t in self._search(regex, line):
-
-            yield line[t[0]:t[2]]
-
-    def sub(self, regex, sub, line, count=0):
-        """
-        This is similar to re.sub:
-
-        It matches a regex that is followed by a delimiter,
-        replacing occurrences only if all delimiters are paired.
-
-        If r'\1' is used, it works just like re: it places there the
-        matched paired data with the delimiters stripped.
-
-        If count is different from zero, it will replace at most count
-        items.
-        """
-        out = ""
-
-        cur_pos = 0
-        n = 0
-
-        for start, end, pos in self._search(regex, line):
-            out += line[cur_pos:start]
-
-            # Value, ignoring start/end delimiters
-            value = line[end:pos - 1]
-
-            # replaces \1 at the sub string, if \1 is used there
-            new_sub = sub
-            new_sub = new_sub.replace(r'\1', value)
-
-            out += new_sub
-
-            # Drop end ';' if any
-            if line[pos] == ';':
-                pos += 1
-
-            cur_pos = pos
-            n += 1
-
-            if count and n >= count:
-                break
-
-        # Append the remaining string
-        l = len(line)
-        out += line[cur_pos:l]
-
-        return out
diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py
index c0997d6861dc..c48bb05dad82 100755
--- a/tools/docs/check-variable-fonts.py
+++ b/tools/docs/check-variable-fonts.py
@@ -9,13 +9,17 @@
 """
 Detect problematic Noto CJK variable fonts.
 
-or more details, see lib/latex_fonts.py.
+For more details, see .../tools/lib/python/kdoc/latex_fonts.py.
 """
 
 import argparse
 import sys
+import os.path
 
-from lib.latex_fonts import LatexFontChecker
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc'))
+
+from latex_fonts import LatexFontChecker
 
 checker = LatexFontChecker()
 
diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py
index da69e77559cc..e0abfe12fac7 100755
--- a/tools/docs/get_abi.py
+++ b/tools/docs/get_abi.py
@@ -14,7 +14,7 @@ import sys
 
 # Import Python modules
 
-LIB_DIR = "../../scripts/lib/abi"
+LIB_DIR = "../lib/python/abi"
 SRC_DIR = os.path.dirname(os.path.realpath(__file__))
 sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
 
diff --git a/tools/docs/lib/__init__.py b/tools/docs/lib/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/docs/lib/enrich_formatter.py b/tools/docs/lib/enrich_formatter.py
deleted file mode 100644
index bb171567a4ca..000000000000
--- a/tools/docs/lib/enrich_formatter.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2025 by Mauro Carvalho Chehab .
-
-"""
-Ancillary argparse HelpFormatter class that works in a similar way to
-argparse.RawDescriptionHelpFormatter, i.e. the description maintains line
-breaks, but it also implements transformations to the help text. The
-actual transformations are given by enrich_text(), if the output is a tty.
-
-Currently, the following transformations are done:
-
-  - Positional arguments are shown in upper case;
-  - if the output is a TTY, ``var`` and positional arguments are shown
-    prepended by an ANSI SGR code. This is usually translated to bold. On
-    some terminals, like konsole, this is translated into colored bold text.
-"""
-
-import argparse
-import re
-import sys
-
-class EnrichFormatter(argparse.HelpFormatter):
-    """
-    Better format the output, making it easier to identify the positional
-    args and how they're used at the __doc__ description.
-    """
-    def __init__(self, *args, **kwargs):
-        """Initialize class and check if the output is a TTY"""
-        super().__init__(*args, **kwargs)
-        self._tty = sys.stdout.isatty()
-
-    def enrich_text(self, text):
-        """Handle ReST markups (currently, only ``foo``)"""
-        if self._tty and text:
-            # Replace ``text`` with ANSI SGR (bold)
-            return re.sub(r'\`\`(.+?)\`\`',
-                          lambda m: f'\033[1m{m.group(1)}\033[0m', text)
-        return text
-
-    def _fill_text(self, text, width, indent):
-        """Enrich descriptions that have markup on them"""
-        enriched = self.enrich_text(text)
-        return "\n".join(indent + line for line in enriched.splitlines())
-
-    def _format_usage(self, usage, actions, groups, prefix):
-        """Enrich positional arguments at the usage: line"""
-
-        prog = self._prog
-        parts = []
-
-        for action in actions:
-            if action.option_strings:
-                opt = action.option_strings[0]
-                if action.nargs != 0:
-                    opt += f" {action.dest.upper()}"
-                parts.append(f"[{opt}]")
-            else:
-                # Positional argument
-                parts.append(self.enrich_text(f"``{action.dest.upper()}``"))
-
-        usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n"
-        return usage_text
-
-    def _format_action_invocation(self, action):
-        """Enrich argument names"""
-        if not action.option_strings:
-            return self.enrich_text(f"``{action.dest.upper()}``")
-
-        return ", ".join(action.option_strings)
diff --git a/tools/docs/lib/latex_fonts.py b/tools/docs/lib/latex_fonts.py
deleted file mode 100755
index 29317f8006ea..000000000000
--- a/tools/docs/lib/latex_fonts.py
+++ /dev/null
@@ -1,167 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0-only
-# Copyright (C) Akira Yokosawa, 2024
-#
-# Ported to Python by (c) Mauro Carvalho Chehab, 2025
-
-"""
-Detect problematic Noto CJK variable fonts.
-
-For "make pdfdocs", reports of build errors of translations.pdf started
-arriving early in 2024 [1, 2]. It turned out that Fedora and openSUSE
-tumbleweed have started deploying the variable-font [3] format of "Noto
-CJK" fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK
-(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which
-does not (and likely never will) understand variable fonts for historical
-reasons.
-
-The build error happens even when both variable- and non-variable-format
-fonts are found on the build system. To make matters worse, Fedora enlists
-variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN,
--zh_TW, etc. Hence developers who have an interest in CJK pages are more
-likely to encounter the build errors.
-
-This script is invoked from the error path of "make pdfdocs" and emits
-suggestions if variable-font files of "Noto CJK" fonts are in the list of
-fonts accessible from XeTeX.
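
In practice, the denylist mechanism boils down to pointing fontconfig at an
alternate configuration directory. A minimal sketch of the idea (editorial
illustration, mirroring the LatexFontChecker code further down; fc-list must
be installed for this to run):

    import os
    import subprocess

    env = os.environ.copy()
    # Hide variable fonts from XeTeX: make fontconfig read
    # $FONTS_CONF_DENY_VF/fontconfig/fonts.conf (default: ~/deny-vf).
    env["XDG_CONFIG_HOME"] = os.path.expanduser(
        os.environ.get("FONTS_CONF_DENY_VF", "~/deny-vf"))
    # With the denylist active, variable fonts disappear from this listing:
    subprocess.run(["fc-list", ":", "file", "family", "variable"],
                   env=env, check=False)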
-
-References:
-[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/
-[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/
-[3]: https://en.wikipedia.org/wiki/Variable_font
-[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts
-[5]: https://build.opensuse.org/request/show/1157217
-
-#===========================================================================
-Workarounds for building translations.pdf
-#===========================================================================
-
-* Denylist "variable font" Noto CJK fonts.
-  - Create $HOME/deny-vf/fontconfig/fonts.conf from the template below,
-    with tweaks if necessary.
-  - The path of fontconfig/fonts.conf can be overridden by setting the env
-    variable FONTS_CONF_DENY_VF.
-
-  * Template:
------------------------------------------------------------------
-<?xml version="1.0"?>
-<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
-<fontconfig>
-<!-- Ignore variable-font globs (not to break xetex) -->
-    <selectfont>
-        <rejectfont>
-            <!-- for Fedora -->
-            <glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob>
-            <!-- for openSUSE tumbleweed -->
-            <glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob>
-        </rejectfont>
-    </selectfont>
-</fontconfig>
------------------------------------------------------------------
-
-  The denylisting is activated for "make pdfdocs".
-
-* For skipping CJK pages in PDF
-  - Uninstall texlive-xecjk.
-    Denylisting is not needed in this case.
-
-* For printing CJK pages in PDF
-  - Need non-variable "Noto CJK" fonts.
-    * Fedora
-      - google-noto-sans-cjk-fonts
-      - google-noto-serif-cjk-fonts
-    * openSUSE tumbleweed
-      - Non-variable "Noto CJK" fonts are not available as distro packages
-        as of April, 2024. Fetch a set of font files from the upstream Noto
-        CJK Font releases at:
-          https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc
-        and at:
-          https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc
-        then uncompress and deploy them.
-      - Remember to update the fontconfig cache by running fc-cache.
-
-!!! Caution !!!
-Uninstalling "variable font" packages can be dangerous.
-They might be depended upon by other packages important for your work.
-Denylisting should be less invasive, as it is effective only while
-XeLaTeX runs in "make pdfdocs".
-"""
-
-import os
-import re
-import subprocess
-import textwrap
-import sys
-
-class LatexFontChecker:
-    """
-    Detect problems with CJK variable fonts that affect PDF builds for
-    translations.
-    """
-
-    def __init__(self, deny_vf=None):
-        if not deny_vf:
-            deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf")
-
-        self.environ = os.environ.copy()
-        self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf)
-
-        self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK")
-
-    def description(self):
-        return __doc__
-
-    def get_noto_cjk_vf_fonts(self):
-        """Get Noto CJK fonts"""
-
-        cjk_fonts = set()
-        cmd = ["fc-list", ":", "file", "family", "variable"]
-        try:
-            result = subprocess.run(cmd, stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE,
-                                    universal_newlines=True,
-                                    env=self.environ,
-                                    check=True)
-
-        except subprocess.CalledProcessError as exc:
-            sys.exit(f"Error running fc-list: {repr(exc)}")
-
-        for line in result.stdout.splitlines():
-            if 'variable=True' not in line:
-                continue
-
-            match = self.re_cjk.search(line)
-            if match:
-                cjk_fonts.add(match.group(1))
-
-        return sorted(cjk_fonts)
-
-    def check(self):
-        """Check for problems with CJK fonts"""
-
-        fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), "    ")
-        if not fonts:
-            return None
-
-        rel_file = os.path.relpath(__file__, os.getcwd())
-
-        msg = "=" * 77 + "\n"
-        msg += 'XeTeX is confused by "variable font" files listed below:\n'
-        msg += fonts + "\n"
-        msg += textwrap.dedent(f"""
-            For CJK pages in PDF, they need to be hidden from XeTeX by denylisting.
-            Or, CJK pages can be skipped by uninstalling texlive-xecjk.
-
-            For more info on denylisting, other options, and variable fonts, run:
-
-                tools/docs/check-variable-fonts.py -h
-            """)
-        msg += "=" * 77
-
-        return msg
diff --git a/tools/docs/lib/parse_data_structs.py b/tools/docs/lib/parse_data_structs.py
deleted file mode 100755
index 25361996cd20..000000000000
--- a/tools/docs/lib/parse_data_structs.py
+++ /dev/null
@@ -1,482 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2016-2025 by Mauro Carvalho Chehab .
-# pylint: disable=R0912,R0915
-
-"""
-Parse a source file or header, creating ReStructured Text cross references.
-
-It accepts an optional file to change the default symbol reference or to
-suppress symbols from the output.
-
-It is capable of identifying defines, functions, structs, typedefs,
-enums and enum symbols and creating cross-references for all of them.
-It is also capable of distinguishing a #define used for specifying a
-Linux ioctl.
-
-The optional rules file contains a set of rules like:
-
-    ignore ioctl VIDIOC_ENUM_FMT
-    replace ioctl VIDIOC_DQBUF vidioc_qbuf
-    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
-"""
-
-import os
-import re
-import sys
-
-
-class ParseDataStructs:
-    """
-    Creates an enriched version of a Kernel header file with cross-links
-    to each C data structure type.
-
-    It is meant to allow having more comprehensive documentation, where
-    uAPI headers will create cross-reference links to the code.
-
-    It is capable of identifying defines, functions, structs, typedefs,
-    enums and enum symbols and creating cross-references for all of them.
-    It is also capable of distinguishing a #define used for specifying a
-    Linux ioctl.
-
-    By default, it creates rules for all symbols and defines, but it also
-    allows parsing an exceptions file. Such a file contains a set of rules
-    using the syntax below:
-
-    1. Ignore rules:
-
-        ignore <type> <symbol>
-
-       Removes the symbol from reference generation.
-
-    2. Replace rules:
-
-        replace <type> <old_symbol> <new_reference>
-
-       Replaces old_symbol with new_reference. The new_reference can be:
-
-       - A simple symbol name;
-       - A full Sphinx reference.
-
-    3. Namespace rules:
-
-        namespace <namespace>
-
-       Sets the C namespace to be used during cross-reference generation.
-       Can be overridden by replace rules.
-
-    On ignore and replace rules, <type> can be:
-
-       ioctl: for defines that end with _IO*, e.g. ioctl definitions;
-       define: for other defines;
-       symbol: for symbols defined within enums;
-       typedef: for typedefs;
-       enum: for the name of a non-anonymous enum;
-       struct: for structs.
-
-    Examples:
-
-        ignore define __LINUX_MEDIA_H
-        ignore ioctl VIDIOC_ENUM_FMT
-        replace ioctl VIDIOC_DQBUF vidioc_qbuf
-        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
-
-        namespace MC
-    """
-
-    # Parser regexes with multiple ways to capture enums and structs
-    RE_ENUMS = [
-        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
-        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
-        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
-        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
-    ]
-    RE_STRUCTS = [
-        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
-        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
-        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
-        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
-    ]
-
-    # FIXME: the original code was written long before the Sphinx C
-    # domain gained support for multiple namespaces. To avoid too much
-    # churn at the existing hyperlinks, the code kept using "c:type"
-    # instead of the right types. To change that, we need to change the
-    # types not only here, but also at the uAPI media documentation.
-    DEF_SYMBOL_TYPES = {
-        "ioctl": {
-            "prefix": "\\ ",
-            "suffix": "\\ ",
-            "ref_type": ":ref",
-            "description": "IOCTL Commands",
-        },
-        "define": {
-            "prefix": "\\ ",
-            "suffix": "\\ ",
-            "ref_type": ":ref",
-            "description": "Macros and Definitions",
-        },
-        # We're calling each definition inside an enum a "symbol"
-        "symbol": {
-            "prefix": "\\ ",
-            "suffix": "\\ ",
-            "ref_type": ":ref",
-            "description": "Enumeration values",
-        },
-        "typedef": {
-            "prefix": "\\ ",
-            "suffix": "\\ ",
-            "ref_type": ":c:type",
-            "description": "Type Definitions",
-        },
-        # This is the description of the enum itself
-        "enum": {
-            "prefix": "\\ ",
-            "suffix": "\\ ",
-            "ref_type": ":c:type",
-            "description": "Enumerations",
-        },
-        "struct": {
-            "prefix": "\\ ",
-            "suffix": "\\ ",
-            "ref_type": ":c:type",
-            "description": "Structures",
-        },
-    }
-
-    def __init__(self, debug: bool = False):
-        """Initialize internal vars"""
-        self.debug = debug
-        self.data = ""
-
-        self.symbols = {}
-
-        self.namespace = None
-        self.ignore = []
-        self.replace = []
-        self.exceptions_name = None
-
-        for symbol_type in self.DEF_SYMBOL_TYPES:
-            self.symbols[symbol_type] = {}
-
-    def read_exceptions(self, fname: str):
-        """Read an exceptions file with ignore/replace/namespace rules"""
-        if not fname:
-            return
-
-        name = os.path.basename(fname)
-        self.exceptions_name = name
-
-        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
-            for ln, line in enumerate(f):
-                ln += 1
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-
-                # ignore rules
-                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
-
-                if match:
-                    self.ignore.append((ln, match.group(1), match.group(2)))
-                    continue
-
-                # replace rules
-                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
-                if match:
-                    self.replace.append((ln, match.group(1), match.group(2),
-                                         match.group(3)))
-                    continue
-
-                match = re.match(r"^namespace\s+(\S+)", line)
-                if match:
-                    self.namespace = match.group(1)
-                    continue
-
-                sys.exit(f"{name}:{ln}: invalid line: {line}")
-
-    def apply_exceptions(self):
-        """
-        Process the exceptions file with rules to ignore or replace references.
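
As an illustration (editorial sketch; the header and rules-file names are
hypothetical, the calls are the ones defined in this class):

    parser = ParseDataStructs(debug=True)
    # "media.h.exceptions" would hold ignore/replace/namespace rules
    # using the syntax documented above.
    parser.parse_file("include/uapi/linux/media.h", "media.h.exceptions")
    parser.debug_print()
    parser.write_output("include/uapi/linux/media.h", "media.h.rst", toc=False)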
-        """
-
-        # Handle ignore rules
-        for ln, c_type, symbol in self.ignore:
-            if c_type not in self.DEF_SYMBOL_TYPES:
-                sys.exit(f"{self.exceptions_name}:{ln}: {c_type} is invalid")
-
-            d = self.symbols[c_type]
-            if symbol in d:
-                del d[symbol]
-
-        # Handle replace rules
-        for ln, c_type, old, new in self.replace:
-            if c_type not in self.DEF_SYMBOL_TYPES:
-                sys.exit(f"{self.exceptions_name}:{ln}: {c_type} is invalid")
-
-            reftype = None
-
-            # Parse reference type when the type is specified
-
-            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
-            if match:
-                reftype = f":c:{match.group(1)}"
-                new = match.group(2)
-            else:
-                match = re.search(r"(\:ref)\:\`(.+)\`", new)
-                if match:
-                    reftype = match.group(1)
-                    new = match.group(2)
-
-            # If the replacement rule doesn't have a type, get the default
-            if not reftype:
-                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
-                if not reftype:
-                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")
-
-            new_ref = f"{reftype}:`{old} <{new}>`"
-
-            # Change self.symbols to use the replacement rule
-            if old in self.symbols[c_type]:
-                (_, ln) = self.symbols[c_type][old]
-                self.symbols[c_type][old] = (new_ref, ln)
-            else:
-                print(f"{self.exceptions_name}:{ln}: Warning: can't find {old} {c_type}")
-
-    def store_type(self, ln, symbol_type: str, symbol: str,
-                   ref_name: str = None, replace_underscores: bool = True):
-        """
-        Stores a new symbol at self.symbols under symbol_type.
-
-        By default, underscores are replaced by "-"
-        """
-        defs = self.DEF_SYMBOL_TYPES[symbol_type]
-
-        prefix = defs.get("prefix", "")
-        suffix = defs.get("suffix", "")
-        ref_type = defs.get("ref_type")
-
-        # Determine ref_link based on symbol type
-        if ref_type or self.namespace:
-            if not ref_name:
-                ref_name = symbol.lower()
-
-            # c-type references don't support hash
-            if ref_type == ":ref" and replace_underscores:
-                ref_name = ref_name.replace("_", "-")
-
-            # C domain references may have namespaces
-            if ref_type.startswith(":c:"):
-                if self.namespace:
-                    ref_name = f"{self.namespace}.{ref_name}"
-
-            if ref_type:
-                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
-            else:
-                ref_link = f"`{symbol} <{ref_name}>`"
-        else:
-            ref_link = symbol
-
-        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)
-
-    def store_line(self, line):
-        """Stores a line at self.data, properly indented"""
-        line = "  " + line.expandtabs()
-        self.data += line.rstrip(" ")
-
-    def parse_file(self, file_in: str, exceptions: str = None):
-        """Reads a C source file and gets identifiers"""
-        self.data = ""
-        is_enum = False
-        is_comment = False
-        multiline = ""
-
-        self.read_exceptions(exceptions)
-
-        with open(file_in, "r",
-                  encoding="utf-8", errors="backslashreplace") as f:
-            for line_no, line in enumerate(f):
-                self.store_line(line)
-                line = line.strip("\n")
-
-                # Handle continuation lines
-                if line.endswith("\\"):
-                    multiline += line[:-1]
-                    continue
-
-                if multiline:
-                    line = multiline + line
-                    multiline = ""
-
-                # Handle comments. 
They can be multilined - if not is_comment: - if re.search(r"/\*.*", line): - is_comment = True - else: - # Strip C99-style comments - line = re.sub(r"(//.*)", "", line) - - if is_comment: - if re.search(r".*\*/", line): - is_comment = False - else: - multiline = line - continue - - # At this point, line variable may be a multilined statement, - # if lines end with \ or if they have multi-line comments - # With that, it can safely remove the entire comments, - # and there's no need to use re.DOTALL for the logic below - - line = re.sub(r"(/\*.*\*/)", "", line) - if not line.strip(): - continue - - # It can be useful for debug purposes to print the file after - # having comments stripped and multi-lines grouped. - if self.debug > 1: - print(f"line {line_no + 1}: {line}") - - # Now the fun begins: parse each type and store it. - - # We opted for a two parsing logic here due to: - # 1. it makes easier to debug issues not-parsed symbols; - # 2. we want symbol replacement at the entire content, not - # just when the symbol is detected. - - if is_enum: - match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line) - if match: - self.store_type(line_no, "symbol", match.group(1)) - if "}" in line: - is_enum = False - continue - - match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line) - if match: - self.store_type(line_no, "ioctl", match.group(1), - replace_underscores=False) - continue - - match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line) - if match: - self.store_type(line_no, "define", match.group(1)) - continue - - match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);", - line) - if match: - name = match.group(2).strip() - symbol = match.group(3) - self.store_type(line_no, "typedef", symbol, ref_name=name) - continue - - for re_enum in self.RE_ENUMS: - match = re_enum.match(line) - if match: - self.store_type(line_no, "enum", match.group(1)) - is_enum = True - break - - for re_struct in self.RE_STRUCTS: - match = re_struct.match(line) - if match: - self.store_type(line_no, "struct", match.group(1)) - break - - self.apply_exceptions() - - def debug_print(self): - """ - Print debug information containing the replacement rules per symbol. - To make easier to check, group them per type. - """ - if not self.debug: - return - - for c_type, refs in self.symbols.items(): - if not refs: # Skip empty dictionaries - continue - - print(f"{c_type}:") - - for symbol, (ref, ln) in sorted(refs.items()): - print(f" #{ln:<5d} {symbol} -> {ref}") - - print() - - def gen_output(self): - """Write the formatted output to a file.""" - - # Avoid extra blank lines - text = re.sub(r"\s+$", "", self.data) + "\n" - text = re.sub(r"\n\s+\n", "\n\n", text) - - # Escape Sphinx special characters - text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) - - # Source uAPI files may have special notes. 
Use bold font for them
-        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)
-
-        # Delimiters to catch the entire symbol after escaped
-        start_delim = r"([ \n\t\(=\*\@])"
-        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"
-
-        # Process all reference types
-        for ref_dict in self.symbols.values():
-            for symbol, (replacement, _) in ref_dict.items():
-                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
-                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
-                              fr'\1{replacement}\2', text)
-
-        # Remove "\ " where not needed: before spaces and at the end of lines
-        text = re.sub(r"\\ ([\n ])", r"\1", text)
-        text = re.sub(r" \\ ", " ", text)
-
-        return text
-
-    def gen_toc(self):
-        """
-        Create a list of symbols to be part of a table of contents
-        """
-        text = []
-
-        # Sort symbol types per description
-        symbol_descriptions = []
-        for k, v in self.DEF_SYMBOL_TYPES.items():
-            symbol_descriptions.append((v['description'], k))
-
-        symbol_descriptions.sort()
-
-        # Process each category
-        for description, c_type in symbol_descriptions:
-
-            refs = self.symbols[c_type]
-            if not refs:  # Skip empty categories
-                continue
-
-            text.append(f"{description}")
-            text.append("-" * len(description))
-            text.append("")
-
-            # Sort symbols alphabetically
-            for symbol, (ref, ln) in sorted(refs.items()):
-                text.append(f"- LINENO_{ln}: {ref}")
-
-            text.append("")  # Add empty line between categories
-
-        return "\n".join(text)
-
-    def write_output(self, file_in: str, file_out: str, toc: bool):
-        """Write the enriched output (or its TOC) to file_out"""
-        title = os.path.basename(file_in)
-
-        if toc:
-            text = self.gen_toc()
-        else:
-            text = self.gen_output()
-
-        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
-            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
-            f.write(f"{title}\n")
-            f.write("=" * len(title) + "\n\n")
-
-            if not toc:
-                f.write(".. parsed-literal::\n\n")
-
-            f.write(text)
diff --git a/tools/docs/lib/python_version.py b/tools/docs/lib/python_version.py
deleted file mode 100644
index 4fde1b882164..000000000000
--- a/tools/docs/lib/python_version.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0-or-later
-# Copyright (c) 2017-2025 Mauro Carvalho Chehab
-
-"""
-Handle Python version check logic.
-
-Not all Python versions are supported by scripts. Yet, in some cases,
-like during documentation builds, a newer version of python could be
-available.
-
-This class allows checking if the minimal requirements are met.
-
-Better than that, PythonVersion.check_python() not only checks the minimal
-requirements, but it automatically switches to the newest available
-Python version if present.
-
-"""
-
-import os
-import re
-import subprocess
-import shlex
-import sys
-
-from glob import glob
-from textwrap import indent
-
-class PythonVersion:
-    """
-    Ancillary methods that check for missing dependencies of different
-    types, like binaries, python modules, rpm deps, etc.
-    """
-
-    def __init__(self, version):
-        """Initialize the self.version tuple from a version string"""
-        self.version = self.parse_version(version)
-
-    @staticmethod
-    def parse_version(version):
-        """Convert a major.minor.patch version into a tuple"""
-        return tuple(int(x) for x in version.split("."))
-
-    @staticmethod
-    def ver_str(version):
-        """Returns a version tuple as major.minor.patch"""
-        return ".".join([str(x) for x in version])
-
-    @staticmethod
-    def cmd_print(cmd, max_len=80):
-        """Format a command list for printing, wrapping long lines"""
-        cmd_line = []
-
-        for w in cmd:
-            w = shlex.quote(w)
-
-            if cmd_line:
-                if not max_len or len(cmd_line[-1]) + len(w) < max_len:
-                    cmd_line[-1] += " " + w
-                    continue
-                else:
-                    cmd_line[-1] += " \\"
-                    cmd_line.append(w)
-            else:
-                cmd_line.append(w)
-
-        return "\n    ".join(cmd_line)
-
-    def __str__(self):
-        """Returns a version tuple as major.minor.patch from self.version"""
-        return self.ver_str(self.version)
-
-    @staticmethod
-    def get_python_version(cmd):
-        """
-        Get the python version from a Python binary. As we need to detect
-        whether newer python binaries are available, we can't rely on
-        sys.version_info here.
-        """
-
-        kwargs = {}
-        if sys.version_info < (3, 7):
-            kwargs['universal_newlines'] = True
-        else:
-            kwargs['text'] = True
-
-        result = subprocess.run([cmd, "--version"],
-                                stdout=subprocess.PIPE,
-                                stderr=subprocess.PIPE,
-                                **kwargs, check=False)
-
-        version = result.stdout.strip()
-
-        match = re.search(r"(\d+\.\d+\.\d+)", version)
-        if match:
-            return PythonVersion.parse_version(match.group(1))
-
-        print(f"Can't parse version {version}")
-        return (0, 0, 0)
-
-    @staticmethod
-    def find_python(min_version):
-        """
-        Detect whether any python 3.xy version newer than the current
-        one is available.
-
-        Note: this routine is limited to up to 2 digits for python3. We
-        may need to update it one day, hopefully in a distant future.
-        """
-        patterns = [
-            "python3.[0-9][0-9]",
-            "python3.[0-9]",
-        ]
-
-        python_cmd = []
-
-        # Seek for a python binary newer than min_version
-        for path in os.getenv("PATH", "").split(":"):
-            for pattern in patterns:
-                for cmd in glob(os.path.join(path, pattern)):
-                    if os.path.isfile(cmd) and os.access(cmd, os.X_OK):
-                        version = PythonVersion.get_python_version(cmd)
-                        if version >= min_version:
-                            python_cmd.append((version, cmd))
-
-        return sorted(python_cmd, reverse=True)
-
-    @staticmethod
-    def check_python(min_version, show_alternatives=False, bail_out=False,
-                     success_on_error=False):
-        """
-        Check if the current python binary satisfies our minimal requirement
-        for the Sphinx build. If not, re-run with a newer version if found.
-        """
-        cur_ver = sys.version_info[:3]
-        if cur_ver >= min_version:
-            return
-
-        python_ver = PythonVersion.ver_str(cur_ver)
-
-        available_versions = PythonVersion.find_python(min_version)
-        if not available_versions:
-            print(f"ERROR: Python version {python_ver} is not supported anymore\n")
-            print("       Can't find a new version. This script may fail")
-            return
-
-        script_path = os.path.abspath(sys.argv[0])
-
-        # Check possible alternatives
-        if available_versions:
-            new_python_cmd = available_versions[0][1]
-        else:
-            new_python_cmd = None
-
-        if show_alternatives and available_versions:
-            print("You could run, instead:")
-            for _, cmd in available_versions:
-                args = [cmd, script_path] + sys.argv[1:]
-
-                cmd_str = indent(PythonVersion.cmd_print(args), "    ")
-                print(f"{cmd_str}\n")
-
-        if bail_out:
-            msg = f"Python {python_ver} not supported. 
Bailing out" - if success_on_error: - print(msg, file=sys.stderr) - sys.exit(0) - else: - sys.exit(msg) - - print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") - - # Restart script using the newer version - args = [new_python_cmd, script_path] + sys.argv[1:] - - try: - os.execv(new_python_cmd, args) - except OSError as e: - sys.exit(f"Failed to restart with {new_python_cmd}: {e}") diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py index 6716c7300258..ed9cf2bf22de 100755 --- a/tools/docs/parse-headers.py +++ b/tools/docs/parse-headers.py @@ -24,10 +24,13 @@ The optional ``FILE_RULES`` contains a set of rules like: replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` """ -import argparse +import argparse, sys +import os.path -from lib.parse_data_structs import ParseDataStructs -from lib.enrich_formatter import EnrichFormatter +src_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +from parse_data_structs import ParseDataStructs +from enrich_formatter import EnrichFormatter def main(): """Main function""" diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index 1efaca3d16aa..ce0b1b5292da 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -56,14 +56,15 @@ import sys from concurrent import futures from glob import glob -from lib.python_version import PythonVersion -from lib.latex_fonts import LatexFontChecker -LIB_DIR = "../../scripts/lib" +LIB_DIR = "../lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) +sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR + '/kdoc')) # temporary +from python_version import PythonVersion +from latex_fonts import LatexFontChecker from jobserver import JobserverExec # pylint: disable=C0413,C0411,E0401 # diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install index 647e1f60357f..d8c9fb76948d 100755 --- a/tools/docs/sphinx-pre-install +++ b/tools/docs/sphinx-pre-install @@ -32,8 +32,11 @@ import re import subprocess import sys from glob import glob +import os.path -from lib.python_version import PythonVersion +src_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +from python_version import PythonVersion RECOMMENDED_VERSION = PythonVersion("3.4.3").version MIN_PYTHON_VERSION = PythonVersion("3.7").version diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py new file mode 100644 index 000000000000..66a738013ce1 --- /dev/null +++ b/tools/lib/python/abi/abi_parser.py @@ -0,0 +1,628 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. 
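
A minimal driver for this module (editorial sketch; the directory path is an
assumption, while the calls mirror the API defined below):

    from abi_parser import AbiParser

    parser = AbiParser("Documentation/ABI")   # hypothetical ABI tree path
    parser.parse_abi()
    parser.check_issues()        # warn about duplicated What: entries
    # doc() is a generator yielding (message, source file, line number):
    for msg, fname, ln in parser.doc(output_in_txt=True):
        print(msg)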
+""" + +from argparse import Namespace +import logging +import os +import re + +from pprint import pformat +from random import randrange, seed + +# Import Python modules + +from helpers import AbiDebug, ABI_DIR + + +class AbiParser: + """Main class to parse ABI files""" + + TAGS = r"(what|where|date|kernelversion|contact|description|users)" + XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" + + def __init__(self, directory, logger=None, + enable_lineno=False, show_warnings=True, debug=0): + """Stores arguments for the class and initialize class vars""" + + self.directory = directory + self.enable_lineno = enable_lineno + self.show_warnings = show_warnings + self.debug = debug + + if not logger: + self.log = logging.getLogger("get_abi") + else: + self.log = logger + + self.data = {} + self.what_symbols = {} + self.file_refs = {} + self.what_refs = {} + + # Ignore files that contain such suffixes + self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") + + # Regular expressions used on parser + self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) + self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) + self.re_valid = re.compile(self.TAGS) + self.re_start_spc = re.compile(r"(\s*)(\S.*)") + self.re_whitespace = re.compile(r"^\s+") + + # Regular used on print + self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") + self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") + self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") + self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") + self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") + self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") + self.re_xref_node = re.compile(self.XREF) + + def warn(self, fdata, msg, extra=None): + """Displays a parse error if warning is enabled""" + + if not self.show_warnings: + return + + msg = f"{fdata.fname}:{fdata.ln}: {msg}" + if extra: + msg += "\n\t\t" + extra + + self.log.warning(msg) + + def add_symbol(self, what, fname, ln=None, xref=None): + """Create a reference table describing where each 'what' is located""" + + if what not in self.what_symbols: + self.what_symbols[what] = {"file": {}} + + if fname not in self.what_symbols[what]["file"]: + self.what_symbols[what]["file"][fname] = [] + + if ln and ln not in self.what_symbols[what]["file"][fname]: + self.what_symbols[what]["file"][fname].append(ln) + + if xref: + self.what_symbols[what]["xref"] = xref + + def _parse_line(self, fdata, line): + """Parse a single line of an ABI file""" + + new_what = False + new_tag = False + content = None + + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + sep = match.group(2) + content = match.group(3) + + match = self.re_valid.search(new) + if match: + new_tag = match.group(1) + else: + if fdata.tag == "description": + # New "tag" is actually part of description. + # Don't consider it a tag + new_tag = False + elif fdata.tag != "": + self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) + + if new_tag: + # "where" is Invalid, but was a common mistake. Warn if found + if new_tag == "where": + self.warn(fdata, "tag 'Where' is invalid. 
Should be 'What:' instead") + new_tag = "what" + + if new_tag == "what": + fdata.space = None + + if content not in self.what_symbols: + self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) + + if fdata.tag == "what": + fdata.what.append(content.strip("\n")) + else: + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fdata.fname, + ln=fdata.what_ln, xref=fdata.key) + + fdata.label = content + new_what = True + + key = "abi_" + content.lower() + fdata.key = self.re_unprintable.sub("_", key).strip("_") + + # Avoid duplicated keys but using a defined seed, to make + # the namespace identical if there aren't changes at the + # ABI symbols + seed(42) + + while fdata.key in self.data: + char = randrange(0, 51) + ord("A") + if char > ord("Z"): + char += ord("a") - ord("Z") - 1 + + fdata.key += chr(char) + + if fdata.key and fdata.key not in self.data: + self.data[fdata.key] = { + "what": [content], + "file": [fdata.file_ref], + "path": fdata.ftype, + "line_no": fdata.ln, + } + + fdata.what = self.data[fdata.key]["what"] + + self.what_refs[content] = fdata.key + fdata.tag = new_tag + fdata.what_ln = fdata.ln + + if fdata.nametag["what"]: + t = (content, fdata.key) + if t not in fdata.nametag["symbols"]: + fdata.nametag["symbols"].append(t) + + return + + if fdata.tag and new_tag: + fdata.tag = new_tag + + if new_what: + fdata.label = "" + + if "description" in self.data[fdata.key]: + self.data[fdata.key]["description"] += "\n\n" + + if fdata.file_ref not in self.data[fdata.key]["file"]: + self.data[fdata.key]["file"].append(fdata.file_ref) + + if self.debug == AbiDebug.WHAT_PARSING: + self.log.debug("what: %s", fdata.what) + + if not fdata.what: + self.warn(fdata, "'What:' should come first:", line) + return + + if new_tag == "description": + fdata.space = None + + if content: + sep = sep.replace(":", " ") + + c = " " * len(new_tag) + sep + content + c = c.expandtabs() + + match = self.re_start_spc.match(c) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + content = match.group(2) + "\n" + + self.data[fdata.key][fdata.tag] = content + + return + + # Store any contents before tags at the database + if not fdata.tag and "what" in fdata.nametag: + fdata.nametag["description"] += line + return + + if fdata.tag == "description": + content = line.expandtabs() + + if self.re_whitespace.sub("", content) == "": + self.data[fdata.key][fdata.tag] += "\n" + return + + if fdata.space is None: + match = self.re_start_spc.match(content) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + + content = match.group(2) + "\n" + else: + if content.startswith(fdata.space): + content = content[len(fdata.space):] + + else: + fdata.space = "" + + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += content + return + + content = line.strip() + if fdata.tag: + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") + return + + # Everything else is error + if content: + self.warn(fdata, "Unexpected content", line) + + def parse_readme(self, nametag, fname): + """Parse ABI README file""" + + nametag["what"] = ["Introduction"] + nametag["path"] = "README" + with open(fname, "r", 
encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + + match = self.re_valid.search(new) + if match: + nametag["description"] += "\n:" + line + continue + + nametag["description"] += line + + def parse_file(self, fname, path, basename): + """Parse a single file""" + + ref = f"abi_file_{path}_{basename}" + ref = self.re_unprintable.sub("_", ref).strip("_") + + # Store per-file state into a namespace variable. This will be used + # by the per-line parser state machine and by the warning function. + fdata = Namespace + + fdata.fname = fname + fdata.name = basename + + pos = fname.find(ABI_DIR) + if pos > 0: + f = fname[pos:] + else: + f = fname + + fdata.file_ref = (f, ref) + self.file_refs[f] = ref + + fdata.ln = 0 + fdata.what_ln = 0 + fdata.tag = "" + fdata.label = "" + fdata.what = [] + fdata.key = None + fdata.xrefs = None + fdata.space = None + fdata.ftype = path.split("/")[0] + + fdata.nametag = {} + fdata.nametag["what"] = [f"ABI file {path}/{basename}"] + fdata.nametag["type"] = "File" + fdata.nametag["path"] = fdata.ftype + fdata.nametag["file"] = [fdata.file_ref] + fdata.nametag["line_no"] = 1 + fdata.nametag["description"] = "" + fdata.nametag["symbols"] = [] + + self.data[ref] = fdata.nametag + + if self.debug & AbiDebug.WHAT_OPEN: + self.log.debug("Opening file %s", fname) + + if basename == "README": + self.parse_readme(fdata.nametag, fname) + return + + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + fdata.ln += 1 + + self._parse_line(fdata, line) + + if "description" in fdata.nametag: + fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") + + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fname, xref=fdata.key) + + def _parse_abi(self, root=None): + """Internal function to parse documentation ABI recursively""" + + if not root: + root = self.directory + + with os.scandir(root) as obj: + for entry in obj: + name = os.path.join(root, entry.name) + + if entry.is_dir(): + self._parse_abi(name) + continue + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if basename.startswith("."): + continue + + if basename.endswith(self.ignore_suffixes): + continue + + path = self.re_abi_dir.sub("", os.path.dirname(name)) + + self.parse_file(name, path, basename) + + def parse_abi(self, root=None): + """Parse documentation ABI""" + + self._parse_abi(root) + + if self.debug & AbiDebug.DUMP_ABI_STRUCTS: + self.log.debug(pformat(self.data)) + + def desc_txt(self, desc): + """Print description as found inside ABI files""" + + desc = desc.strip(" \t\n") + + return desc + "\n\n" + + def xref(self, fname): + """ + Converts a Documentation/ABI + basename into a ReST cross-reference + """ + + xref = self.file_refs.get(fname) + if not xref: + return None + else: + return xref + + def desc_rst(self, desc): + """Enrich ReST output by creating cross-references""" + + # Remove title markups from the description + # Having titles inside ABI files will only work if extra + # care would be taken in order to strictly follow the same + # level order for each markup. 
+ desc = self.re_title_mark.sub("\n\n", "\n" + desc) + desc = desc.rstrip(" \t\n").lstrip("\n") + + # Python's regex performance for non-compiled expressions is a lot + # than Perl, as Perl automatically caches them at their + # first usage. Here, we'll need to do the same, as otherwise the + # performance penalty is be high + + new_desc = "" + for d in desc.split("\n"): + if d == "": + new_desc += "\n" + continue + + # Use cross-references for doc files where needed + d = self.re_doc.sub(r":doc:`/\1`", d) + + # Use cross-references for ABI generated docs where needed + matches = self.re_abi.findall(d) + for m in matches: + abi = m[0] + m[1] + + xref = self.file_refs.get(abi) + if not xref: + # This may happen if ABI is on a separate directory, + # like parsing ABI testing and symbol is at stable. + # The proper solution is to move this part of the code + # for it to be inside sphinx/kernel_abi.py + self.log.info("Didn't find ABI reference for '%s'", abi) + else: + new = self.re_escape.sub(r"\\\1", m[1]) + d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) + + # Seek for cross reference symbols like /sys/... + # Need to be careful to avoid doing it on a code block + if d[0] not in [" ", "\t"]: + matches = self.re_xref_node.findall(d) + for m in matches: + # Finding ABI here is more complex due to wildcards + xref = self.what_refs.get(m) + if xref: + new = self.re_escape.sub(r"\\\1", m) + d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) + + new_desc += d + "\n" + + return new_desc + "\n\n" + + def doc(self, output_in_txt=False, show_symbols=True, show_file=True, + filter_path=None): + """Print ABI at stdout""" + + part = None + for key, v in sorted(self.data.items(), + key=lambda x: (x[1].get("type", ""), + x[1].get("what"))): + + wtype = v.get("type", "Symbol") + file_ref = v.get("file") + names = v.get("what", [""]) + + if wtype == "File": + if not show_file: + continue + else: + if not show_symbols: + continue + + if filter_path: + if v.get("path") != filter_path: + continue + + msg = "" + + if wtype != "File": + cur_part = names[0] + if cur_part.find("/") >= 0: + match = self.re_what.match(cur_part) + if match: + symbol = match.group(1).rstrip("/") + cur_part = "Symbols under " + symbol + + if cur_part and cur_part != part: + part = cur_part + msg += part + "\n"+ "-" * len(part) +"\n\n" + + msg += f".. _{key}:\n\n" + + max_len = 0 + for i in range(0, len(names)): # pylint: disable=C0200 + names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" + + max_len = max(max_len, len(names[i])) + + msg += "+-" + "-" * max_len + "-+\n" + for name in names: + msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" + msg += "+-" + "-" * max_len + "-+\n" + msg += "\n" + + for ref in file_ref: + if wtype == "File": + msg += f".. 
+                else:
+                    base = os.path.basename(ref[0])
+                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
+
+            if wtype == "File":
+                msg += names[0] + "\n" + "-" * len(names[0]) + "\n\n"
+
+            desc = v.get("description")
+            if not desc and wtype != "File":
+                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
+
+            if desc:
+                if output_in_txt:
+                    msg += self.desc_txt(desc)
+                else:
+                    msg += self.desc_rst(desc)
+
+            symbols = v.get("symbols")
+            if symbols:
+                msg += "Has the following ABI:\n\n"
+
+                for w, label in symbols:
+                    # Escape special chars from content
+                    content = self.re_escape.sub(r"\\\1", w)
+
+                    msg += f"- :ref:`{content} <{label}>`\n\n"
+
+            users = v.get("users")
+            if users and users.strip(" \t\n"):
+                users = users.strip("\n").replace('\n', '\n\t')
+                msg += f"Users:\n\t{users}\n\n"
+
+            ln = v.get("line_no", 1)
+
+            yield (msg, file_ref[0][0], ln)
+
+    def check_issues(self):
+        """Warn about duplicated ABI entries"""
+
+        for what, v in self.what_symbols.items():
+            files = v.get("file")
+            if not files:
+                # Should never happen if the parser works properly
+                self.log.warning("%s doesn't have a file associated", what)
+                continue
+
+            if len(files) == 1:
+                continue
+
+            f = []
+            for fname, lines in sorted(files.items()):
+                if not lines:
+                    f.append(f"{fname}")
+                elif len(lines) == 1:
+                    f.append(f"{fname}:{lines[0]}")
+                else:
+                    m = fname + ": lines "
+                    m += ", ".join(str(x) for x in lines)
+                    f.append(m)
+
+            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
+
+    def search_symbols(self, expr):
+        """Searches for ABI symbols"""
+
+        regex = re.compile(expr, re.I)
+
+        found_keys = 0
+        for t in sorted(self.data.items(), key=lambda x: x[0]):
+            v = t[1]
+
+            wtype = v.get("type", "")
+            if wtype == "File":
+                continue
+
+            for what in v.get("what", [""]):
+                if regex.search(what):
+                    found_keys += 1
+
+                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
+                    date = v.get("date", "").strip(" \t\n")
+                    contact = v.get("contact", "").strip(" \t\n")
+                    users = v.get("users", "").strip(" \t\n")
+                    desc = v.get("description", "").strip(" \t\n")
+
+                    files = []
+                    for f in v.get("file", ()):
+                        files.append(f[0])
+
+                    what = str(found_keys) + ". " + what
+                    title_tag = "-" * len(what)
+
+                    print(f"\n{what}\n{title_tag}\n")
+
+                    if kernelversion:
+                        print(f"Kernel version:\t\t{kernelversion}")
+
+                    if date:
+                        print(f"Date:\t\t\t{date}")
+
+                    if contact:
+                        print(f"Contact:\t\t{contact}")
+
+                    if users:
+                        print(f"Users:\t\t\t{users}")
+
+                    print("Defined on file(s):\t" + ", ".join(files))
+
+                    if desc:
+                        desc = desc.strip("\n")
+                        print(f"\n{desc}\n")
+
+        if not found_keys:
+            print(f"Regular expression /{expr}/ not found.")
diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py
new file mode 100644
index 000000000000..8a57846cbc69
--- /dev/null
+++ b/tools/lib/python/abi/abi_regex.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# xxpylint: disable=R0903
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Convert ABI what into regular expressions
+"""
+
+import re
+import sys
+
+from pprint import pformat
+
+from abi_parser import AbiParser
+from helpers import AbiDebug
+
+class AbiRegex(AbiParser):
+    """Extends AbiParser to search ABI nodes with regular expressions"""
+
+    # Escape only ASCII visible characters
+    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"
+    leave_others = "others"
+
+    # Tuples with regular expressions to be compiled and replacement data
+    re_whats = [
+        # Drop escape characters that might exist
+        (re.compile("\\\\"), ""),
+
+        # Temporarily escape dot characters
+        (re.compile(r"\."), "\xf6"),
+
+        # Temporarily change [0-9]+ type of patterns
+        (re.compile(r"\[0\-9\]\+"), "\xff"),
+
+        # Temporarily change [\d+-\d+] type of patterns
+        (re.compile(r"\[0\-\d+\]"), "\xff"),
+        (re.compile(r"\[0:\d+\]"), "\xff"),
+        (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),
+
+        # Temporarily change [0-9] type of patterns
+        (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"),
+
+        # Handle multiple option patterns
+        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),
+
+        # Handle wildcards
+        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
+        (re.compile(r"/\*/"), "/.*/"),
+        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
+        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
+        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
+        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),
+
+        (re.compile(r"XX+"), "\\\\w\xf7"),
+        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
+        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
+        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),
+
+        # Recover [0-9] type of patterns
+        (re.compile(r"\xf4"), "["),
+        (re.compile(r"\xf5"), "]"),
+
+        # Remove duplicated spaces
+        (re.compile(r"\s+"), r" "),
+
+        # Special case: drop comparison as in:
+        #   What: foo =
+        # (this happens on a few IIO definitions)
+        (re.compile(r"\s*\=.*$"), ""),
+
+        # Escape all other symbols
+        (re.compile(escape_symbols), r"\\\1"),
+        (re.compile(r"\\\\"), r"\\"),
+        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
+        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),
+
+        (re.compile(r"\xff"), r"\\d+"),
+
+        # Special case: IIO ABI entries that contain a parenthesis.
+        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),
+
+        # Simplify regexes with multiple .*
+        (re.compile(r"(?:\.\*){2,}"), ""),
+
+        # Recover dot characters
+        (re.compile(r"\xf6"), "\\."),
+        # Recover plus characters
+        (re.compile(r"\xf7"), "+"),
+    ]
+    re_has_num = re.compile(r"\\d")
+
+    # Symbol name after escape_chars that are considered a devnode basename
+    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")
+
+    # List of popular group names to be skipped to minimize regex group size
+    # Use AbiDebug.SUBGROUP_SIZE to detect those
+    skip_names = set(["devices", "hwmon"])
+
+    def regex_append(self, what, new):
+        """
+        Get a search group for a subset of regular expressions.
+
+        As ABI may have thousands of symbols, using a for loop to search all
+        regular expressions is at least O(n^2). When there are wildcards,
+        the complexity increases substantially, eventually becoming exponential.
+
+        To avoid spending too much time on them, use logic to split
+        them into groups. The smaller the group, the better, as it would
+        mean that searches will be confined to a small number of regular
+        expressions.
+
+        The conversion to a regex subset is tricky, as we need something
+        that can be easily obtained from the sysfs symbol and from the
+        regular expression. So, we need to discard nodes that have
+        wildcards.
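+
+        For example (hypothetical node), /sys/block/<disk>/queue/nr_requests
+        maps to a "nr_requests" subgroup: the last path component has no
+        wildcard, so the same key can be derived both from a sysfs file
+        name and from its regular expression.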
+
+        If it can't obtain a subgroup, place the regular expression inside
+        a special group (self.leave_others).
+        """
+
+        search_group = None
+
+        for search_group in reversed(new.split("/")):
+            if not search_group or search_group in self.skip_names:
+                continue
+            if self.re_symbol_name.match(search_group):
+                break
+
+        if not search_group:
+            search_group = self.leave_others
+
+        if self.debug & AbiDebug.SUBGROUP_MAP:
+            self.log.debug("%s: mapped as %s", what, search_group)
+
+        try:
+            if search_group not in self.regex_group:
+                self.regex_group[search_group] = []
+
+            self.regex_group[search_group].append(re.compile(new))
+            if self.search_string:
+                if what.find(self.search_string) >= 0:
+                    print(f"What: {what}")
+        except re.PatternError:
+            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
+                             " '%s'", what, new)
+
+    def get_regexes(self, what):
+        """
+        Given an ABI devnode, return a list of all regular expressions that
+        may match it, based on the sub-groups created by regex_append()
+        """
+
+        re_list = []
+
+        patches = what.split("/")
+        patches.reverse()
+        patches.append(self.leave_others)
+
+        for search_group in patches:
+            if search_group in self.regex_group:
+                re_list += self.regex_group[search_group]
+
+        return re_list
+
+    def __init__(self, *args, **kwargs):
+        """
+        Override init method to handle the search_string argument
+        """
+
+        self.regex_group = None
+        self.search_string = None
+        self.re_string = None
+
+        if "search_string" in kwargs:
+            self.search_string = kwargs.get("search_string")
+            del kwargs["search_string"]
+
+            if self.search_string:
+                try:
+                    self.re_string = re.compile(self.search_string)
+                except re.PatternError as e:
+                    msg = f"{self.search_string} is not a valid regular expression"
+                    raise ValueError(msg) from e
+
+        super().__init__(*args, **kwargs)
+
+    def parse_abi(self, *args, **kwargs):
+
+        super().parse_abi(*args, **kwargs)
+
+        self.regex_group = {}
+
+        print("Converting ABI What fields into regexes...", file=sys.stderr)
+
+        for t in sorted(self.data.items(), key=lambda x: x[0]):
+            v = t[1]
+            if v.get("type") == "File":
+                continue
+
+            v["regex"] = []
+
+            for what in v.get("what", []):
+                if not what.startswith("/sys"):
+                    continue
+
+                new = what
+                for r, s in self.re_whats:
+                    try:
+                        new = r.sub(s, new)
+                    except re.PatternError as e:
+                        # Help debugging troubles with new regexes
+                        raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e
+
+                v["regex"].append(new)
+
+                if self.debug & AbiDebug.REGEX:
+                    self.log.debug("%-90s <== %s", new, what)
+
+                # Store regex into a subgroup to speedup searches
+                self.regex_append(what, new)
+
+        if self.debug & AbiDebug.SUBGROUP_DICT:
+            self.log.debug("%s", pformat(self.regex_group))
+
+        if self.debug & AbiDebug.SUBGROUP_SIZE:
+            biggest_keys = sorted(self.regex_group.keys(),
+                                  key=lambda k: len(self.regex_group[k]),
+                                  reverse=True)
+
+            print("Top regex subgroups:", file=sys.stderr)
+            for k in biggest_keys[:10]:
+                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py
new file mode 100644
index 000000000000..639b23e4ca33
--- /dev/null
+++ b/tools/lib/python/abi/helpers.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>.
+# pylint: disable=R0903
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Helper classes for ABI parser
+"""
+
+ABI_DIR = "Documentation/ABI/"
+
+
+class AbiDebug:
+    """Debug levels"""
+
+    WHAT_PARSING = 1
+    WHAT_OPEN = 2
+    DUMP_ABI_STRUCTS = 4
+    UNDEFINED = 8
+    REGEX = 16
+    SUBGROUP_MAP = 32
+    SUBGROUP_DICT = 64
+    SUBGROUP_SIZE = 128
+    GRAPH = 256
+
+
+DEBUG_HELP = """
+1  - enable debug parsing logic
+2  - enable debug messages on file open
+4  - enable debug for ABI parse data
+8  - enable extra debug information to identify troubles
+     with ABI symbols found at the local machine that
+     weren't found on ABI documentation (used only for
+     undefined subcommand)
+16 - enable debug for what to regex conversion
+32 - enable debug for symbol regex subgroups
+64 - enable debug for sysfs graph tree variable
+"""
diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py
new file mode 100644
index 000000000000..f15c94a6e33c
--- /dev/null
+++ b/tools/lib/python/abi/system_symbols.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0912,R0914,R0915,R1702
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+import os
+import re
+import sys
+
+from concurrent import futures
+from datetime import datetime
+from random import shuffle
+
+from helpers import AbiDebug
+
+class SystemSymbols:
+    """Stores arguments for the class and initializes class vars"""
+
+    def graph_add_file(self, path, link=None):
+        """
+        Add a file path to the sysfs graph stored at self.root
+        """
+
+        if path in self.files:
+            return
+
+        name = ""
+        ref = self.root
+        for edge in path.split("/"):
+            name += edge + "/"
+            if edge not in ref:
+                ref[edge] = {"__name": [name.rstrip("/")]}
+
+            ref = ref[edge]
+
+        if link and link not in ref["__name"]:
+            ref["__name"].append(link.rstrip("/"))
+
+        self.files.add(path)
+
+    def print_graph(self, root_prefix="", root=None, level=0):
+        """Prints a reference tree graph using UTF-8 characters"""
+
+        if not root:
+            root = self.root
+            level = 0
+
+        # Prevent endless traverse
+        if level > 5:
+            return
+
+        if level > 0:
+            prefix = "├──"
+            last_prefix = "└──"
+        else:
+            prefix = ""
+            last_prefix = ""
+
+        items = list(root.items())
+
+        names = root.get("__name", [])
+        for k, edge in items:
+            if k == "__name":
+                continue
+
+            if not k:
+                k = "/"
+
+            if len(names) > 1:
+                k += " links: " + ",".join(names[1:])
+
+            if edge == items[-1][1]:
+                print(root_prefix + last_prefix + k)
+                p = root_prefix
+                if level > 0:
+                    p += " "
+                self.print_graph(p, edge, level + 1)
+            else:
+                print(root_prefix + prefix + k)
+                p = root_prefix + "│ "
+                self.print_graph(p, edge, level + 1)
+
+    def _walk(self, root):
+        """
+        Walk through sysfs to get all devnodes that aren't ignored.
+
+        By default, uses /sys as the sysfs mounting point. If another
+        directory is used, it is replaced by /sys in the reported paths.
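+
+        For example, assuming the class was created with sysfs="/tmp/sysfs",
+        an entry found at /tmp/sysfs/block/sda would be reported as
+        /sys/block/sda.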
+ """ + + with os.scandir(root) as obj: + for entry in obj: + path = os.path.join(root, entry.name) + if self.sysfs: + p = path.replace(self.sysfs, "/sys", count=1) + else: + p = path + + if self.re_ignore.search(p): + return + + # Handle link first to avoid directory recursion + if entry.is_symlink(): + real = os.path.realpath(path) + if not self.sysfs: + self.aliases[path] = real + else: + real = real.replace(self.sysfs, "/sys", count=1) + + # Add absfile location to graph if it doesn't exist + if not self.re_ignore.search(real): + # Add link to the graph + self.graph_add_file(real, p) + + elif entry.is_file(): + self.graph_add_file(p) + + elif entry.is_dir(): + self._walk(path) + + def __init__(self, abi, sysfs="/sys", hints=False): + """ + Initialize internal variables and get a list of all files inside + sysfs that can currently be parsed. + + Please notice that there are several entries on sysfs that aren't + documented as ABI. Ignore those. + + The real paths will be stored under self.files. Aliases will be + stored in separate, as self.aliases. + """ + + self.abi = abi + self.log = abi.log + + if sysfs != "/sys": + self.sysfs = sysfs.rstrip("/") + else: + self.sysfs = None + + self.hints = hints + + self.root = {} + self.aliases = {} + self.files = set() + + dont_walk = [ + # Those require root access and aren't documented at ABI + f"^{sysfs}/kernel/debug", + f"^{sysfs}/kernel/tracing", + f"^{sysfs}/fs/pstore", + f"^{sysfs}/fs/bpf", + f"^{sysfs}/fs/fuse", + + # This is not documented at ABI + f"^{sysfs}/module", + + f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI + f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings + "sections|notes", # aren't actually part of ABI + + # kernel-parameters.txt - not easy to parse + "parameters", + ] + + self.re_ignore = re.compile("|".join(dont_walk)) + + print(f"Reading {sysfs} directory contents...", file=sys.stderr) + self._walk(sysfs) + + def check_file(self, refs, found): + """Check missing ABI symbols for a given sysfs file""" + + res_list = [] + + try: + for names in refs: + fname = names[0] + + res = { + "found": False, + "fname": fname, + "msg": "", + } + res_list.append(res) + + re_what = self.abi.get_regexes(fname) + if not re_what: + self.abi.log.warning(f"missing rules for {fname}") + continue + + for name in names: + for r in re_what: + if self.abi.debug & AbiDebug.UNDEFINED: + self.log.debug("check if %s matches '%s'", name, r.pattern) + if r.match(name): + res["found"] = True + if found: + res["msg"] += f" {fname}: regex:\n\t" + continue + + if self.hints and not res["found"]: + res["msg"] += f" {fname} not found. 
+                    for r in re_what:
+                        res["msg"] += " " + r.pattern + "\n"
+
+        except KeyboardInterrupt:
+            pass
+
+        return res_list
+
+    def _ref_interactor(self, root):
+        """Recursive function to iterate over the sysfs tree"""
+
+        for k, v in root.items():
+            if isinstance(v, dict):
+                yield from self._ref_interactor(v)
+
+            if root == self.root or k == "__name":
+                continue
+
+            if self.abi.re_string:
+                fname = v["__name"][0]
+                if self.abi.re_string.search(fname):
+                    yield v
+            else:
+                yield v
+
+
+    def get_fileref(self, all_refs, chunk_size):
+        """Iterator to group refs into chunks"""
+
+        n = 0
+        refs = []
+
+        for ref in all_refs:
+            refs.append(ref)
+
+            n += 1
+            if n >= chunk_size:
+                yield refs
+                n = 0
+                refs = []
+
+        yield refs
+
+    def check_undefined_symbols(self, max_workers=None, chunk_size=50,
+                                found=None, dry_run=None):
+        """Search ABI for sysfs symbols missing documentation"""
+
+        self.abi.parse_abi()
+
+        if self.abi.debug & AbiDebug.GRAPH:
+            self.print_graph()
+
+        all_refs = []
+        for ref in self._ref_interactor(self.root):
+            all_refs.append(ref["__name"])
+
+        if dry_run:
+            print("Would check", file=sys.stderr)
+            for ref in all_refs:
+                print(", ".join(ref))
+
+            return
+
+        print("Starting to search symbols (it may take several minutes):",
+              file=sys.stderr)
+        start = datetime.now()
+        old_elapsed = None
+
+        # Python doesn't support multithreading due to limitations on its
+        # global lock (GIL). While Python 3.13 finally made the GIL optional,
+        # there are still issues related to it. Also, we want to have
+        # backward compatibility with older versions of Python.
+        #
+        # So, use multiple processes instead. However, Python is very slow
+        # passing data from/to multiple processes. Also, it may consume lots
+        # of memory if the data to be shared is not small. So, we need to
+        # group the workload in chunks that are big enough to generate
+        # performance gains while not being so big that they would cause
+        # out-of-memory.
+
+        num_refs = len(all_refs)
+        print(f"Number of references to parse: {num_refs}", file=sys.stderr)
+
+        if not max_workers:
+            max_workers = os.cpu_count()
+        elif max_workers > os.cpu_count():
+            max_workers = os.cpu_count()
+
+        max_workers = max(max_workers, 1)
+
+        max_chunk_size = int((num_refs + max_workers - 1) / max_workers)
+        chunk_size = min(chunk_size, max_chunk_size)
+        chunk_size = max(1, chunk_size)
+
+        if max_workers > 1:
+            executor = futures.ProcessPoolExecutor
+
+            # Place references in a random order. This may help improving
+            # performance, by mixing complex/simple expressions when creating
+            # chunks
+            shuffle(all_refs)
+        else:
+            # Python has a high overhead with processes. When there's just
+            # one worker, it is faster to not create a new process.
+            # Yet, the user still deserves to have a progress print. So, use
+            # python's "thread", which is actually a single process, using
+            # an internal schedule to switch between tasks. No performance
+            # gains for non-IO tasks, but still it can be quickly interrupted
+            # from time to time to display progress.
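+            # (Note: this is only a scheduling convenience; with a single
+            # worker all chunks still run serially in this same process,
+            # just interleaved with the progress updates below.)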
+            executor = futures.ThreadPoolExecutor
+
+        not_found = []
+        f_list = []
+        with executor(max_workers=max_workers) as exe:
+            for refs in self.get_fileref(all_refs, chunk_size):
+                if refs:
+                    try:
+                        f_list.append(exe.submit(self.check_file, refs, found))
+
+                    except KeyboardInterrupt:
+                        return
+
+            total = len(f_list)
+
+            if not total:
+                if self.abi.re_string:
+                    print(f"No ABI symbol matches {self.abi.search_string}")
+                else:
+                    self.abi.log.warning("No ABI symbols found")
+                return
+
+            print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
+                  file=sys.stderr)
+
+            while f_list:
+                try:
+                    t = futures.wait(f_list, timeout=1,
+                                     return_when=futures.FIRST_COMPLETED)
+
+                    done = t[0]
+
+                    for fut in done:
+                        res_list = fut.result()
+
+                        for res in res_list:
+                            if not res["found"]:
+                                not_found.append(res["fname"])
+                            if res["msg"]:
+                                print(res["msg"])
+
+                        f_list.remove(fut)
+                except KeyboardInterrupt:
+                    return
+
+                except RuntimeError as e:
+                    self.abi.log.warning(f"Future: {e}")
+                    break
+
+                if sys.stderr.isatty():
+                    elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+                    if len(f_list) < total:
+                        elapsed += f" ({total - len(f_list)}/{total} jobs completed). "
+                    if elapsed != old_elapsed:
+                        print(elapsed + "\r", end="", flush=True,
+                              file=sys.stderr)
+                        old_elapsed = elapsed
+
+        elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+        print(elapsed, file=sys.stderr)
+
+        for f in sorted(not_found):
+            print(f"{f} not found.")
diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py
new file mode 100755
index 000000000000..a24f30ef4fa8
--- /dev/null
+++ b/tools/lib/python/jobserver.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0+
+#
+# pylint: disable=C0103,C0209
+#
+#
+
+"""
+Interacts with the POSIX jobserver during kernel build time.
+
+A "normal" jobserver task, like the one initiated by a make subprocess,
+would do:
+
+  - open read/write file descriptors to communicate with the job server;
+  - ask for one slot by calling:
+        claim = os.read(reader, 1)
+  - when the job finishes, call:
+        os.write(writer, b"+")      # os.write(writer, claim)
+
+Here, the goal is different: This script aims to get the remaining number
+of slots available, using all of them to run a command which handles tasks
+in parallel. To do that, it has a loop that ends only after there are no
+slots left. It then increments the number by one, in order to allow a
+call equivalent to make -j$((claim+1)), e.g. having a parent make creating
+$claim children to do the actual work.
+
+The end goal here is to keep the total number of build tasks under the
+limit established by the initial make -j$n_proc call.
+
+See:
+    https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver
+"""
+
+import errno
+import os
+import subprocess
+import sys
+
+class JobserverExec:
+    """
+    Claim all slots from make using the POSIX Jobserver.
+
+    The main methods here are:
+      - open(): reserves all slots;
+      - close(): returns all used slots back to make;
+      - run(): executes a command setting PARALLELISM to the claimed
+        number of slots
+    """
+
+    def __init__(self):
+        """Initialize internal vars"""
+        self.claim = 0
+        self.jobs = b""
+        self.reader = None
+        self.writer = None
+        self.is_open = False
+
+    def open(self):
+        """Reserve all available slots to be claimed later on"""
+
+        if self.is_open:
+            return
+
+        try:
+            # Fetch the make environment options.
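+            # A typical value would look like (example only):
+            #     " -j8 --jobserver-auth=fifo:/tmp/GMfifo8134"
+            # or, for GNU Make before 4.4, "--jobserver-auth=R,W" with a
+            # pair of inherited pipe file descriptors.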
+            flags = os.environ["MAKEFLAGS"]
+
+            # Look for "--jobserver=R,W"
+            # Note that GNU Make has used --jobserver-fds and --jobserver-auth
+            # so this handles all of them.
+            opts = [x for x in flags.split(" ") if x.startswith("--jobserver")]
+
+            # Parse out R,W file descriptor numbers and set them nonblocking.
+            # If the MAKEFLAGS variable contains multiple instances of the
+            # --jobserver-auth= option, the last one is relevant.
+            fds = opts[-1].split("=", 1)[1]
+
+            # Starting with GNU Make 4.4, named pipes are used for reader
+            # and writer.
+            # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134
+            _, _, path = fds.partition("fifo:")
+
+            if path:
+                self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
+                self.writer = os.open(path, os.O_WRONLY)
+            else:
+                self.reader, self.writer = [int(x) for x in fds.split(",", 1)]
+                # Open a private copy of reader to avoid setting nonblocking
+                # on an unexpecting process with the same reader fd.
+                self.reader = os.open("/proc/self/fd/%d" % (self.reader),
+                                      os.O_RDONLY | os.O_NONBLOCK)
+
+            # Read out as many jobserver slots as possible
+            while True:
+                try:
+                    slot = os.read(self.reader, 8)
+                    self.jobs += slot
+                except (OSError, IOError) as e:
+                    if e.errno == errno.EWOULDBLOCK:
+                        # Stop at the end of the jobserver queue.
+                        break
+                    # If something went wrong, give back the jobs.
+                    if self.jobs:
+                        os.write(self.writer, self.jobs)
+                    raise e
+
+            # Add a bump for our caller's reservation, since we're just going
+            # to sit here blocked on our child.
+            self.claim = len(self.jobs) + 1
+
+        except (KeyError, IndexError, ValueError, OSError, IOError):
+            # Any missing environment strings or bad fds should result in just
+            # not being parallel.
+            self.claim = None
+
+        self.is_open = True
+
+    def close(self):
+        """Return all reserved slots to the Jobserver"""
+
+        if not self.is_open:
+            return
+
+        # Return all the reserved slots.
+        if len(self.jobs):
+            os.write(self.writer, self.jobs)
+
+        self.is_open = False
+
+    def __enter__(self):
+        self.open()
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        self.close()
+
+    def run(self, cmd, *args, **pwargs):
+        """
+        Run a command setting the PARALLELISM env variable to the number of
+        available job slots (claim) + 1, e.g. it will reserve claim slots
+        to do the actual build work, plus one to monitor its children.
+        """
+        self.open()     # Ensure that self.claim is set
+
+        # We can only claim parallelism if there was a jobserver (i.e. a
+        # top-level "-jN" argument) and there were no other failures. Otherwise
+        # leave out the environment variable and let the child figure out what
+        # is best.
+        if self.claim:
+            os.environ["PARALLELISM"] = str(self.claim)
+
+        return subprocess.call(cmd, *args, **pwargs)
diff --git a/tools/lib/python/kdoc/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py
new file mode 100644
index 000000000000..bb171567a4ca
--- /dev/null
+++ b/tools/lib/python/kdoc/enrich_formatter.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2025 by Mauro Carvalho Chehab <mchehab+huawei@kernel.org>.
+
+"""
+Ancillary argparse HelpFormatter class that works in a similar way to
+argparse.RawDescriptionHelpFormatter, e.g. the description maintains line
+breaks, but it also implements transformations to the help text. The
+actual transformations are given by enrich_text(), if the output is a tty.
+
+Currently, the following transformations are done:
+
+  - Positional arguments are shown in upper case;
+  - if the output is a TTY, ``var`` and positional arguments are shown
+    prepended by an ANSI SGR code. This is usually translated to bold.
+    On some terminals, like konsole, this is translated into a colored
+    bold text.
+"""
+
+import argparse
+import re
+import sys
+
+class EnrichFormatter(argparse.HelpFormatter):
+    """
+    Better format the output, making it easier to identify the positional
+    args and how they're used at the __doc__ description.
+    """
+    def __init__(self, *args, **kwargs):
+        """Initialize class and check if is TTY"""
+        super().__init__(*args, **kwargs)
+        self._tty = sys.stdout.isatty()
+
+    def enrich_text(self, text):
+        """Handle ReST markups (currently, only ``foo``)"""
+        if self._tty and text:
+            # Replace ``text`` with ANSI SGR (bold)
+            return re.sub(r'\`\`(.+?)\`\`',
+                          lambda m: f'\033[1m{m.group(1)}\033[0m', text)
+        return text
+
+    def _fill_text(self, text, width, indent):
+        """Enrich descriptions with markups on it"""
+        enriched = self.enrich_text(text)
+        return "\n".join(indent + line for line in enriched.splitlines())
+
+    def _format_usage(self, usage, actions, groups, prefix):
+        """Enrich positional arguments at usage: line"""
+
+        prog = self._prog
+        parts = []
+
+        for action in actions:
+            if action.option_strings:
+                opt = action.option_strings[0]
+                if action.nargs != 0:
+                    opt += f" {action.dest.upper()}"
+                parts.append(f"[{opt}]")
+            else:
+                # Positional argument
+                parts.append(self.enrich_text(f"``{action.dest.upper()}``"))
+
+        usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n"
+        return usage_text
+
+    def _format_action_invocation(self, action):
+        """Enrich argument names"""
+        if not action.option_strings:
+            return self.enrich_text(f"``{action.dest.upper()}``")
+
+        return ", ".join(action.option_strings)
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
new file mode 100644
index 000000000000..1fd8d17edb32
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>.
+#
+# pylint: disable=R0903,R0913,R0914,R0917
+
+"""
+Parse kernel-doc tags on multiple kernel source files.
+"""
+
+import argparse
+import logging
+import os
+import re
+
+from kdoc_parser import KernelDoc
+from kdoc_output import OutputFormat
+
+
+class GlobSourceFiles:
+    """
+    Parse C source code file names and directories via an iterator.
+    """
+
+    def __init__(self, srctree=None, valid_extensions=None):
+        """
+        Initialize valid extensions with a tuple.
+
+        If not defined, assume default C extensions (.c and .h).
+
+        It would be possible to use python's glob function, but it is
+        very slow, and it is not an iterator. So, it would wait to read
+        all directories before actually doing anything.
+
+        So, let's use our own implementation.
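+
+        A minimal usage sketch (paths are only illustrative):
+
+            glob = GlobSourceFiles(srctree="/path/to/linux")
+            for fname in glob.parse_files(["drivers/base"], None):
+                ...         # yields every .c/.h file found there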
+ """ + + if not valid_extensions: + self.extensions = (".c", ".h") + else: + self.extensions = valid_extensions + + self.srctree = srctree + + def _parse_dir(self, dirname): + """Internal function to parse files recursively""" + + with os.scandir(dirname) as obj: + for entry in obj: + name = os.path.join(dirname, entry.name) + + if entry.is_dir(follow_symlinks=False): + yield from self._parse_dir(name) + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if not basename.endswith(self.extensions): + continue + + yield name + + def parse_files(self, file_list, file_not_found_cb): + """ + Define an interator to parse all source files from file_list, + handling directories if any + """ + + if not file_list: + return + + for fname in file_list: + if self.srctree: + f = os.path.join(self.srctree, fname) + else: + f = fname + + if os.path.isdir(f): + yield from self._parse_dir(f) + elif os.path.isfile(f): + yield f + elif file_not_found_cb: + file_not_found_cb(fname) + + +class KernelFiles(): + """ + Parse kernel-doc tags on multiple kernel source files. + + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and + EXPORT_SYMBOL* macros; + - self.process_export_file(): parses only EXPORT_SYMBOL* macros. + """ + + def warning(self, msg): + """Ancillary routine to output a warning and increment error count""" + + self.config.log.warning(msg) + self.errors += 1 + + def error(self, msg): + """Ancillary routine to output an error and increment error count""" + + self.config.log.error(msg) + self.errors += 1 + + def parse_file(self, fname): + """ + Parse a single Kernel source. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.files: + return + + doc = KernelDoc(self.config, fname) + export_table, entries = doc.parse_kdoc() + + self.export_table[fname] = export_table + + self.files.add(fname) + self.export_files.add(fname) # parse_kdoc() already check exports + + self.results[fname] = entries + + def process_export_file(self, fname): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.export_files: + return + + doc = KernelDoc(self.config, fname) + export_table = doc.parse_export() + + if not export_table: + self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") + export_table = set() + + self.export_table[fname] = export_table + self.export_files.add(fname) + + def file_not_found_cb(self, fname): + """ + Callback to warn if a file was not found. + """ + + self.error(f"Cannot find file {fname}") + + def __init__(self, verbose=False, out_style=None, + werror=False, wreturn=False, wshort_desc=False, + wcontents_before_sections=False, + logger=None): + """ + Initialize startup variables and parse all files + """ + + if not verbose: + verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + + if out_style is None: + out_style = OutputFormat() + + if not werror: + kcflags = os.environ.get("KCFLAGS", None) + if kcflags: + match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) + if match: + werror = True + + # reading this variable is for backwards compat just in case + # someone was calling it with the variable from outside the + # kernel's build system + kdoc_werror = os.environ.get("KDOC_WERROR", None) + if kdoc_werror: + werror = kdoc_werror + + # Some variables are global to the parser logic as a whole as they are + # used to send control configuration to KernelDoc class. 
+        # As such, those variables are read-only inside the KernelDoc.
+        self.config = argparse.Namespace()
+
+        self.config.verbose = verbose
+        self.config.werror = werror
+        self.config.wreturn = wreturn
+        self.config.wshort_desc = wshort_desc
+        self.config.wcontents_before_sections = wcontents_before_sections
+
+        if not logger:
+            self.config.log = logging.getLogger("kernel-doc")
+        else:
+            self.config.log = logger
+
+        self.config.warning = self.warning
+
+        self.config.src_tree = os.environ.get("SRCTREE", None)
+
+        # Initialize variables that are internal to KernelFiles
+
+        self.out_style = out_style
+
+        self.errors = 0
+        self.results = {}
+
+        self.files = set()
+        self.export_files = set()
+        self.export_table = {}
+
+    def parse(self, file_list, export_file=None):
+        """
+        Parse all files
+        """
+
+        glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+        for fname in glob.parse_files(file_list, self.file_not_found_cb):
+            self.parse_file(fname)
+
+        for fname in glob.parse_files(export_file, self.file_not_found_cb):
+            self.process_export_file(fname)
+
+    def out_msg(self, fname, name, arg):
+        """
+        Return output messages from a file name using the output style
+        filtering.
+
+        If the output type was not handled by the styler, return None.
+        """
+
+        # NOTE: we can add rules here to filter out unwanted parts,
+        # although OutputFormat.msg already does that.
+
+        return self.out_style.msg(fname, name, arg)
+
+    def msg(self, enable_lineno=False, export=False, internal=False,
+            symbol=None, nosymbol=None, no_doc_sections=False,
+            filenames=None, export_file=None):
+        """
+        Iterates over the kernel-doc results and outputs messages,
+        returning kernel-doc markups on each iteration.
+        """
+
+        self.out_style.set_config(self.config)
+
+        if not filenames:
+            filenames = sorted(self.results.keys())
+
+        glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+        for fname in filenames:
+            function_table = set()
+
+            if internal or export:
+                if not export_file:
+                    export_file = [fname]
+
+                for f in glob.parse_files(export_file, self.file_not_found_cb):
+                    function_table |= self.export_table[f]
+
+            if symbol:
+                for s in symbol:
+                    function_table.add(s)
+
+            self.out_style.set_filter(export, internal, symbol, nosymbol,
+                                      function_table, enable_lineno,
+                                      no_doc_sections)
+
+            msg = ""
+            if fname not in self.results:
+                self.config.log.warning("No kernel-doc for file %s", fname)
+                continue
+
+            symbols = self.results[fname]
+            self.out_style.set_symbols(symbols)
+
+            for arg in symbols:
+                m = self.out_msg(fname, arg.name, arg)
+
+                if m is None:
+                    ln = arg.get("ln", 0)
+                    dtype = arg.get('type', "")
+
+                    self.config.log.warning("%s:%d Can't handle %s",
+                                            fname, ln, dtype)
+                else:
+                    msg += m
+
+            if msg:
+                yield fname, msg
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
new file mode 100644
index 000000000000..19805301cb2c
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# A class that will, eventually, encapsulate all of the parsed data that we
+# then pass into the output modules.
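+# For instance, the ReST and man output classes read item.parameterlist
+# and item.parameterdescs directly, while everything else still goes
+# through the get()/__getitem__ fallback defined below.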
+#
+
+class KdocItem:
+    def __init__(self, name, fname, type, start_line, **other_stuff):
+        self.name = name
+        self.fname = fname
+        self.type = type
+        self.declaration_start_line = start_line
+        self.sections = {}
+        self.section_start_lines = {}
+        self.parameterlist = []
+        self.parameterdesc_start_lines = []
+        self.parameterdescs = {}
+        self.parametertypes = {}
+        #
+        # Just save everything else into our own dict so that the output
+        # side can grab it directly as before. As we move things into more
+        # structured data, this will, hopefully, fade away.
+        #
+        self.other_stuff = other_stuff
+
+    def get(self, key, default=None):
+        return self.other_stuff.get(key, default)
+
+    def __getitem__(self, key):
+        return self.get(key)
+
+    #
+    # Tracking of section and parameter information.
+    #
+    def set_sections(self, sections, start_lines):
+        self.sections = sections
+        self.section_start_lines = start_lines
+
+    def set_params(self, names, descs, types, starts):
+        self.parameterlist = names
+        self.parameterdescs = descs
+        self.parametertypes = types
+        self.parameterdesc_start_lines = starts
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
new file mode 100644
index 000000000000..58f115059e93
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -0,0 +1,824 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>.
+#
+# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917
+
+"""
+Implement output filters to print kernel-doc documentation.
+
+The implementation uses a virtual base class (OutputFormat) which
+contains dispatchers to virtual methods, and some code to filter
+out output messages.
+
+The actual implementation is done on one separate class per each type
+of output. Currently, there are output classes for ReST and man/troff.
+"""
+
+import os
+import re
+from datetime import datetime
+
+from kdoc_parser import KernelDoc, type_param
+from kdoc_re import KernRe
+
+
+function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False)
+
+# match expressions used to find embedded type information
+type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False)
+type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False)
+type_func = KernRe(r"(\w+)\(\)", cache=False)
+type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+# Special RST handling for func ptr params
+type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False)
+
+# Special RST handling for structs with func ptr params
+type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False)
+
+type_env = KernRe(r"(\$\w+)", cache=False)
+type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False)
+type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False)
+type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False)
+type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False)
+type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False)
+type_fallback = KernRe(r"\&([_\w]+)", cache=False)
+type_member_func = type_member + KernRe(r"\(\)", cache=False)
+
+
+class OutputFormat:
+    """
+    Base class for OutputFormat. If used as-is, it means that only
+    warnings will be displayed.
+    """
+
+    # output mode.
+    OUTPUT_ALL = 0        # output all symbols and doc sections
+    OUTPUT_INCLUDE = 1    # output only specified symbols
+    OUTPUT_EXPORTED = 2   # output exported symbols
+    OUTPUT_INTERNAL = 3   # output non-exported symbols
+
+    # Virtual member to be overridden at the inherited classes
+    highlights = []
+
+    def __init__(self):
+        """Declare internal vars and set mode to OUTPUT_ALL"""
+
+        self.out_mode = self.OUTPUT_ALL
+        self.enable_lineno = None
+        self.nosymbol = {}
+        self.symbol = None
+        self.function_table = None
+        self.config = None
+        self.no_doc_sections = False
+
+        self.data = ""
+
+    def set_config(self, config):
+        """
+        Set up global config variables used by both parser and output.
+        """
+
+        self.config = config
+
+    def set_filter(self, export, internal, symbol, nosymbol, function_table,
+                   enable_lineno, no_doc_sections):
+        """
+        Initialize filter variables according with the requested mode.
+
+        Only one choice is valid between export, internal and symbol.
+
+        The nosymbol filter can be used on all modes.
+        """
+
+        self.enable_lineno = enable_lineno
+        self.no_doc_sections = no_doc_sections
+        self.function_table = function_table
+
+        if symbol:
+            self.out_mode = self.OUTPUT_INCLUDE
+        elif export:
+            self.out_mode = self.OUTPUT_EXPORTED
+        elif internal:
+            self.out_mode = self.OUTPUT_INTERNAL
+        else:
+            self.out_mode = self.OUTPUT_ALL
+
+        if nosymbol:
+            self.nosymbol = set(nosymbol)
+
+
+    def highlight_block(self, block):
+        """
+        Apply the RST highlights to a sub-block of text.
+        """
+
+        for r, sub in self.highlights:
+            block = r.sub(sub, block)
+
+        return block
+
+    def out_warnings(self, args):
+        """
+        Output warnings for identifiers that will be displayed.
+        """
+
+        for log_msg in args.warnings:
+            self.config.warning(log_msg)
+
+    def check_doc(self, name, args):
+        """Check if DOC should be output"""
+
+        if self.no_doc_sections:
+            return False
+
+        if name in self.nosymbol:
+            return False
+
+        if self.out_mode == self.OUTPUT_ALL:
+            self.out_warnings(args)
+            return True
+
+        if self.out_mode == self.OUTPUT_INCLUDE:
+            if name in self.function_table:
+                self.out_warnings(args)
+                return True
+
+        return False
+
+    def check_declaration(self, dtype, name, args):
+        """
+        Checks if a declaration should be output or not based on the
+        filtering criteria.
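+
+        For example, in OUTPUT_EXPORTED mode only names found in
+        function_table (the EXPORT_SYMBOL* set) are emitted, while
+        OUTPUT_INTERNAL emits functions *not* in that table.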
+ """ + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: + if name in self.function_table: + return True + + if self.out_mode == self.OUTPUT_INTERNAL: + if dtype != "function": + self.out_warnings(args) + return True + + if name not in self.function_table: + self.out_warnings(args) + return True + + return False + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser + """ + + self.data = "" + + dtype = args.type + + if dtype == "doc": + self.out_doc(fname, name, args) + return self.data + + if not self.check_declaration(dtype, name, args): + return self.data + + if dtype == "function": + self.out_function(fname, name, args) + return self.data + + if dtype == "enum": + self.out_enum(fname, name, args) + return self.data + + if dtype == "typedef": + self.out_typedef(fname, name, args) + return self.data + + if dtype in ["struct", "union"]: + self.out_struct(fname, name, args) + return self.data + + # Warn if some type requires an output logic + self.config.log.warning("doesn't now how to output '%s' block", + dtype) + + return None + + # Virtual methods to be overridden by inherited classes + # At the base class, those do nothing. + def set_symbols(self, symbols): + """Get a list of all symbols from kernel_doc""" + + def out_doc(self, fname, name, args): + """Outputs a DOC block""" + + def out_function(self, fname, name, args): + """Outputs a function""" + + def out_enum(self, fname, name, args): + """Outputs an enum""" + + def out_typedef(self, fname, name, args): + """Outputs a typedef""" + + def out_struct(self, fname, name, args): + """Outputs a struct""" + + +class RestFormat(OutputFormat): + """Consts and functions used by ReST output""" + + highlights = [ + (type_constant, r"``\1``"), + (type_constant2, r"``\1``"), + + # Note: need to escape () to avoid func matching later + (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), + (type_member, r":c:type:`\1\2\3 <\1>`"), + (type_fp_param, r"**\1\\(\\)**"), + (type_fp_param2, r"**\1\\(\\)**"), + (type_func, r"\1()"), + (type_enum, r":c:type:`\1 <\2>`"), + (type_struct, r":c:type:`\1 <\2>`"), + (type_typedef, r":c:type:`\1 <\2>`"), + (type_union, r":c:type:`\1 <\2>`"), + + # in rst this can refer to any type + (type_fallback, r":c:type:`\1`"), + (type_param_ref, r"**\1\2**") + ] + blankline = "\n" + + sphinx_literal = KernRe(r'^[^.].*::$', cache=False) + sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) + + def __init__(self): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.lineprefix = "" + + def print_lineno(self, ln): + """Outputs a line number""" + + if self.enable_lineno and ln is not None: + ln += 1 + self.data += f".. LINENO {ln}\n" + + def output_highlight(self, args): + """ + Outputs a C symbol that may require being converted to ReST using + the self.highlights variable + """ + + input_text = args + output = "" + in_literal = False + litprefix = "" + block = "" + + for line in input_text.strip("\n").split("\n"): + + # If we're in a literal block, see if we should drop out of it. + # Otherwise, pass the line straight through unmunged. + if in_literal: + if line.strip(): # If the line is not blank + # If this is the first non-blank line in a literal block, + # figure out the proper indent. 
+                    if not litprefix:
+                        r = KernRe(r'^(\s*)')
+                        if r.match(line):
+                            litprefix = '^' + r.group(1)
+                        else:
+                            litprefix = ""
+
+                        output += line + "\n"
+                    elif not KernRe(litprefix).match(line):
+                        in_literal = False
+                    else:
+                        output += line + "\n"
+                else:
+                    output += line + "\n"
+
+            # Not in a literal block (or just dropped out)
+            if not in_literal:
+                block += line + "\n"
+                if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line):
+                    in_literal = True
+                    litprefix = ""
+                    output += self.highlight_block(block)
+                    block = ""
+
+        # Handle any remaining block
+        if block:
+            output += self.highlight_block(block)
+
+        # Print the output with the line prefix
+        for line in output.strip("\n").split("\n"):
+            self.data += self.lineprefix + line + "\n"
+
+    def out_section(self, args, out_docblock=False):
+        """
+        Outputs a block section.
+
+        This could use some work; it's used to output the DOC: sections, and
+        starts by putting out the name of the doc section itself, but that
+        tends to duplicate a header already in the template file.
+        """
+        for section, text in args.sections.items():
+            # Skip sections that are in the nosymbol_table
+            if section in self.nosymbol:
+                continue
+
+            if out_docblock:
+                if not self.out_mode == self.OUTPUT_INCLUDE:
+                    self.data += f".. _{section}:\n\n"
+                self.data += f'{self.lineprefix}**{section}**\n\n'
+            else:
+                self.data += f'{self.lineprefix}**{section}**\n\n'
+
+            self.print_lineno(args.section_start_lines.get(section, 0))
+            self.output_highlight(text)
+            self.data += "\n"
+        self.data += "\n"
+
+    def out_doc(self, fname, name, args):
+        if not self.check_doc(name, args):
+            return
+        self.out_section(args, out_docblock=True)
+
+    def out_function(self, fname, name, args):
+
+        oldprefix = self.lineprefix
+        signature = ""
+
+        func_macro = args.get('func_macro', False)
+        if func_macro:
+            signature = name
+        else:
+            if args.get('functiontype'):
+                signature = args['functiontype'] + " "
+            signature += name + " ("
+
+        ln = args.declaration_start_line
+        count = 0
+        for parameter in args.parameterlist:
+            if count != 0:
+                signature += ", "
+            count += 1
+            dtype = args.parametertypes.get(parameter, "")
+
+            if function_pointer.search(dtype):
+                signature += function_pointer.group(1) + parameter + function_pointer.group(3)
+            else:
+                signature += dtype
+
+        if not func_macro:
+            signature += ")"
+
+        self.print_lineno(ln)
+        if args.get('typedef') or not args.get('functiontype'):
+            self.data += f".. c:macro:: {name}\n\n"
+
+            if args.get('typedef'):
+                self.data += " **Typedef**: "
+                self.lineprefix = ""
+                self.output_highlight(args.get('purpose', ""))
+                self.data += "\n\n**Syntax**\n\n"
+                self.data += f" ``{signature}``\n\n"
+            else:
+                self.data += f"``{signature}``\n\n"
+        else:
+            self.data += f".. c:function:: {signature}\n\n"
+
+        if not args.get('typedef'):
+            self.print_lineno(ln)
+            self.lineprefix = " "
+            self.output_highlight(args.get('purpose', ""))
+            self.data += "\n"
+
+        # Put descriptive text into a container (HTML <div>) to help set
+        # function prototypes apart
+        self.lineprefix = " "
+
+        if args.parameterlist:
+            self.data += ".. container:: kernelindent\n\n"
+            self.data += f"{self.lineprefix}**Parameters**\n\n"
+
+        for parameter in args.parameterlist:
+            parameter_name = KernRe(r'\[.*').sub('', parameter)
+            dtype = args.parametertypes.get(parameter, "")
+
+            if dtype:
+                self.data += f"{self.lineprefix}``{dtype}``\n"
+            else:
+                self.data += f"{self.lineprefix}``{parameter}``\n"
+
+            self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+            self.lineprefix = " "
+            if parameter_name in args.parameterdescs and \
+               args.parameterdescs[parameter_name] != KernelDoc.undescribed:
+
+                self.output_highlight(args.parameterdescs[parameter_name])
+                self.data += "\n"
+            else:
+                self.data += f"{self.lineprefix}*undescribed*\n\n"
+            self.lineprefix = " "
+
+        self.out_section(args)
+        self.lineprefix = oldprefix
+
+    def out_enum(self, fname, name, args):
+
+        oldprefix = self.lineprefix
+        ln = args.declaration_start_line
+
+        self.data += f"\n\n.. c:enum:: {name}\n\n"
+
+        self.print_lineno(ln)
+        self.lineprefix = " "
+        self.output_highlight(args.get('purpose', ''))
+        self.data += "\n"
+
+        self.data += ".. container:: kernelindent\n\n"
+        outer = self.lineprefix + " "
+        self.lineprefix = outer + " "
+        self.data += f"{outer}**Constants**\n\n"
+
+        for parameter in args.parameterlist:
+            self.data += f"{outer}``{parameter}``\n"
+
+            if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed:
+                self.output_highlight(args.parameterdescs[parameter])
+            else:
+                self.data += f"{self.lineprefix}*undescribed*\n\n"
+            self.data += "\n"
+
+        self.lineprefix = oldprefix
+        self.out_section(args)
+
+    def out_typedef(self, fname, name, args):
+
+        oldprefix = self.lineprefix
+        ln = args.declaration_start_line
+
+        self.data += f"\n\n.. c:type:: {name}\n\n"
+
+        self.print_lineno(ln)
+        self.lineprefix = " "
+
+        self.output_highlight(args.get('purpose', ''))
+
+        self.data += "\n"
+
+        self.lineprefix = oldprefix
+        self.out_section(args)
+
+    def out_struct(self, fname, name, args):
+
+        purpose = args.get('purpose', "")
+        declaration = args.get('definition', "")
+        dtype = args.type
+        ln = args.declaration_start_line
+
+        self.data += f"\n\n.. c:{dtype}:: {name}\n\n"
+
+        self.print_lineno(ln)
+
+        oldprefix = self.lineprefix
+        self.lineprefix += " "
+
+        self.output_highlight(purpose)
+        self.data += "\n"
+
+        self.data += ".. container:: kernelindent\n\n"
+        self.data += f"{self.lineprefix}**Definition**::\n\n"
+
+        self.lineprefix = self.lineprefix + " "
+
+        declaration = declaration.replace("\t", self.lineprefix)
+
+        self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n"
+        self.data += f"{declaration}{self.lineprefix}" + "};\n\n"
+
+        self.lineprefix = " "
+        self.data += f"{self.lineprefix}**Members**\n\n"
+        for parameter in args.parameterlist:
+            if not parameter or parameter.startswith("#"):
+                continue
+
+            parameter_name = parameter.split("[", maxsplit=1)[0]
+
+            if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+                continue
+
+            self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+            self.data += f"{self.lineprefix}``{parameter}``\n"
+
+            self.lineprefix = " "
+            self.output_highlight(args.parameterdescs[parameter_name])
+            self.lineprefix = " "
+
+            self.data += "\n"
+
+        self.data += "\n"
+
+        self.lineprefix = oldprefix
+        self.out_section(args)
+
+
+class ManFormat(OutputFormat):
+    """Consts and functions used by man pages output"""
+
+    highlights = (
+        (type_constant, r"\1"),
+        (type_constant2, r"\1"),
+        (type_func, r"\\fB\1\\fP"),
+        (type_enum, r"\\fI\1\\fP"),
+        (type_struct, r"\\fI\1\\fP"),
+        (type_typedef, r"\\fI\1\\fP"),
+        (type_union, r"\\fI\1\\fP"),
+        (type_param, r"\\fI\1\\fP"),
+        (type_param_ref, r"\\fI\1\2\\fP"),
+        (type_member, r"\\fI\1\2\3\\fP"),
+        (type_fallback, r"\\fI\1\\fP")
+    )
+    blankline = ""
+
+    date_formats = [
+        "%a %b %d %H:%M:%S %Z %Y",
+        "%a %b %d %H:%M:%S %Y",
+        "%Y-%m-%d",
+        "%b %d %Y",
+        "%B %d %Y",
+        "%m %d %Y",
+    ]
+
+    def __init__(self, modulename):
+        """
+        Creates class variables.
+
+        Not really mandatory, but it is a good coding style and makes
+        pylint happy.
+        """
+
+        super().__init__()
+        self.modulename = modulename
+        self.symbols = []
+
+        dt = None
+        tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
+        if tstamp:
+            for fmt in self.date_formats:
+                try:
+                    dt = datetime.strptime(tstamp, fmt)
+                    break
+                except ValueError:
+                    pass
+
+        if not dt:
+            dt = datetime.now()
+
+        self.man_date = dt.strftime("%B %Y")
+
+    def arg_name(self, args, name):
+        """
+        Return the name that will be used for the man page.
+
+        As we may have the same name on different namespaces,
+        prepend the data type for all types except functions and typedefs.
+
+        The doc section is special: it uses the modulename.
+        """
+
+        dtype = args.type
+
+        if dtype == "doc":
+            return self.modulename
+
+        if dtype in ["function", "typedef"]:
+            return name
+
+        return f"{dtype} {name}"
+
+    def set_symbols(self, symbols):
+        """
+        Get a list of all symbols from kernel_doc.
+
+        Man pages will use it to add a SEE ALSO section with the other
+        symbols documented in the same file.
+        """
+        self.symbols = symbols
+
+    def out_tail(self, fname, name, args):
+        """Adds a tail for all man pages"""
+
+        # SEE ALSO section
+        self.data += '.SH "SEE ALSO"' + "\n.PP\n"
+        self.data += f"Kernel file \\fB{args.fname}\\fR\n"
+        if len(self.symbols) >= 2:
+            cur_name = self.arg_name(args, name)
+
+            related = []
+            for arg in self.symbols:
+                out_name = self.arg_name(arg, arg.name)
+
+                if cur_name == out_name:
+                    continue
+
+                related.append(f"\\fB{out_name}\\fR(9)")
+
+            self.data += ",\n".join(related) + "\n"
+
+        # TODO: does it make sense to add other sections? Maybe
+        # REPORTING ISSUES? LICENSE?
+
+    def msg(self, fname, name, args):
+        """
+        Handles a single entry from the kernel-doc parser.
+
+        Add a tail at the end of man pages output.
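+
+        The appended tail would look like (names are illustrative):
+
+            .SH "SEE ALSO"
+            .PP
+            Kernel file \\fBdrivers/foo/bar.c\\fR
+            \\fBstruct foo_dev\\fR(9),
+            \\fBfoo_probe\\fR(9)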
+ """ + super().msg(fname, name, args) + self.out_tail(fname, name, args) + + return self.data + + def output_highlight(self, block): + """ + Outputs a C symbol that may require being highlighted with + self.highlights variable using troff syntax + """ + + contents = self.highlight_block(block) + + if isinstance(contents, list): + contents = "\n".join(contents) + + for line in contents.strip("\n").split("\n"): + line = KernRe(r"^\s*").sub("", line) + if not line: + continue + + if line[0] == ".": + self.data += "\\&" + line + "\n" + else: + self.data += line + "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_function(self, fname, name, args): + """output function in man""" + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + if args.get('functiontype', ''): + self.data += f'.B "{args["functiontype"]}" {name}' + "\n" + else: + self.data += f'.B "{name}' + "\n" + + count = 0 + parenth = "(" + post = "," + + for parameter in args.parameterlist: + if count == len(args.parameterlist) - 1: + post = ");" + + dtype = args.parametertypes.get(parameter, "") + if function_pointer.match(dtype): + # Pointer-to-function + self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" + else: + dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) + + self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" + count += 1 + parenth = "" + + if args.parameterlist: + self.data += ".SH ARGUMENTS\n" + + for parameter in args.parameterlist: + parameter_name = re.sub(r'\[.*', '', parameter) + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section.upper()}"' + "\n" + self.output_highlight(text) + + def out_enum(self, fname, name, args): + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"enum {name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + self.data += f"enum {name}" + " {\n" + + count = 0 + for parameter in args.parameterlist: + self.data += f'.br\n.BI " {parameter}"' + "\n" + if count == len(args.parameterlist) - 1: + self.data += "\n};\n" + else: + self.data += ", \n.br\n" + + count += 1 + + self.data += ".SH Constants\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_typedef(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"typedef {name} \\- {purpose}\n" + + for 
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
+
+    def out_struct(self, fname, name, args):
+        module = self.modulename
+        purpose = args.get('purpose')
+        definition = args.get('definition')
+        out_name = self.arg_name(args, name)
+
+        self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+        self.data += ".SH NAME\n"
+        self.data += f"{args.type} {name} \\- {purpose}\n"
+
+        # Replace tabs with two spaces and handle newlines
+        declaration = definition.replace("\t", "  ")
+        declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration)
+
+        self.data += ".SH SYNOPSIS\n"
+        self.data += f"{args.type} {name} " + "{" + "\n.br\n"
+        self.data += f'.BI "{declaration}\n' + "};\n.br\n\n"
+
+        self.data += ".SH Members\n"
+        for parameter in args.parameterlist:
+            if parameter.startswith("#"):
+                continue
+
+            parameter_name = re.sub(r"\[.*", "", parameter)
+
+            if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+                continue
+
+            self.data += f'.IP "{parameter}" 12' + "\n"
+            self.output_highlight(args.parameterdescs.get(parameter_name))
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
new file mode 100644
index 000000000000..f7dbb0868367
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -0,0 +1,1667 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>.
+#
+# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
+
+"""
+kdoc_parser
+===========
+
+Read a C language source or header FILE and extract embedded
+documentation comments
+"""
+
+import sys
+import re
+from pprint import pformat
+
+from kdoc_re import NestedMatch, KernRe
+from kdoc_item import KdocItem
+
+#
+# Regular expressions used to parse kernel-doc markups at KernelDoc class.
+#
+# Let's declare them in lowercase outside any class to make easier to
+# convert from the Perl script.
+#
+# As those are evaluated at the beginning, no need to cache them
+#
+
+# Allow whitespace at end of comment start.
+doc_start = KernRe(r'^/\*\*\s*$', cache=False)
+
+doc_end = KernRe(r'\*/', cache=False)
+doc_com = KernRe(r'\s*\*\s*', cache=False)
+doc_com_body = KernRe(r'\s*\* ?', cache=False)
+doc_decl = doc_com + KernRe(r'(\w+)', cache=False)
+
+# @params and a strictly limited set of supported section names
+# Specifically:
+#   Match @word:
+#         @...:
+#         @{section-name}:
+# while trying to not match literal block starts like "example::"
+#
+known_section_names = 'description|context|returns?|notes?|examples?'
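+# With the above, a line like " * Returns: 0 on success" or " * @foo: bar"
+# opens a new section, while a literal-block introducer such as
+# " * example::" fails the "[^:]" test below and stays in the description.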
+known_sections = KernRe(known_section_names, flags = re.I) +doc_sect = doc_com + \ + KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + flags=re.I, cache=False) + +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) + +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) + +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# +# Tests for the beginning of a kerneldoc block in its various forms. +# +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) +doc_begin_func = KernRe(str(doc_com) + # initial " * " + r"(?:\w+\s*\*\s*)?" + # type (not captured) + r'(?:define\s+)?' + # possible "define" (not captured) + r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" + r'(?:[-:].*)?$', # description (not captured) + cache = False) + +# +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. +# +struct_args_pattern = r'([^,)]+)' + +struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens in two steps: + # + # 1. drop struct group arguments that aren't MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two constructs that are incompatible with the + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version, but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # as it doesn't properly match the closing parenthesis in some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled separately. 
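To make the two-step struct_group() handling described above concrete, here is a rough standalone sketch; the sample struct body is invented, and plain re stands in for the KernRe/NestedMatch machinery the patch actually uses:

    import re

    # Step 1: drop the non-MEMBERS arguments of struct_group(), leaving a
    # STRUCT_GROUP() marker around the members that need documentation.
    members = "u32 len; struct_group(hdr, u8 type; u8 flags;) u32 crc;"
    members = re.sub(r'\bstruct_group\s*\(([^,]*,)', 'STRUCT_GROUP(', members)
    print(members)
    # -> u32 len; STRUCT_GROUP(u8 type; u8 flags;) u32 crc;

    # Step 2 (done later via NestedMatch) strips the marker while counting
    # delimiters, so nested parentheses inside the members can't break it.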
+ # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end limiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. +# +struct_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + +# +# Transforms for function prototypes +# +function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), +] + +# +# Apply a set of transforms to a block of text. +# +def apply_transforms(xforms, text): + for search, subst in xforms: + text = search.sub(subst, text) + return text + +# +# A little helper to get rid of excess white space +# +multi_space = KernRe(r'\s\s+') +def trim_whitespace(s): + return multi_space.sub(' ', s.strip()) + +# +# Remove struct/enum members that have been marked "private". +# +def trim_private_members(text): + # + # First look for a "public:" block that ends a private region, then + # handle the "private until the end" case. 
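The body of trim_private_members() continues just below; as a quick sketch of its intended effect, using plain re on an invented struct body:

    import re

    body = ("int a; /* some comment */ "
            "/* private: */ int secret; "
            "/* public: */ int b;")

    # Same three substitutions as the function below performs.
    body = re.sub(r'/\*\s*private:.*?/\*\s*public:.*?\*/', '', body, flags=re.S)
    body = re.sub(r'/\*\s*private:.*', '', body, flags=re.S)
    body = re.sub(r'\s*/\*.*?\*/\s*', '', body, flags=re.S).strip()
    print(body)   # -> "int a;int b;"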
+ # + text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) + text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) + # + # We needed the comments to do the above, but now we can take them out. + # + return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + +class state: + """ + State machine enums + """ + + # Parser states + NORMAL = 0 # normal code + NAME = 1 # looking for function name + DECLARATION = 2 # We have seen a declaration which might not be done + BODY = 3 # the body of the comment + SPECIAL_SECTION = 4 # doc section ending with a blank line + PROTO = 5 # scanning prototype + DOCBLOCK = 6 # documentation block + INLINE_NAME = 7 # gathering doc outside main block + INLINE_TEXT = 8 # reading the body of inline docs + + name = [ + "NORMAL", + "NAME", + "DECLARATION", + "BODY", + "SPECIAL_SECTION", + "PROTO", + "DOCBLOCK", + "INLINE_NAME", + "INLINE_TEXT", + ] + + +SECTION_DEFAULT = "Description" # default section + +class KernelEntry: + + def __init__(self, config, fname, ln): + self.config = config + self.fname = fname + + self._contents = [] + self.prototype = "" + + self.warnings = [] + + self.parameterlist = [] + self.parameterdescs = {} + self.parametertypes = {} + self.parameterdesc_start_lines = {} + + self.section_start_lines = {} + self.sections = {} + + self.anon_struct_union = False + + self.leading_space = None + + self.fname = fname + + # State flags + self.brcount = 0 + self.declaration_start_line = ln + 1 + + # + # Management of section contents + # + def add_text(self, text): + self._contents.append(text) + + def contents(self): + return '\n'.join(self._contents) + '\n' + + # TODO: rename to emit_message after removal of kernel-doc.pl + def emit_msg(self, ln, msg, *, warning=True): + """Emit a message""" + + log_msg = f"{self.fname}:{ln} {msg}" + + if not warning: + self.config.log.info(log_msg) + return + + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.warnings.append(log_msg) + return + + # + # Begin a new section. + # + def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): + if dump: + self.dump_section(start_new = True) + self.section = title + self.new_start_line = line_no + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + # + # If we have accumulated no contents in the default ("description") + # section, don't bother. 
+ # + if self.section == SECTION_DEFAULT and not self._contents: + return + name = self.section + contents = self.contents() + + if type_param.match(name): + name = type_param.group(1) + + self.parameterdescs[name] = contents + self.parameterdesc_start_lines[name] = self.new_start_line + + self.new_start_line = 0 + + else: + if name in self.sections and self.sections[name] != "": + # Only warn on user-specified duplicate section names + if name != SECTION_DEFAULT: + self.emit_msg(self.new_start_line, + f"duplicate section name '{name}'") + # Treat as a new paragraph - add a blank line + self.sections[name] += '\n' + contents + else: + self.sections[name] = contents + self.section_start_lines[name] = self.new_start_line + self.new_start_line = 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) + + if start_new: + self.section = SECTION_DEFAULT + self._contents = [] + +python_warning = False + +class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. + """ + + # Section names + + section_context = "Context" + section_return = "Return" + + undescribed = "-- undescribed --" + + def __init__(self, config, fname): + """Initialize internal variables""" + + self.fname = fname + self.config = config + + # Initial state for the state machines + self.state = state.NORMAL + + # Store entry currently being processed + self.entry = None + + # Place all potential outputs into an array + self.entries = [] + + # + # We need Python 3.7 for its "dicts remember the insertion + # order" guarantee + # + global python_warning + if (not python_warning and + sys.version_info.major == 3 and sys.version_info.minor < 7): + + self.emit_msg(0, + 'Python 3.7 or later is required for correct results') + python_warning = True + + def emit_msg(self, ln, msg, *, warning=True): + """Emit a message""" + + if self.entry: + self.entry.emit_msg(ln, msg, warning=warning) + return + + log_msg = f"{self.fname}:{ln} {msg}" + + if warning: + self.config.log.warning(log_msg) + else: + self.config.log.info(log_msg) + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + + if self.entry: + self.entry.dump_section(start_new) + + # TODO: rename it to store_declaration after removal of kernel-doc.pl + def output_declaration(self, dtype, name, **args): + """ + Stores the entry into an entry array. + + The actual output and output filters will be handled elsewhere + """ + + item = KdocItem(name, self.fname, dtype, + self.entry.declaration_start_line, **args) + item.warnings = self.entry.warnings + + # Drop empty sections + # TODO: improve empty sections logic to emit warnings + sections = self.entry.sections + for section in ["Description", "Return"]: + if section in sections and not sections[section].rstrip(): + del sections[section] + item.set_sections(sections, self.entry.section_start_lines) + item.set_params(self.entry.parameterlist, self.entry.parameterdescs, + self.entry.parametertypes, + self.entry.parameterdesc_start_lines) + self.entries.append(item) + + self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) + + def reset_state(self, ln): + """ + Ancillary routine to create a new entry. It initializes all + variables used by the state machine. 
+ """ + + # + # Flush the warnings out before we proceed further + # + if self.entry and self.entry not in self.entries: + for log_msg in self.entry.warnings: + self.config.log.warning(log_msg) + + self.entry = KernelEntry(self.config, self.fname, ln) + + # State flags + self.state = state.NORMAL + + def push_parameter(self, ln, decl_type, param, dtype, + org_arg, declaration_name): + """ + Store parameters and their descriptions at self.entry. + """ + + if self.entry.anon_struct_union and dtype == "" and param == "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union = False + + param = KernRe(r'[\[\)].*').sub('', param, count=1) + + # + # Look at various "anonymous type" cases. + # + if dtype == '': + if param.endswith("..."): + if len(param) > 3: # there is a name provided, use that + param = param[:-3] + if not self.entry.parameterdescs.get(param): + self.entry.parameterdescs[param] = "variable arguments" + + elif (not param) or param == "void": + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith("#"): + self.entry.parameterdescs[param] = self.undescribed + + if "." not in param: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_msg(ln, + f"{dname} '{param}' not described in '{declaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. + + self.entry.parameterlist.append(param) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] = org_arg + + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr = KernRe(r'(\([^\),]+),') + while arg_expr.search(args): + args = arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Ignore argument attributes + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg = arg.strip() + arg = KernRe(r'\s+').sub(' ', arg, count=1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + # + # The pointer-to-function case. 
+ # + elif KernRe(r'\(.+\)\s*\(').search(arg): + arg = arg.replace('#', ',') + r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" + r'([\w\[\].]*)' # Capture the name and possible [array] + r'\s*\)') # Make sure the trailing ")" is there + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + # + # The array-of-pointers case. Dig the parameter name out from the middle + # of the declaration. + # + elif KernRe(r'\(.+\)\s*\[').search(arg): + r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" + r'([\w.]*?)' # The actual pointer name + r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + elif arg: + # + # Clean up extraneous spaces and split the string at commas; the first + # element of the resulting list will also include the type information. + # + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) + args = KernRe(r'\s*,\s*').split(arg) + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + # + # args[0] has a string of "type a". If "a" includes an [array] + # declaration, we want to not be fooled by any white space inside + # the brackets, so detect and handle that case specially. + # + r = KernRe(r'^([^[\]]*\s+)(.*)$') + if r.match(args[0]): + args[0] = r.group(2) + dtype = r.group(1) + else: + # No space in args[0]; this seems wrong but preserves previous behavior + dtype = '' + + bitfield_re = KernRe(r'(.*?):(\w+)') + for param in args: + # + # For pointers, shift the star(s) from the variable name to the + # type declaration. + # + r = KernRe(r'^(\*+)\s*(.*)') + if r.match(param): + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + # + # Perform a similar shift for bitfields. + # + elif bitfield_re.search(param): + if dtype != "": # Skip unnamed bit-fields + self.push_parameter(ln, decl_type, bitfield_re.group(1), + f"{dtype}:{bitfield_re.group(2)}", + arg, declaration_name) + else: + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. + """ + for section in self.entry.sections: + if section not in self.entry.parameterlist and \ + not known_sections.search(section): + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + self.emit_msg(ln, + f"Excess {dname} '{section}' description in '{decl_name}'") + + def check_return_section(self, ln, declaration_name, return_type): + """ + If the function doesn't return void, warns about the lack of a + return description. 
+ """ + + if not self.config.wreturn: + return + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type (but not "void *") + if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): + return + + if not self.entry.sections.get("Return", None): + self.emit_msg(ln, + f"No description found for return value of '{declaration_name}'") + + # + # Split apart a structure prototype; returns (struct|union, name, members) or None + # + def split_struct_proto(self, proto): + type_pattern = r'(struct|union)' + qualifiers = [ + "__attribute__", + "__packed", + "__aligned", + "____cacheline_aligned_in_smp", + "____cacheline_aligned", + ] + definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" + + r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) + if r.search(proto): + return (r.group(1), r.group(2), r.group(3)) + else: + r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + if r.search(proto): + return (r.group(1), r.group(3), r.group(2)) + return None + # + # Rewrite the members of a structure or union for easier formatting later on. + # Among other things, this function will turn a member like: + # + # struct { inner_members; } foo; + # + # into: + # + # struct foo; inner_members; + # + def rewrite_struct_members(self, members): + # + # Process struct/union members from the most deeply nested outward. The + # trick is in the ^{ below - it prevents a match of an outer struct/union + # until the inner one has been munged (removing the "{" in the process). + # + struct_members = KernRe(r'(struct|union)' # 0: declaration type + r'([^\{\};]+)' # 1: possible name + r'(\{)' + r'([^\{\}]*)' # 3: Contents of declaration + r'(\})' + r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration + tuples = struct_members.findall(members) + while tuples: + for t in tuples: + newmember = "" + oldmember = "".join(t) # Reconstruct the original formatting + dtype, name, lbr, content, rbr, rest, semi = t + # + # Pass through each field name, normalizing the form and formatting. + # + for s_id in rest.split(','): + s_id = s_id.strip() + newmember += f"{dtype} {s_id}; " + # + # Remove bitfield/array/pointer info, getting the bare name. + # + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + # + # Pass through the members of this inner structure/union. + # + for arg in content.split(';'): + arg = arg.strip() + # + # Look for (type)(*name)(args) - pointer to function + # + r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') + if r.match(arg): + dtype, name, extra = r.group(1), r.group(2), r.group(3) + # Pointer-to-function + if not s_id: + # Anonymous struct/union + newmember += f"{dtype}{name}{extra}; " + else: + newmember += f"{dtype}{s_id}.{name}{extra}; " + # + # Otherwise a non-function member. + # + else: + # + # Remove bitmap and array portions and spaces around commas + # + arg = KernRe(r':\s*\d+\s*').sub('', arg) + arg = KernRe(r'\[.*\]').sub('', arg) + arg = KernRe(r'\s*,\s*').sub(',', arg) + # + # Look for a normal decl - "type name[,name...]" + # + r = KernRe(r'(.*)\s+([\S+,]+)') + if r.search(arg): + for name in r.group(2).split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) + if not s_id: + # Anonymous struct/union + newmember += f"{r.group(1)} {name}; " + else: + newmember += f"{r.group(1)} {s_id}.{name}; " + else: + newmember += f"{arg}; " + # + # At the end of the s_id loop, replace the original declaration with + # the munged version. 
+ # + members = members.replace(oldmember, newmember) + # + # End of the tuple loop - search again and see if there are outer members + # that now turn up. + # + tuples = struct_members.findall(members) + return members + + # + # Format the struct declaration into a standard form for inclusion in the + # resulting docs. + # + def format_struct_decl(self, declaration): + # + # Insert newlines, get rid of extra spaces. + # + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) + # + # Format inline enums with each member on its own line. + # + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + while r.search(declaration): + declaration = r.sub(r'\1,\n\2', declaration) + # + # Now go through and supply the right number of tabs + # for each line. + # + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) + if clause: + if '}' in clause and level > 1: + level -= 1 + if not clause.startswith('#'): + declaration += "\t" * level + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + return declaration + + + def dump_struct(self, ln, proto): + """ + Store an entry for a struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") + return + decl_type, declaration_name, members = struct_parts + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") + return + # + # Go through the list of members applying all of our transformations. + # + members = trim_private_members(members) + members = apply_transforms(struct_xforms, members) + + nested = NestedMatch() + for search, sub in struct_nested_prefixes: + members = nested.sub(search, sub, members) + # + # Deal with embedded struct and union members, and drop enums entirely. + # + declaration = members + members = self.rewrite_struct_members(members) + members = re.sub(r'(\{[^\{\}]*\})', '', members) + # + # Output the result and we are done. + # + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type) + self.output_declaration(decl_type, declaration_name, + definition=self.format_struct_decl(declaration), + purpose=self.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Stores an enum inside self.entries array. + """ + # + # Strip preprocessor directives. Note that this depends on the + # trailing semicolon we added in process_proto_type(). + # + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + # + # Parse out the name and members of the enum. Typedef form first. + # + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name = r.group(2) + members = trim_private_members(r.group(1)) + # + # Failing that, look for a straight enum + # + else: + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name = r.group(1) + members = trim_private_members(r.group(2)) + # + # OK, this isn't going to work. + # + else: + self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") + return + # + # Make sure we found what we were expecting. 
+ # + if self.entry.identifier != declaration_name: + if self.entry.identifier == "": + self.emit_msg(ln, + f"{proto}: wrong kernel-doc identifier on prototype") + else: + self.emit_msg(ln, + f"expecting prototype for enum {self.entry.identifier}. " + f"Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name = "(anonymous)" + # + # Parse out the name of each enum member, and verify that we + # have a description for it. + # + member_set = set() + members = KernRe(r'\([^;)]*\)').sub('', members) + for arg in members.split(','): + if not arg: + continue + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] = self.undescribed + self.emit_msg(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") + member_set.add(arg) + # + # Ensure that every described member actually exists in the enum. + # + for k in self.entry.parameterdescs: + if k not in member_set: + self.emit_msg(ln, + f"Excess enum value '{k}' description in '{declaration_name}'") + + self.output_declaration('enum', declaration_name, + purpose=self.entry.declaration_purpose) + + def dump_declaration(self, ln, prototype): + """ + Stores a data declaration inside self.entries array. + """ + + if self.entry.decl_type == "enum": + self.dump_enum(ln, prototype) + elif self.entry.decl_type == "typedef": + self.dump_typedef(ln, prototype) + elif self.entry.decl_type in ["union", "struct"]: + self.dump_struct(ln, prototype) + else: + # This would be a bug + self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}') + + def dump_function(self, ln, prototype): + """ + Stores a function or function macro inside self.entries array. + """ + + found = func_macro = False + return_type = '' + decl_type = 'function' + # + # Apply the initial transformations. + # + prototype = apply_transforms(function_xforms, prototype) + # + # If we have a macro, remove the "#define" at the front. + # + new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) + if new_proto != prototype: + prototype = new_proto + # + # Dispense with the simple "#define A B" case here; the key + # is the space after the name of the symbol being defined. + # NOTE that the seemingly misnamed "func_macro" indicates a + # macro *without* arguments. + # + r = KernRe(r'^(\w+)\s+') + if r.search(prototype): + return_type = '' + declaration_name = r.group(1) + func_macro = True + found = True + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + name = r'\w+' + type1 = r'(?:[\w\s]+)?' + type2 = r'(?:[\w\s]+\*+)+' + # + # Attempt to match first on (args) with no internal parentheses; this + # lets us easily filter out __acquires() and other post-args stuff. If + # that fails, just grab the rest of the line to the last closing + # parenthesis. 
+ # + proto_args = r'\(([^\(]*|.*)\)' + # + # (Except for the simple macro case) attempt to split up the prototype + # in the various ways we understand. + # + if not found: + patterns = [ + rf'^()({name})\s*{proto_args}', + rf'^({type1})\s+({name})\s*{proto_args}', + rf'^({type2})\s*({name})\s*{proto_args}', + ] + + for p in patterns: + r = KernRe(p) + if r.match(prototype): + return_type = r.group(1) + declaration_name = r.group(2) + args = r.group(3) + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + found = True + break + # + # Parsing done; make sure that things are as we expect. + # + if not found: + self.emit_msg(ln, + f"cannot understand function prototype: '{prototype}'") + return + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " + f"Prototype was for {declaration_name}() instead") + return + self.check_sections(ln, declaration_name, "function") + self.check_return_section(ln, declaration_name, return_type) + # + # Store the result. + # + self.output_declaration(decl_type, declaration_name, + typedef=('typedef' in return_type), + functiontype=return_type, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + + + def dump_typedef(self, ln, proto): + """ + Stores a typedef inside self.entries array. + """ + # + # We start by looking for function typedefs. + # + typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + typedef_ident = r'\*?\s*(\w\S+)\s*' + typedef_args = r'\s*\((.*)\);' + + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type = r.group(1).strip() + declaration_name = r.group(2) + args = r.group(3) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.create_parameter_list(ln, 'function', args, ',', declaration_name) + + self.output_declaration('function', declaration_name, + typedef=True, + functiontype=return_type, + purpose=self.entry.declaration_purpose) + return + # + # Not a function, try to parse a simple typedef. + # + r = KernRe(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name = r.group(1) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.output_declaration('typedef', declaration_name, + purpose=self.entry.declaration_purpose) + return + + self.emit_msg(ln, "error: Cannot parse typedef!") + + @staticmethod + def process_export(function_set, line): + """ + process EXPORT_SYMBOL* tags + + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. + """ + + # We support documenting some exported symbols with different + # names. A horrible hack. + suffixes = [ '_noprof' ] + + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. 
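A quick standalone demonstration of the export matching that follows, including the _noprof suffix trimming; the sample lines are invented:

    import re

    export_symbol = re.compile(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*')

    function_set = set()
    for line in ('EXPORT_SYMBOL_GPL(kmalloc_noprof);',
                 'EXPORT_SYMBOL(jiffies);'):
        m = export_symbol.search(line)
        if m:
            symbol = m.group(2)
            if symbol.endswith('_noprof'):          # trim the special suffix
                symbol = symbol[:-len('_noprof')]
            function_set.add(symbol)
    print(sorted(function_set))   # -> ['jiffies', 'kmalloc']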
+ + if export_symbol.search(line): + symbol = export_symbol.group(2) + elif export_symbol_ns.search(line): + symbol = export_symbol_ns.group(2) + else: + return False + # + # Found an export, trim out any special suffixes + # + for suffix in suffixes: + # Be backward compatible with Python < 3.9 + if symbol.endswith(suffix): + symbol = symbol[:-len(suffix)] + function_set.add(symbol) + return True + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the /** to begin everything. + """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln) + + # next line is always the function name + self.state = state.NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + # + # Check for a DOC: block and handle it specially. + # + if doc_block.search(line): + + if not doc_block.group(1): + self.entry.begin_section(ln, "Introduction") + else: + self.entry.begin_section(ln, doc_block.group(1)) + + self.entry.identifier = self.entry.section + self.state = state.DOCBLOCK + # + # Otherwise we're looking for a normal kerneldoc declaration line. + # + elif doc_decl.search(line): + self.entry.identifier = doc_decl.group(1) + + # Test for data declaration + if doc_begin_data.search(line): + self.entry.decl_type = doc_begin_data.group(1) + self.entry.identifier = doc_begin_data.group(2) + # + # Look for a function description + # + elif doc_begin_func.search(line): + self.entry.identifier = doc_begin_func.group(1) + self.entry.decl_type = "function" + # + # We struck out. + # + else: + self.emit_msg(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = state.NORMAL + return + # + # OK, set up for a new kerneldoc entry. + # + self.state = state.BODY + self.entry.identifier = self.entry.identifier.strip(" ") + # If there are no @param blocks, we need to set up the default section here + self.entry.begin_section(ln + 1) + # + # Find the description portion, which *should* be there but + # isn't always. + # (We should be able to capture this from the previous parsing - someday) + # + r = KernRe("[-:](.*)") + if r.search(line): + self.entry.declaration_purpose = trim_whitespace(r.group(1)) + self.state = state.DECLARATION + else: + self.entry.declaration_purpose = "" + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_msg(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_msg(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = state.NORMAL + + if self.config.verbose: + self.emit_msg(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + # + # Failed to find an identifier. Emit a warning + # + else: + self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") + + # + # Helper function to determine if a new section is being started. + # + def is_new_section(self, ln, line): + if doc_sect.search(line): + self.state = state.BODY + # + # Pick out the name of our new section, tweaking it if need be. 
+ # + newsection = doc_sect.group(1) + if newsection.lower() == 'description': + newsection = 'Description' + elif newsection.lower() == 'context': + newsection = 'Context' + self.state = state.SPECIAL_SECTION + elif newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + self.state = state.SPECIAL_SECTION + elif newsection[0] == '@': + self.state = state.SPECIAL_SECTION + # + # Initialize the contents, and get the new section going. + # + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + self.dump_section() + self.entry.begin_section(ln, newsection) + self.entry.leading_space = None + + self.entry.add_text(newcontents.lstrip()) + return True + return False + + # + # Helper function to detect (and effect) the end of a kerneldoc comment. + # + def is_comment_end(self, ln, line): + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + <text> + doc_end: + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') + if r.match(line): + self.emit_msg(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = state.PROTO + return True + return False + + + def process_decl(self, ln, line): + """ + STATE_DECLARATION: We've seen the beginning of a declaration + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # Look for anything with the " * " line beginning. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # A blank line means that we have moved out of the declaration + # part of the comment (without any "special section" parameter + # descriptions). + # + if cont == "": + self.state = state.BODY + # + # Otherwise we have more of the declaration section to soak up. + # + else: + self.entry.declaration_purpose = \ + trim_whitespace(self.entry.declaration_purpose + ' ' + cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + + def process_special(self, ln, line): + """ + STATE_SPECIAL_SECTION: a section ending with a blank line + """ + # + # If we have hit a blank line (only the " * " marker), then this + # section is done. + # + if KernRe(r"\s*\*\s*$").match(line): + self.entry.begin_section(ln, dump = True) + self.state = state.BODY + return + # + # Not a blank line, look for the other ways to end the section. + # + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # OK, we should have a continuation of the text for this section. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # If the lines of text after the first in a special section have + # leading white space, we need to trim it out or Sphinx will get + # confused. For the second line (the None case), see what we + # find there and remember it. + # + if self.entry.leading_space is None: + r = KernRe(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + # + # Otherwise, before trimming any leading chars, be *sure* + # that they are white space. We should maybe warn if this + # isn't the case. + # + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + self.entry.leading_space = i + break + # + # Add the trimmed result to the section and we're done. + # + self.entry.add_text(cont[self.entry.leading_space:]) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_body(self, ln, line): + """ + STATE_BODY: the bulk of a kerneldoc comment. 
+ """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + + if doc_content.search(line): + cont = doc_content.group(1) + self.entry.add_text(cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_inline_name(self, ln, line): + """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" + + if doc_inline_sect.search(line): + self.entry.begin_section(ln, doc_inline_sect.group(1)) + self.entry.add_text(doc_inline_sect.group(2).lstrip()) + self.state = state.INLINE_TEXT + elif doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") + self.state = state.PROTO + # else ... ?? + + def process_inline_text(self, ln, line): + """STATE_INLINE_TEXT: docbook comments within a prototype.""" + + if doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + # else ... ?? + + def syscall_munge(self, ln, proto): # pylint: disable=W0613 + """ + Handle syscall definitions + """ + + is_void = False + + # Strip newlines/CR's + proto = re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void = True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r = KernRe(r'long\s+(sys_.*?),') + if r.search(proto): + proto = KernRe(',').sub('(', proto, count=1) + elif is_void: + proto = KernRe(r'\)').sub('(void)', proto, count=1) + + # Now delete all of the odd-numbered commas in the proto + # so that argument types & names don't have a comma between them + count = 0 + length = len(proto) + + if is_void: + length = 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] == ',': + count += 1 + if count % 2 == 1: + proto = proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = KernRe(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = KernRe(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_msg(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + line = KernRe(r"//.*$", re.S).sub('', line) + # + # Soak up the line's worth of prototype text, stopping at { or ; if present. + # + if KernRe(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif not line.startswith('#'): # skip other preprocessor stuff + r = KernRe(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + # + # If we now have the whole prototype, clean it up and declare victory. 
+ # + if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): + # strip comments and surrounding spaces + self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() + # + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + # by turning it into + # int pcs_config(struct foo) + # + r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) + # + # Handle special declaration syntaxes + # + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype = self.syscall_munge(ln, + self.entry.prototype) + else: + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + # + # ... and we're done + # + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip C99-style comments and surrounding whitespace + line = KernRe(r"//.*$", re.S).sub('', line).strip() + if not line: + return # nothing to see here + + # To distinguish preprocessor directive from regular declaration later. + if line.startswith('#'): + line += ";" + # + # Split the declaration on any of { } or ;, and accumulate pieces + # until we hit a semicolon while not inside {brackets} + # + r = KernRe(r'(.*?)([{};])') + for chunk in r.split(line): + if chunk: # Ignore empty matches + self.entry.prototype += chunk + # + # This cries out for a match statement ... someday after we can + # drop Python 3.9 ... + # + if chunk == '{': + self.entry.brcount += 1 + elif chunk == '}': + self.entry.brcount -= 1 + elif chunk == ';' and self.entry.brcount <= 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + return + # + # We hit the end of the line while still in the declaration; put + # in a space to represent the newline. + # + self.entry.prototype += ' ' + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.begin_section(ln, doc_inline_oneline.group(1)) + self.entry.add_text(doc_inline_oneline.group(2)) + self.dump_section() + + elif doc_inline_start.search(line): + self.state = state.INLINE_NAME + + elif self.entry.decl_type == 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", self.entry.identifier) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + + def parse_export(self): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + + for line in fp: + self.process_export(export_table, line) + + except IOError: + return None + + return export_table + + # + # The state/action table telling us which function to invoke in + # each state. 
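Before the table itself (next), a standalone sketch of the brace-counting accumulation that process_proto_type() above performs, with an invented declaration:

    import re

    prototype, brcount = "", 0
    line = "struct foo { int a; struct { int b; } inner; };"
    for chunk in re.split(r'(.*?)([{};])', line):
        if not chunk:   # re.split with capture groups also yields empty strings
            continue
        prototype += chunk
        if chunk == '{':
            brcount += 1
        elif chunk == '}':
            brcount -= 1
        elif chunk == ';' and brcount <= 0:
            print("complete declaration:", prototype)
            break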
+ # + state_actions = { + state.NORMAL: process_normal, + state.NAME: process_name, + state.BODY: process_body, + state.DECLARATION: process_decl, + state.SPECIAL_SECTION: process_special, + state.INLINE_NAME: process_inline_name, + state.INLINE_TEXT: process_inline_text, + state.PROTO: process_proto, + state.DOCBLOCK: process_docblock, + } + + def parse_kdoc(self): + """ + Open and process each line of a C source file. + The parsing is controlled via a state machine, and the line is passed + to a different process function depending on the state. The process + function may update the state as needed. + + Besides parsing kernel-doc tags, it also parses export symbols. + """ + + prev = "" + prev_ln = None + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + for ln, line in enumerate(fp): + + line = line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state == state.PROTO: + if line.endswith("\\"): + prev += line.rstrip("\\") + if not prev_ln: + prev_ln = ln + continue + + if prev: + ln = prev_ln + line = prev + line + prev = "" + prev_ln = None + + self.config.log.debug("%d %s: %s", + ln, state.name[self.state], + line) + + # This is an optimization over the original script. + # There, when export_file was used for the same file, + # it was read twice. Here, we use the already-existing + # loop to parse exported symbols as well. + # + if (self.state != state.NORMAL) or \ + not self.process_export(export_table, line): + # Hand this line to the appropriate state handler + self.state_actions[self.state](self, ln, line) + + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + + return export_table, self.entries diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py new file mode 100644 index 000000000000..612223e1e723 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. +""" + +import re + +# Local cache for regular expressions +re_cache = {} + + +class KernRe: + """ + Helper class to simplify regex declaration and usage, + + It calls re.compile for a given pattern. It also allows adding + regular expressions and define sub at class init time. + + Regular expressions can be cached via an argument, helping to speedup + searches. + """ + + def _add_regex(self, string, flags): + """ + Adds a new regex or re-use it from the cache. + """ + self.regex = re_cache.get(string, None) + if not self.regex: + self.regex = re.compile(string, flags=flags) + if self.cache: + re_cache[string] = self.regex + + def __init__(self, string, cache=True, flags=0): + """ + Compile a regular expression and initialize internal vars. + """ + + self.cache = cache + self.last_match = None + + self._add_regex(string, flags) + + def __str__(self): + """ + Return the regular expression pattern. + """ + return self.regex.pattern + + def __add__(self, other): + """ + Allows adding two regular expressions into one. 
+ """ + + return KernRe(str(self) + str(other), cache=self.cache or other.cache, + flags=self.regex.flags | other.regex.flags) + + def match(self, string): + """ + Handles a re.match storing its results + """ + + self.last_match = self.regex.match(string) + return self.last_match + + def search(self, string): + """ + Handles a re.search storing its results + """ + + self.last_match = self.regex.search(string) + return self.last_match + + def findall(self, string): + """ + Alias to re.findall + """ + + return self.regex.findall(string) + + def split(self, string): + """ + Alias to re.split + """ + + return self.regex.split(string) + + def sub(self, sub, string, count=0): + """ + Alias to re.sub + """ + + return self.regex.sub(sub, string, count=count) + + def group(self, num): + """ + Returns the group results of the last match + """ + + return self.last_match.group(num) + + +class NestedMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parenthesis of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + # TODO: make NestedMatch handle multiple match groups + # + # Right now, regular expressions to match it are defined only up to + # the start delimiter, e.g.: + # + # \bSTRUCT_GROUP\( + # + # is similar to: STRUCT_GROUP\((.*)\) + # except that the content inside the match group is delimiter's aligned. + # + # The content inside parenthesis are converted into a single replace + # group (e.g. r`\1'). + # + # It would be nice to change such definition to support multiple + # match groups, allowing a regex equivalent to. + # + # FOO\((.*), (.*), (.*)\) + # + # it is probably easier to define it not as a regular expression, but + # with some lexical definition like: + # + # FOO(arg1, arg2, arg3) + + DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', + } + + RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') + + def _search(self, regex, line): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and place them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm shoud work fine for properly paired lines, but will + silently ignore end delimiters that preceeds an start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to rise exceptions + to cover such issues. 
+ """ + + stack = [] + + for match_re in regex.finditer(line): + start = match_re.start() + offset = match_re.end() + + d = line[offset - 1] + if d not in self.DELIMITER_PAIRS: + continue + + end = self.DELIMITER_PAIRS[d] + stack.append(end) + + for match in self.RE_DELIM.finditer(line[offset:]): + pos = match.start() + offset + + d = line[pos] + + if d in self.DELIMITER_PAIRS: + end = self.DELIMITER_PAIRS[d] + + stack.append(end) + continue + + # Does the end delimiter match what it is expected? + if stack and d == stack[-1]: + stack.pop() + + if not stack: + yield start, offset, pos + 1 + break + + def search(self, regex, line): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + for t in self._search(regex, line): + + yield line[t[0]:t[2]] + + def sub(self, regex, sub, line, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if r'\1' is used, it works just like re: it places there the + matched paired data with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. + """ + out = "" + + cur_pos = 0 + n = 0 + + for start, end, pos in self._search(regex, line): + out += line[cur_pos:start] + + # Value, ignoring start/end delimiters + value = line[end:pos - 1] + + # replaces \1 at the sub string, if \1 is used there + new_sub = sub + new_sub = new_sub.replace(r'\1', value) + + out += new_sub + + # Drop end ';' if any + if line[pos] == ';': + pos += 1 + + cur_pos = pos + n += 1 + + if count and count >= n: + break + + # Append the remaining string + l = len(line) + out += line[cur_pos:l] + + return out diff --git a/tools/lib/python/kdoc/latex_fonts.py b/tools/lib/python/kdoc/latex_fonts.py new file mode 100755 index 000000000000..29317f8006ea --- /dev/null +++ b/tools/lib/python/kdoc/latex_fonts.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) Akira Yokosawa, 2024 +# +# Ported to Python by (c) Mauro Carvalho Chehab, 2025 + +""" +Detect problematic Noto CJK variable fonts. + +For "make pdfdocs", reports of build errors of translations.pdf started +arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE +tumbleweed have started deploying variable-font [3] format of "Noto CJK" +fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK +(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which +does not (and likely never will) understand variable fonts for historical +reasons. + +The build error happens even when both of variable- and non-variable-format +fonts are found on the build system. To make matters worse, Fedora enlists +variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, +-zh_TW, etc. Hence developers who have interest in CJK pages are more +likely to encounter the build errors. + +This script is invoked from the error path of "make pdfdocs" and emits +suggestions if variable-font files of "Noto CJK" fonts are in the list of +fonts accessible from XeTeX. 
+ +References: +[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ +[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ +[3]: https://en.wikipedia.org/wiki/Variable_font +[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts +[5]: https://build.opensuse.org/request/show/1157217 + +#=========================================================================== +Workarounds for building translations.pdf +#=========================================================================== + +* Denylist "variable font" Noto CJK fonts. + - Create $HOME/deny-vf/fontconfig/fonts.conf from the template below, with + tweaks if necessary. + - Path of fontconfig/fonts.conf can be overridden by setting an env + variable FONTS_CONF_DENY_VF. + + * Template: +----------------------------------------------------------------- + <?xml version="1.0"?> + <!DOCTYPE fontconfig SYSTEM "fonts.dtd"> + <fontconfig> + <selectfont> + <rejectfont> + <!-- for Fedora --> + <glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob> + <!-- for openSUSE tumbleweed --> + <glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob> + </rejectfont> + </selectfont> + </fontconfig> +----------------------------------------------------------------- + + The denylisting is activated for "make pdfdocs". + +* For skipping CJK pages in PDF + - Uninstall texlive-xecjk. + Denylisting is not needed in this case. + +* For printing CJK pages in PDF + - Need non-variable "Noto CJK" fonts. + * Fedora + - google-noto-sans-cjk-fonts + - google-noto-serif-cjk-fonts + * openSUSE tumbleweed + - Non-variable "Noto CJK" fonts are not available as distro packages + as of April 2024. Fetch a set of font files from upstream Noto + CJK Font released at: + https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc + and at: + https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc + , then uncompress and deploy them. + - Remember to update the fontconfig cache by running fc-cache. + +!!! Caution !!! + Uninstalling "variable font" packages can be dangerous. + They might be depended upon by other packages important for your work. + Denylisting should be less invasive, as it is effective only while + XeLaTeX runs in "make pdfdocs". +""" + +import os +import re +import subprocess +import textwrap +import sys + +class LatexFontChecker: + """ + Detect problems with CJK variable fonts that affect PDF builds for + translations. 
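The class whose docstring opens above (and whose body follows) reduces fc-list output to a list of offending font files; a standalone sketch of that reduction, with a fabricated sample line:

    import re

    sample = ("/usr/share/fonts/google-noto-sans-cjk-vf-fonts/"
              "NotoSansCJK-VF.ttc: Noto Sans CJK JP:variable=True")
    re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK")

    if 'variable=True' in sample:        # same filter as the method below
        m = re_cjk.search(sample)
        if m:
            print(m.group(1))            # the offending font-file path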
+ """ + + def __init__(self, deny_vf=None): + if not deny_vf: + deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") + + self.environ = os.environ.copy() + self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) + + self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") + + def description(self): + return __doc__ + + def get_noto_cjk_vf_fonts(self): + """Get Noto CJK fonts""" + + cjk_fonts = set() + cmd = ["fc-list", ":", "file", "family", "variable"] + try: + result = subprocess.run(cmd,stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + env=self.environ, + check=True) + + except subprocess.CalledProcessError as exc: + sys.exit(f"Error running fc-list: {repr(exc)}") + + for line in result.stdout.splitlines(): + if 'variable=True' not in line: + continue + + match = self.re_cjk.search(line) + if match: + cjk_fonts.add(match.group(1)) + + return sorted(cjk_fonts) + + def check(self): + """Check for problems with CJK fonts""" + + fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") + if not fonts: + return None + + rel_file = os.path.relpath(__file__, os.getcwd()) + + msg = "=" * 77 + "\n" + msg += 'XeTeX is confused by "variable font" files listed below:\n' + msg += fonts + "\n" + msg += textwrap.dedent(f""" + For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. + Or, CJK pages can be skipped by uninstalling texlive-xecjk. + + For more info on denylisting, other options, and variable font, run: + + tools/docs/check-variable-fonts.py -h + """) + msg += "=" * 77 + + return msg diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py new file mode 100755 index 000000000000..25361996cd20 --- /dev/null +++ b/tools/lib/python/kdoc/parse_data_structs.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2016-2025 by Mauro Carvalho Chehab . +# pylint: disable=R0912,R0915 + +""" +Parse a source file or header, creating ReStructured Text cross references. + +It accepts an optional file to change the default symbol reference or to +suppress symbols from the output. + +It is capable of identifying defines, functions, structs, typedefs, +enums and enum symbols and create cross-references for all of them. +It is also capable of distinguish #define used for specifying a Linux +ioctl. + +The optional rules file contains a set of rules like: + + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` +""" + +import os +import re +import sys + + +class ParseDataStructs: + """ + Creates an enriched version of a Kernel header file with cross-links + to each C data structure type. + + It is meant to allow having a more comprehensive documentation, where + uAPI headers will create cross-reference links to the code. + + It is capable of identifying defines, functions, structs, typedefs, + enums and enum symbols and create cross-references for all of them. + It is also capable of distinguish #define used for specifying a Linux + ioctl. + + By default, it create rules for all symbols and defines, but it also + allows parsing an exception file. Such file contains a set of rules + using the syntax below: + + 1. Ignore rules: + + ignore ` + + Removes the symbol from reference generation. + + 2. Replace rules: + + replace + + Replaces how old_symbol with a new reference. 
diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py
new file mode 100755
index 000000000000..25361996cd20
--- /dev/null
+++ b/tools/lib/python/kdoc/parse_data_structs.py
@@ -0,0 +1,482 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2016-2025 by Mauro Carvalho Chehab.
+# pylint: disable=R0912,R0915
+
+"""
+Parse a source file or header, creating reStructuredText cross-references.
+
+It accepts an optional rules file to change the default symbol references
+or to suppress symbols from the output.
+
+It is capable of identifying defines, functions, structs, typedefs,
+enums and enum symbols, and of creating cross-references for all of them.
+It is also capable of distinguishing #defines used for specifying Linux
+ioctls.
+
+The optional rules file contains a set of rules like:
+
+    ignore ioctl VIDIOC_ENUM_FMT
+    replace ioctl VIDIOC_DQBUF vidioc_qbuf
+    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+"""
+
+import os
+import re
+import sys
+
+
+class ParseDataStructs:
+    """
+    Creates an enriched version of a kernel header file with cross-links
+    to each C data structure type.
+
+    It is meant to allow a more comprehensive documentation, where uAPI
+    headers carry cross-reference links to the code.
+
+    It is capable of identifying defines, functions, structs, typedefs,
+    enums and enum symbols, and of creating cross-references for all of
+    them.  It is also capable of distinguishing #defines used for
+    specifying Linux ioctls.
+
+    By default, it creates rules for all symbols and defines, but it also
+    allows parsing an exceptions file.  Such a file contains a set of
+    rules using the syntax below:
+
+    1. Ignore rules:
+
+       ignore <type> <symbol>
+
+       Removes the symbol from reference generation.
+
+    2. Replace rules:
+
+       replace <type> <old_symbol> <new_reference>
+
+       Replaces references to <old_symbol> with <new_reference>, which
+       can be:
+
+       - a simple symbol name;
+       - a full Sphinx reference.
+
+    3. Namespace rules:
+
+       namespace <namespace>
+
+       Sets the C namespace to be used during cross-reference generation.
+       It can be overridden by replace rules.
+
+    On ignore and replace rules, <type> can be:
+
+    - ioctl: for defines that end with _IO*, e.g. ioctl definitions;
+    - define: for other defines;
+    - symbol: for symbols defined within enums;
+    - typedef: for typedefs;
+    - enum: for the name of a non-anonymous enum;
+    - struct: for structs.
+
+    Examples:
+
+        ignore define __LINUX_MEDIA_H
+        ignore ioctl VIDIOC_ENUM_FMT
+        replace ioctl VIDIOC_DQBUF vidioc_qbuf
+        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+
+        namespace MC
+    """
+
+    # Parser regexes with multiple ways to capture enums and structs
+    RE_ENUMS = [
+        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
+        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
+        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
+        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
+    ]
+    RE_STRUCTS = [
+        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
+        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
+    ]
+
+    # FIXME: the original code was written long before the Sphinx C
+    # domain gained support for multiple namespaces. To avoid too much
+    # churn in the existing hyperlinks, the code keeps using "c:type"
+    # instead of the right types. To change that, we need to change the
+    # types not only here, but also in the uAPI media documentation.
+    DEF_SYMBOL_TYPES = {
+        "ioctl": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "IOCTL Commands",
+        },
+        "define": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "Macros and Definitions",
+        },
+        # We're calling each definition inside an enum a "symbol"
+        "symbol": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "Enumeration values",
+        },
+        "typedef": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Type Definitions",
+        },
+        # This is the description of the enum itself
+        "enum": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Enumerations",
+        },
+        "struct": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Structures",
+        },
+    }
+
+    def __init__(self, debug: bool = False):
+        """Initialize internal vars"""
+        self.debug = debug
+        self.data = ""
+
+        self.symbols = {}
+
+        self.namespace = None
+        self.exceptions_name = None
+        self.ignore = []
+        self.replace = []
+
+        for symbol_type in self.DEF_SYMBOL_TYPES:
+            self.symbols[symbol_type] = {}
+
+    def read_exceptions(self, fname: str):
+        """Read the optional exceptions (rules) file, storing its rules"""
+        if not fname:
+            return
+
+        self.exceptions_name = name = os.path.basename(fname)
+
+        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
+            for ln, line in enumerate(f, start=1):
+                line = line.strip()
+                if not line or line.startswith("#"):
+                    continue
+
+                # ignore rules
+                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
+                if match:
+                    self.ignore.append((ln, match.group(1), match.group(2)))
+                    continue
+
+                # replace rules
+                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
+                if match:
+                    self.replace.append((ln, match.group(1), match.group(2),
+                                         match.group(3)))
+                    continue
+
+                # namespace rules
+                match = re.match(r"^namespace\s+(\S+)", line)
+                if match:
+                    self.namespace = match.group(1)
+                    continue
+
+                sys.exit(f"{name}:{ln}: invalid line: {line}")
+
+    def apply_exceptions(self):
+        """
+        Process the exceptions file rules, ignoring or replacing
+        references.
+        """
+        name = self.exceptions_name
+
+        # Handle ignore rules
+        for ln, c_type, symbol in self.ignore:
+            if c_type not in self.DEF_SYMBOL_TYPES:
+                sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+            d = self.symbols[c_type]
+            if symbol in d:
+                del d[symbol]
+
+        # Handle replace rules
+        for ln, c_type, old, new in self.replace:
+            if c_type not in self.DEF_SYMBOL_TYPES:
+                sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+            reftype = None
+
+            # Parse the reference type, when one is specified
+            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
+            if match:
+                reftype = f":c:{match.group(1)}"
+                new = match.group(2)
+            else:
+                match = re.search(r"(\:ref)\:\`(.+)\`", new)
+                if match:
+                    reftype = match.group(1)
+                    new = match.group(2)
+
+            # If the replacement rule doesn't have a type, get the default
+            if not reftype:
+                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
+                if not reftype:
+                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")
+
+            new_ref = f"{reftype}:`{old} <{new}>`"
+
+            # Change self.symbols to use the replacement rule
+            if old in self.symbols[c_type]:
+                (_, ln) = self.symbols[c_type][old]
+                self.symbols[c_type][old] = (new_ref, ln)
+            else:
+                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")
+
+    def store_type(self, ln, symbol_type: str, symbol: str,
+                   ref_name: str = None, replace_underscores: bool = True):
+        """
+        Stores a new symbol at self.symbols under symbol_type.
+
+        By default, underscores at the reference name are replaced by "-".
+        """
+        defs = self.DEF_SYMBOL_TYPES[symbol_type]
+
+        prefix = defs.get("prefix", "")
+        suffix = defs.get("suffix", "")
+        ref_type = defs.get("ref_type")
+
+        # Determine the reference link based on the symbol type
+        if ref_type or self.namespace:
+            if not ref_name:
+                ref_name = symbol.lower()
+
+            # :ref: targets conventionally use "-" instead of "_"
+            if ref_type == ":ref" and replace_underscores:
+                ref_name = ref_name.replace("_", "-")
+
+            # C domain references may have namespaces
+            if ref_type and ref_type.startswith(":c:") and self.namespace:
+                ref_name = f"{self.namespace}.{ref_name}"
+
+            if ref_type:
+                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
+            else:
+                ref_link = f"`{symbol} <{ref_name}>`"
+        else:
+            ref_link = symbol
+
+        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)
+
+    def store_line(self, line):
+        """Stores a line at self.data, properly indented"""
+        line = " " + line.expandtabs()
+        self.data += line.rstrip(" ")
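+
+    # For instance, with the default rules above (hypothetical symbols,
+    # no namespace set), store_type() would record references such as:
+    #
+    #     ioctl  VIDIOC_QBUF -> \ :ref:`VIDIOC_QBUF <vidioc_qbuf>`\
+    #     struct v4l2_buffer -> \ :c:type:`v4l2_buffer <v4l2_buffer>`\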
+
+    def parse_file(self, file_in: str, exceptions: str = None):
+        """Reads a C source file and gets its identifiers"""
+        self.data = ""
+        is_enum = False
+        is_comment = False
+        multiline = ""
+
+        self.read_exceptions(exceptions)
+
+        with open(file_in, "r",
+                  encoding="utf-8", errors="backslashreplace") as f:
+            for line_no, line in enumerate(f):
+                self.store_line(line)
+                line = line.strip("\n")
+
+                # Handle continuation lines
+                if line.endswith("\\"):
+                    multiline += line[:-1]
+                    continue
+
+                if multiline:
+                    line = multiline + line
+                    multiline = ""
+
+                # Handle comments.  They can be multilined
+                if not is_comment:
+                    if re.search(r"/\*.*", line):
+                        is_comment = True
+                    else:
+                        # Strip C99-style comments
+                        line = re.sub(r"(//.*)", "", line)
+
+                if is_comment:
+                    if re.search(r".*\*/", line):
+                        is_comment = False
+                    else:
+                        multiline = line
+                        continue
+
+                # At this point, the line variable may hold a multi-line
+                # statement, if lines end with \ or have multi-line
+                # comments.  With that, the entire comments can safely be
+                # removed, and there's no need to use re.DOTALL for the
+                # logic below
+                line = re.sub(r"(/\*.*\*/)", "", line)
+                if not line.strip():
+                    continue
+
+                # It can be useful for debug purposes to print the file
+                # after having comments stripped and multi-lines grouped.
+                if self.debug > 1:
+                    print(f"line {line_no + 1}: {line}")
+
+                # Now the fun begins: parse each type and store it.
+
+                # We opted for a two-step logic here (detect symbols now,
+                # replace references later at gen_output) because:
+                # 1. it makes it easier to debug issues with non-parsed
+                #    symbols;
+                # 2. we want symbol replacement over the entire content,
+                #    not just at the line where the symbol is detected.
+
+                if is_enum:
+                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
+                    if match:
+                        self.store_type(line_no, "symbol", match.group(1))
+                    if "}" in line:
+                        is_enum = False
+                    continue
+
+                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
+                if match:
+                    self.store_type(line_no, "ioctl", match.group(1),
+                                    replace_underscores=False)
+                    continue
+
+                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
+                if match:
+                    self.store_type(line_no, "define", match.group(1))
+                    continue
+
+                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
+                                 line)
+                if match:
+                    name = match.group(2).strip()
+                    symbol = match.group(3)
+                    self.store_type(line_no, "typedef", symbol, ref_name=name)
+                    continue
+
+                for re_enum in self.RE_ENUMS:
+                    match = re_enum.match(line)
+                    if match:
+                        self.store_type(line_no, "enum", match.group(1))
+                        is_enum = True
+                        break
+
+                for re_struct in self.RE_STRUCTS:
+                    match = re_struct.match(line)
+                    if match:
+                        self.store_type(line_no, "struct", match.group(1))
+                        break
+
+        self.apply_exceptions()
+
+    def debug_print(self):
+        """
+        Print debug information containing the replacement rules per
+        symbol.  To make it easier to check, group them per type.
+        """
+        if not self.debug:
+            return
+
+        for c_type, refs in self.symbols.items():
+            if not refs:            # Skip empty dictionaries
+                continue
+
+            print(f"{c_type}:")
+
+            for symbol, (ref, ln) in sorted(refs.items()):
+                print(f"  #{ln:<5d} {symbol} -> {ref}")
+
+            print()
+
+    def gen_output(self):
+        """Generate the formatted output, returning it as a string."""
+
+        # Avoid extra blank lines
+        text = re.sub(r"\s+$", "", self.data) + "\n"
+        text = re.sub(r"\n\s+\n", "\n\n", text)
+
+        # Escape Sphinx special characters
+        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)
+
+        # Source uAPI files may have special notes.  Use bold font for them
+        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)
+
+        # Delimiters to catch the entire symbol after escaping
+        start_delim = r"([ \n\t\(=\*\@])"
+        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"
+
+        # Process all reference types
+        for ref_dict in self.symbols.values():
+            for symbol, (replacement, _) in ref_dict.items():
+                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
+                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
+                              fr'\1{replacement}\2', text)
+
+        # Remove "\ " where not needed: before spaces and at the end of lines
+        text = re.sub(r"\\ ([\n ])", r"\1", text)
+        text = re.sub(r" \\ ", " ", text)
+
+        return text
+
+    def gen_toc(self):
+        """
+        Create a list of symbols to be part of a table of contents
+        """
+        text = []
+
+        # Sort symbol types per description
+        symbol_descriptions = []
+        for k, v in self.DEF_SYMBOL_TYPES.items():
+            symbol_descriptions.append((v['description'], k))
+
+        symbol_descriptions.sort()
+
+        # Process each category
+        for description, c_type in symbol_descriptions:
+
+            refs = self.symbols[c_type]
+            if not refs:            # Skip empty categories
+                continue
+
+            text.append(f"{description}")
+            text.append("-" * len(description))
+            text.append("")
+
+            # Sort symbols alphabetically
+            for symbol, (ref, ln) in sorted(refs.items()):
+                text.append(f"- LINENO_{ln}: {ref}")
+
+            text.append("")     # Add an empty line between categories
+
+        return "\n".join(text)
+
+    def write_output(self, file_in: str, file_out: str, toc: bool):
+        """Write the generated output (TOC or enriched header) to a file"""
+        title = os.path.basename(file_in)
+
+        if toc:
+            text = self.gen_toc()
+        else:
+            text = self.gen_output()
+
+        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
+            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
+            f.write(f"{title}\n")
+            f.write("=" * len(title) + "\n\n")
+
+            if not toc:
+                f.write(".. parsed-literal::\n\n")
+
+            f.write(text)
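+
+# Minimal usage sketch (illustrative; tools/docs/parse-headers.py is the
+# real user of this class).  The file names below are hypothetical:
+#
+#     parser = ParseDataStructs()
+#     parser.parse_file("videodev2.h", "videodev2.h.rules")
+#     parser.write_output("videodev2.h", "videodev2.h.rst", toc=False)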
+ """ + + def __init__(self, version): + """Ïnitialize self.version tuple from a version string""" + self.version = self.parse_version(version) + + @staticmethod + def parse_version(version): + """Convert a major.minor.patch version into a tuple""" + return tuple(int(x) for x in version.split(".")) + + @staticmethod + def ver_str(version): + """Returns a version tuple as major.minor.patch""" + return ".".join([str(x) for x in version]) + + @staticmethod + def cmd_print(cmd, max_len=80): + cmd_line = [] + + for w in cmd: + w = shlex.quote(w) + + if cmd_line: + if not max_len or len(cmd_line[-1]) + len(w) < max_len: + cmd_line[-1] += " " + w + continue + else: + cmd_line[-1] += " \\" + cmd_line.append(w) + else: + cmd_line.append(w) + + return "\n ".join(cmd_line) + + def __str__(self): + """Returns a version tuple as major.minor.patch from self.version""" + return self.ver_str(self.version) + + @staticmethod + def get_python_version(cmd): + """ + Get python version from a Python binary. As we need to detect if + are out there newer python binaries, we can't rely on sys.release here. + """ + + kwargs = {} + if sys.version_info < (3, 7): + kwargs['universal_newlines'] = True + else: + kwargs['text'] = True + + result = subprocess.run([cmd, "--version"], + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + **kwargs, check=False) + + version = result.stdout.strip() + + match = re.search(r"(\d+\.\d+\.\d+)", version) + if match: + return PythonVersion.parse_version(match.group(1)) + + print(f"Can't parse version {version}") + return (0, 0, 0) + + @staticmethod + def find_python(min_version): + """ + Detect if are out there any python 3.xy version newer than the + current one. + + Note: this routine is limited to up to 2 digits for python3. We + may need to update it one day, hopefully on a distant future. + """ + patterns = [ + "python3.[0-9][0-9]", + "python3.[0-9]", + ] + + python_cmd = [] + + # Seek for a python binary newer than min_version + for path in os.getenv("PATH", "").split(":"): + for pattern in patterns: + for cmd in glob(os.path.join(path, pattern)): + if os.path.isfile(cmd) and os.access(cmd, os.X_OK): + version = PythonVersion.get_python_version(cmd) + if version >= min_version: + python_cmd.append((version, cmd)) + + return sorted(python_cmd, reverse=True) + + @staticmethod + def check_python(min_version, show_alternatives=False, bail_out=False, + success_on_error=False): + """ + Check if the current python binary satisfies our minimal requirement + for Sphinx build. If not, re-run with a newer version if found. + """ + cur_ver = sys.version_info[:3] + if cur_ver >= min_version: + ver = PythonVersion.ver_str(cur_ver) + return + + python_ver = PythonVersion.ver_str(cur_ver) + + available_versions = PythonVersion.find_python(min_version) + if not available_versions: + print(f"ERROR: Python version {python_ver} is not spported anymore\n") + print(" Can't find a new version. This script may fail") + return + + script_path = os.path.abspath(sys.argv[0]) + + # Check possible alternatives + if available_versions: + new_python_cmd = available_versions[0][1] + else: + new_python_cmd = None + + if show_alternatives and available_versions: + print("You could run, instead:") + for _, cmd in available_versions: + args = [cmd, script_path] + sys.argv[1:] + + cmd_str = indent(PythonVersion.cmd_print(args), " ") + print(f"{cmd_str}\n") + + if bail_out: + msg = f"Python {python_ver} not supported. 
Bailing out" + if success_on_error: + print(msg, file=sys.stderr) + sys.exit(0) + else: + sys.exit(msg) + + print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") + + # Restart script using the newer version + args = [new_python_cmd, script_path] + sys.argv[1:] + + try: + os.execv(new_python_cmd, args) + except OSError as e: + sys.exit(f"Failed to restart with {new_python_cmd}: {e}") -- cgit v1.2.3 From 992a9df41ad7173588bf90e15b33d45db2811aea Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 10 Nov 2025 15:04:30 -0700 Subject: docs: bring some order to our Python module hierarchy Now that we have tools/lib/python for our Python modules, turn them into proper packages with a single namespace so that everything can just use tools/lib/python in sys.path. No functional change. Signed-off-by: Jonathan Corbet Message-ID: <20251110220430.726665-3-corbet@lwn.net> --- Documentation/sphinx/kernel_abi.py | 4 ++-- Documentation/sphinx/kernel_include.py | 4 ++-- Documentation/sphinx/kerneldoc.py | 6 +++--- scripts/kernel-doc.py | 6 +++--- tools/docs/check-variable-fonts.py | 4 ++-- tools/docs/get_abi.py | 10 +++++----- tools/docs/parse-headers.py | 6 +++--- tools/docs/sphinx-build-wrapper | 5 ++--- tools/docs/sphinx-pre-install | 4 ++-- tools/lib/python/__init__.py | 0 tools/lib/python/abi/__init__.py | 0 tools/lib/python/abi/abi_parser.py | 2 +- tools/lib/python/abi/abi_regex.py | 4 ++-- tools/lib/python/abi/system_symbols.py | 2 +- tools/lib/python/kdoc/__init__.py | 0 tools/lib/python/kdoc/kdoc_files.py | 4 ++-- tools/lib/python/kdoc/kdoc_output.py | 4 ++-- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 18 files changed, 34 insertions(+), 35 deletions(-) create mode 100644 tools/lib/python/__init__.py create mode 100644 tools/lib/python/abi/__init__.py create mode 100644 tools/lib/python/kdoc/__init__.py (limited to 'tools/docs/parse-headers.py') diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index 7ec832da8444..5667f207d175 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -43,9 +43,9 @@ from sphinx.util.docutils import switch_source_input from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/lib/python/abi")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from abi_parser import AbiParser +from abi.abi_parser import AbiParser __version__ = "1.0" diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py index a12455daa6d7..626762ff6af3 100755 --- a/Documentation/sphinx/kernel_include.py +++ b/Documentation/sphinx/kernel_include.py @@ -97,9 +97,9 @@ from docutils.parsers.rst.directives.body import CodeBlock, NumberLines from sphinx.util import logging srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, "tools/lib/python/kdoc")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from parse_data_structs import ParseDataStructs +from kdoc.parse_data_structs import ParseDataStructs __version__ = "1.0" logger = logging.getLogger(__name__) diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index 56f382a6bdf1..d8cdf068ef35 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -42,10 +42,10 @@ from sphinx.util import logging from pprint import pformat srctree = os.path.abspath(os.environ["srctree"]) -sys.path.insert(0, os.path.join(srctree, 
"tools/lib/python/kdoc")) +sys.path.insert(0, os.path.join(srctree, "tools/lib/python")) -from kdoc_files import KernelFiles -from kdoc_output import RestFormat +from kdoc.kdoc_files import KernelFiles +from kdoc.kdoc_output import RestFormat __version__ = '1.0' kfiles = None diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index bb24bbf73167..7a1eaf986bcd 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -111,7 +111,7 @@ import sys # Import Python modules -LIB_DIR = "../tools/lib/python/kdoc" +LIB_DIR = "../tools/lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) @@ -292,8 +292,8 @@ def main(): logger.warning("Python 3.7 or later is required for correct results") # Import kernel-doc libraries only after checking Python version - from kdoc_files import KernelFiles # pylint: disable=C0415 - from kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 + from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 + from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 if args.man: out_style = ManFormat(modulename=args.modulename) diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py index c48bb05dad82..958d5a745724 100755 --- a/tools/docs/check-variable-fonts.py +++ b/tools/docs/check-variable-fonts.py @@ -17,9 +17,9 @@ import sys import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) -from latex_fonts import LatexFontChecker +from kdoc.latex_fonts import LatexFontChecker checker = LatexFontChecker() diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py index e0abfe12fac7..2f0b99401f26 100755 --- a/tools/docs/get_abi.py +++ b/tools/docs/get_abi.py @@ -14,15 +14,15 @@ import sys # Import Python modules -LIB_DIR = "../lib/python/abi" +LIB_DIR = "../lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) -from abi_parser import AbiParser # pylint: disable=C0413 -from abi_regex import AbiRegex # pylint: disable=C0413 -from helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 -from system_symbols import SystemSymbols # pylint: disable=C0413 +from abi.abi_parser import AbiParser # pylint: disable=C0413 +from abi.abi_regex import AbiRegex # pylint: disable=C0413 +from abi.helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 +from abi.system_symbols import SystemSymbols # pylint: disable=C0413 # Command line classes diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py index ed9cf2bf22de..436acea4c6ca 100755 --- a/tools/docs/parse-headers.py +++ b/tools/docs/parse-headers.py @@ -28,9 +28,9 @@ import argparse, sys import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) -from parse_data_structs import ParseDataStructs -from enrich_formatter import EnrichFormatter +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) +from kdoc.parse_data_structs import ParseDataStructs +from kdoc.enrich_formatter import EnrichFormatter def main(): """Main function""" diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper index ce0b1b5292da..d4943d952e2a 100755 --- a/tools/docs/sphinx-build-wrapper +++ b/tools/docs/sphinx-build-wrapper @@ -61,10 +61,9 @@ LIB_DIR = "../lib/python" SRC_DIR = 
os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) -sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR + '/kdoc')) # temporary -from python_version import PythonVersion -from latex_fonts import LatexFontChecker +from kdoc.python_version import PythonVersion +from kdoc.latex_fonts import LatexFontChecker from jobserver import JobserverExec # pylint: disable=C0413,C0411,E0401 # diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install index d8c9fb76948d..965c9b093a41 100755 --- a/tools/docs/sphinx-pre-install +++ b/tools/docs/sphinx-pre-install @@ -35,8 +35,8 @@ from glob import glob import os.path src_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(src_dir, '../lib/python/kdoc')) -from python_version import PythonVersion +sys.path.insert(0, os.path.join(src_dir, '../lib/python')) +from kdoc.python_version import PythonVersion RECOMMENDED_VERSION = PythonVersion("3.4.3").version MIN_PYTHON_VERSION = PythonVersion("3.7").version diff --git a/tools/lib/python/__init__.py b/tools/lib/python/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py index 66a738013ce1..9b8db70067ef 100644 --- a/tools/lib/python/abi/abi_parser.py +++ b/tools/lib/python/abi/abi_parser.py @@ -17,7 +17,7 @@ from random import randrange, seed # Import Python modules -from helpers import AbiDebug, ABI_DIR +from abi.helpers import AbiDebug, ABI_DIR class AbiParser: diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py index 8a57846cbc69..d5553206de3c 100644 --- a/tools/lib/python/abi/abi_regex.py +++ b/tools/lib/python/abi/abi_regex.py @@ -12,8 +12,8 @@ import sys from pprint import pformat -from abi_parser import AbiParser -from helpers import AbiDebug +from abi.abi_parser import AbiParser +from abi.helpers import AbiDebug class AbiRegex(AbiParser): """Extends AbiParser to search ABI nodes with regular expressions""" diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py index f15c94a6e33c..4a2554da217b 100644 --- a/tools/lib/python/abi/system_symbols.py +++ b/tools/lib/python/abi/system_symbols.py @@ -15,7 +15,7 @@ from concurrent import futures from datetime import datetime from random import shuffle -from helpers import AbiDebug +from abi.helpers import AbiDebug class SystemSymbols: """Stores arguments for the class and initialize class vars""" diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 1fd8d17edb32..562cdf5261c3 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -13,8 +13,8 @@ import logging import os import re -from kdoc_parser import KernelDoc -from kdoc_output import OutputFormat +from kdoc.kdoc_parser import KernelDoc +from kdoc.kdoc_output import OutputFormat class GlobSourceFiles: diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 58f115059e93..14378953301b 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -19,8 +19,8 @@ import os import re from datetime import datetime -from kdoc_parser import KernelDoc, 
type_param -from kdoc_re import KernRe +from kdoc.kdoc_parser import KernelDoc, type_param +from kdoc.kdoc_re import KernRe function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f7dbb0868367..c0cc714d4d6f 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -16,8 +16,8 @@ import sys import re from pprint import pformat -from kdoc_re import NestedMatch, KernRe -from kdoc_item import KdocItem +from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_item import KdocItem # # Regular expressions used to parse kernel-doc markups at KernelDoc class. -- cgit v1.2.3