Diffstat (limited to 'tools/ar_util.py')
| -rw-r--r-- | tools/ar_util.py | 236 |
1 file changed, 236 insertions, 0 deletions
diff --git a/tools/ar_util.py b/tools/ar_util.py
new file mode 100644
index 000000000..b90d37903
--- /dev/null
+++ b/tools/ar_util.py
@@ -0,0 +1,236 @@
#!/usr/bin/env python3
#
# This file is part of the MicroPython project, http://micropython.org/
#
# The MIT License (MIT)
#
# Copyright (c) 2024 Volodymyr Shymanskyy
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import os
import re
import hashlib
import functools
import pickle

from elftools.elf import elffile
from collections import defaultdict

try:
    from ar import Archive
except ImportError:
    Archive = None


class PickleCache:
    def __init__(self, path, prefix=""):
        self.path = path
        self._get_fn = lambda key: os.path.join(path, prefix + key[:24])

    def store(self, key, data):
        os.makedirs(self.path, exist_ok=True)
        # See also https://bford.info/cachedir/
        cachedir_tag_path = os.path.join(self.path, "CACHEDIR.TAG")
        if not os.path.exists(cachedir_tag_path):
            with open(cachedir_tag_path, "w") as f:
                f.write(
                    "Signature: 8a477f597d28d172789f06886806bc55\n"
                    "# This file is a cache directory tag created by MicroPython.\n"
                    "# For information about cache directory tags see https://bford.info/cachedir/\n"
                )
        with open(self._get_fn(key), "wb") as f:
            pickle.dump(data, f)

    def load(self, key):
        with open(self._get_fn(key), "rb") as f:
            return pickle.load(f)


def cached(key, cache):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = key(*args, **kwargs)
            try:
                d = cache.load(cache_key)
                if d["key"] != cache_key:
                    raise Exception("Cache key mismatch")
                return d["data"]
            except Exception:
                res = func(*args, **kwargs)
                try:
                    cache.store(
                        cache_key,
                        {
                            "key": cache_key,
                            "data": res,
                        },
                    )
                except Exception:
                    pass
                return res

        return wrapper

    return decorator


class CachedArFile:
    def __init__(self, fn):
        if not Archive:
            raise RuntimeError("Please run 'pip install ar' to link .a files")
        self.fn = fn
        self._archive = Archive(open(fn, "rb"))
        info = self.load_symbols()
        self.objs = info["objs"]
        self.symbols = info["symbols"]

    def open(self, obj):
        return self._archive.open(obj, "rb")

    def _cache_key(self):
        sha = hashlib.sha256()
        with open(self.fn, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                sha.update(chunk)
        # Change this salt if the cache data format changes
        sha.update(bytes.fromhex("00000000000000000000000000000001"))
        return sha.hexdigest()

    @cached(key=_cache_key, cache=PickleCache(path=".mpy_ld_cache", prefix="ar_"))
    def load_symbols(self):
        print("Loading", self.fn)
        objs = defaultdict(lambda: {"def": set(), "undef": set(), "weak": set()})
        symbols = {}
        for entry in self._archive:
            obj_name = entry.name
            elf = elffile.ELFFile(self.open(obj_name))
            symtab = elf.get_section_by_name(".symtab")
            if not symtab:
                continue

            obj = objs[obj_name]

            for symbol in symtab.iter_symbols():
                sym_name = symbol.name
                sym_bind = symbol["st_info"]["bind"]

                if sym_bind in ("STB_GLOBAL", "STB_WEAK"):
                    if symbol.entry["st_shndx"] != "SHN_UNDEF":
                        obj["def"].add(sym_name)
                        symbols[sym_name] = obj_name
                    else:
                        obj["undef"].add(sym_name)

                    if sym_bind == "STB_WEAK":
                        obj["weak"].add(sym_name)

        return {"objs": dict(objs), "symbols": symbols}


def resolve(archives, symbols):
    resolved_objs = []  # Object files needed to resolve symbols
    unresolved_symbols = set()
    provided_symbols = {}  # Which symbol is provided by which object
    symbol_stack = list(symbols)

    # A helper function to handle symbol resolution from a particular object
    def add_obj(archive, symbol):
        obj_name = archive.symbols[symbol]
        obj_info = archive.objs[obj_name]

        obj_tuple = (archive, obj_name)
        if obj_tuple in resolved_objs:
            return  # Already processed this object

        resolved_objs.append(obj_tuple)

        # Add the symbols this object defines
        for defined_symbol in obj_info["def"]:
            if defined_symbol in provided_symbols and not defined_symbol.startswith(
                "__x86.get_pc_thunk."
            ):
                if defined_symbol in obj_info["weak"]:
                    continue
                else:
                    raise RuntimeError(f"Multiple definitions for {defined_symbol}")
            provided_symbols[defined_symbol] = obj_name  # TODO: mark weak if needed

        # Recursively add undefined symbols from this object
        for undef_symbol in obj_info["undef"]:
            if undef_symbol in obj_info["weak"]:
                print(f"Skipping weak dependency: {undef_symbol}")
                continue
            if undef_symbol not in provided_symbols:
                symbol_stack.append(undef_symbol)  # Add undefined symbol to resolve

    while symbol_stack:
        symbol = symbol_stack.pop(0)

        if symbol in provided_symbols:
            continue  # Symbol is already resolved

        found = False
        for archive in archives:
            if symbol in archive.symbols:
                add_obj(archive, symbol)
                found = True
                break

        if not found:
            unresolved_symbols.add(symbol)

    return resolved_objs, list(unresolved_symbols)


def expand_ld_script(fn):
    # This function parses a subset of ld scripts.
    # Typically these are just groups of static lib references.
    group_pattern = re.compile(r"GROUP\s*\(\s*([^\)]+)\s*\)", re.MULTILINE)
    output_format_pattern = re.compile(r"OUTPUT_FORMAT\s*\(\s*([^\)]+)\s*\)", re.MULTILINE)
    comment_pattern = re.compile(r"/\*.*?\*/", re.MULTILINE | re.DOTALL)

    with open(fn, "r") as f:
        content = f.read()
        content = comment_pattern.sub("", content).strip()

    # Ensure no unrecognized instructions
    leftovers = content
    for pattern in (group_pattern, output_format_pattern):
        leftovers = pattern.sub("", leftovers)
    if leftovers.strip():
        raise ValueError("Invalid instruction found in the ld script: " + leftovers)

    # Extract files from GROUP instructions
    files = []
    for match in group_pattern.findall(content):
        files.extend([file.strip() for file in re.split(r"[,\s]+", match) if file.strip()])

    return files


def load_archive(fn):
    ar_header = b"!<arch>\012"
    with open(fn, "rb") as f:
        is_ar_file = f.read(len(ar_header)) == ar_header
    if is_ar_file:
        return [CachedArFile(fn)]
    else:
        return [CachedArFile(item) for item in expand_ld_script(fn)]
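The PickleCache/cached pair above is a generic pickle-backed memoization helper: key computes a stable cache key from the wrapped function's arguments, and results are stored under that key. A minimal sketch of reusing it for a standalone function follows; the names _file_key, expensive_scan and .demo_cache are illustrative, not part of this commit, and it assumes tools/ is on sys.path so the module imports as ar_util:

import hashlib

from ar_util import PickleCache, cached


def _file_key(fn):
    # Key on the path only; hashing the file contents (as CachedArFile._cache_key
    # does above) is more robust when the file may change on disk.
    return hashlib.sha256(fn.encode()).hexdigest()


@cached(key=_file_key, cache=PickleCache(path=".demo_cache", prefix="demo_"))
def expensive_scan(fn):
    print("Scanning", fn)  # printed only on a cache miss
    return {"file": fn}

On a second call with the same argument the wrapper loads the pickled result instead of re-running the function; a corrupted or mismatched cache entry simply falls through to a recomputation.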
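Likewise, a sketch of the end-to-end flow these helpers support (intended for mpy_ld when linking against static libraries); the library paths and symbol names here are illustrative:

from ar_util import load_archive, resolve

# load_archive() accepts either a real .a archive or an ld script
# whose GROUP(...) directive lists several archives.
archives = []
for fn in ("libgcc.a", "libm.a"):
    archives.extend(load_archive(fn))

# Find the object files needed to satisfy these undefined symbols,
# following transitive dependencies between objects.
needed, unresolved = resolve(archives, ["__divdi3", "sqrt"])
for archive, obj_name in needed:
    print(archive.fn, obj_name)
if unresolved:
    print("Unresolved symbols:", unresolved)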
