Diffstat (limited to 'tools/ar_util.py')
| -rw-r--r-- | tools/ar_util.py | 236 |
1 file changed, 236 insertions, 0 deletions
diff --git a/tools/ar_util.py b/tools/ar_util.py
new file mode 100644
index 000000000..b90d37903
--- /dev/null
+++ b/tools/ar_util.py
@@ -0,0 +1,236 @@
#!/usr/bin/env python3
#
# This file is part of the MicroPython project, http://micropython.org/
#
# The MIT License (MIT)
#
# Copyright (c) 2024 Volodymyr Shymanskyy
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import os
import re
import hashlib
import functools
import pickle

from elftools.elf import elffile
from collections import defaultdict

try:
    from ar import Archive
except ImportError:
    Archive = None


class PickleCache:
    def __init__(self, path, prefix=""):
        self.path = path
        self._get_fn = lambda key: os.path.join(path, prefix + key[:24])

    def store(self, key, data):
        os.makedirs(self.path, exist_ok=True)
        # See also https://bford.info/cachedir/
        cachedir_tag_path = os.path.join(self.path, "CACHEDIR.TAG")
        if not os.path.exists(cachedir_tag_path):
            with open(cachedir_tag_path, "w") as f:
                f.write(
                    "Signature: 8a477f597d28d172789f06886806bc55\n"
                    "# This file is a cache directory tag created by MicroPython.\n"
                    "# For information about cache directory tags see https://bford.info/cachedir/\n"
                )
        with open(self._get_fn(key), "wb") as f:
            pickle.dump(data, f)

    def load(self, key):
        with open(self._get_fn(key), "rb") as f:
            return pickle.load(f)


def cached(key, cache):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = key(*args, **kwargs)
            try:
                d = cache.load(cache_key)
                if d["key"] != cache_key:
                    raise Exception("Cache key mismatch")
                return d["data"]
            except Exception:
                res = func(*args, **kwargs)
                try:
                    cache.store(
                        cache_key,
                        {
                            "key": cache_key,
                            "data": res,
                        },
                    )
                except Exception:
                    pass
                return res

        return wrapper

    return decorator


class CachedArFile:
    def __init__(self, fn):
        if not Archive:
            raise RuntimeError("Please run 'pip install ar' to link .a files")
        self.fn = fn
        self._archive = Archive(open(fn, "rb"))
        info = self.load_symbols()
        self.objs = info["objs"]
        self.symbols = info["symbols"]

    def open(self, obj):
        return self._archive.open(obj, "rb")

    def _cache_key(self):
        sha = hashlib.sha256()
        with open(self.fn, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                sha.update(chunk)
        # Change this salt if the cache data format changes
        sha.update(bytes.fromhex("00000000000000000000000000000001"))
        return sha.hexdigest()

    @cached(key=_cache_key, cache=PickleCache(path=".mpy_ld_cache", prefix="ar_"))
    def load_symbols(self):
        print("Loading", self.fn)
        objs = defaultdict(lambda: {"def": set(), "undef": set(), "weak": set()})
        symbols = {}
        for entry in self._archive:
            obj_name = entry.name
            elf = elffile.ELFFile(self.open(obj_name))
            symtab = elf.get_section_by_name(".symtab")
            if not symtab:
                continue

            obj = objs[obj_name]

            for symbol in symtab.iter_symbols():
                sym_name = symbol.name
                sym_bind = symbol["st_info"]["bind"]

                if sym_bind in ("STB_GLOBAL", "STB_WEAK"):
                    if symbol.entry["st_shndx"] != "SHN_UNDEF":
                        obj["def"].add(sym_name)
                        symbols[sym_name] = obj_name
                    else:
                        obj["undef"].add(sym_name)

                    if sym_bind == "STB_WEAK":
                        obj["weak"].add(sym_name)

        return {"objs": dict(objs), "symbols": symbols}


def resolve(archives, symbols):
    resolved_objs = []  # Object files needed to resolve symbols
    unresolved_symbols = set()
    provided_symbols = {}  # Which symbol is provided by which object
    symbol_stack = list(symbols)

    # A helper function to handle symbol resolution from a particular object
    def add_obj(archive, symbol):
        obj_name = archive.symbols[symbol]
        obj_info = archive.objs[obj_name]

        obj_tuple = (archive, obj_name)
        if obj_tuple in resolved_objs:
            return  # Already processed this object

        resolved_objs.append(obj_tuple)

        # Add the symbols this object defines
        for defined_symbol in obj_info["def"]:
            if defined_symbol in provided_symbols and not defined_symbol.startswith(
                "__x86.get_pc_thunk."
            ):
                if defined_symbol in obj_info["weak"]:
                    continue
                else:
                    raise RuntimeError(f"Multiple definitions for {defined_symbol}")
            provided_symbols[defined_symbol] = obj_name  # TODO: mark weak if needed

        # Recursively add undefined symbols from this object
        for undef_symbol in obj_info["undef"]:
            if undef_symbol in obj_info["weak"]:
                print(f"Skipping weak dependency: {undef_symbol}")
                continue
            if undef_symbol not in provided_symbols:
                symbol_stack.append(undef_symbol)  # Add undefined symbol to resolve

    while symbol_stack:
        symbol = symbol_stack.pop(0)

        if symbol in provided_symbols:
            continue  # Symbol is already resolved

        found = False
        for archive in archives:
            if symbol in archive.symbols:
                add_obj(archive, symbol)
                found = True
                break

        if not found:
            unresolved_symbols.add(symbol)

    return resolved_objs, list(unresolved_symbols)


def expand_ld_script(fn):
    # This function parses a subset of ld scripts.
    # Typically these are just groups of static lib references.
    group_pattern = re.compile(r"GROUP\s*\(\s*([^\)]+)\s*\)", re.MULTILINE)
    output_format_pattern = re.compile(r"OUTPUT_FORMAT\s*\(\s*([^\)]+)\s*\)", re.MULTILINE)
    comment_pattern = re.compile(r"/\*.*?\*/", re.MULTILINE | re.DOTALL)

    with open(fn, "r") as f:
        content = f.read()
        content = comment_pattern.sub("", content).strip()

    # Ensure no unrecognized instructions
    leftovers = content
    for pattern in (group_pattern, output_format_pattern):
        leftovers = pattern.sub("", leftovers)
    if leftovers.strip():
        raise ValueError("Invalid instruction found in the ld script: " + leftovers)

    # Extract files from GROUP instructions
    files = []
    for match in group_pattern.findall(content):
        files.extend([file.strip() for file in re.split(r"[,\s]+", match) if file.strip()])

    return files


def load_archive(fn):
    ar_header = b"!<arch>\012"
    with open(fn, "rb") as f:
        is_ar_file = f.read(len(ar_header)) == ar_header
    if is_ar_file:
        return [CachedArFile(fn)]
    else:
        return [CachedArFile(item) for item in expand_ld_script(fn)]
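The PickleCache/cached pair above is a generic pickle-backed memoization helper: key computes a stable cache key from the wrapped function's arguments, and results are stored under that key. A minimal sketch of reusing it for a standalone function follows; the names _file_key, expensive_scan and .demo_cache are illustrative, not part of this commit, and it assumes tools/ is on sys.path so the module imports as ar_util:

import hashlib

from ar_util import PickleCache, cached


def _file_key(fn):
    # Key on the path only; hashing the file contents (as CachedArFile._cache_key
    # does above) is more robust when the file may change on disk.
    return hashlib.sha256(fn.encode()).hexdigest()


@cached(key=_file_key, cache=PickleCache(path=".demo_cache", prefix="demo_"))
def expensive_scan(fn):
    print("Scanning", fn)  # printed only on a cache miss
    return {"file": fn}

On a second call with the same argument the wrapper loads the pickled result instead of re-running the function; a corrupted or mismatched cache entry simply falls through to a recomputation.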
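Likewise, a sketch of the end-to-end flow these helpers support (intended for mpy_ld when linking against static libraries); the library paths and symbol names here are illustrative:

from ar_util import load_archive, resolve

# load_archive() accepts either a real .a archive or an ld script
# whose GROUP(...) directive lists several archives.
archives = []
for fn in ("libgcc.a", "libm.a"):
    archives.extend(load_archive(fn))

# Find the object files needed to satisfy these undefined symbols,
# following transitive dependencies between objects.
needed, unresolved = resolve(archives, ["__divdi3", "sqrt"])
for archive, obj_name in needed:
    print(archive.fn, obj_name)
if unresolved:
    print("Unresolved symbols:", unresolved)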
