#!/usr/bin/env python3
#
# This file is part of the MicroPython project, http://micropython.org/
#
# The MIT License (MIT)
#
# Copyright (c) 2024 Volodymyr Shymanskyy
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import os
import re
import hashlib
import functools
import pickle

from elftools.elf import elffile
from collections import defaultdict

try:
    from ar import Archive
except ImportError:
    Archive = None


# A tiny on-disk cache that pickles one value per key into `path`
class PickleCache:
    def __init__(self, path, prefix=""):
        self.path = path
        self._get_fn = lambda key: os.path.join(path, prefix + key[:24])

    def store(self, key, data):
        os.makedirs(self.path, exist_ok=True)
        # See also https://bford.info/cachedir/
        cachedir_tag_path = os.path.join(self.path, "CACHEDIR.TAG")
        if not os.path.exists(cachedir_tag_path):
            with open(cachedir_tag_path, "w") as f:
                f.write(
                    "Signature: 8a477f597d28d172789f06886806bc55\n"
                    "# This file is a cache directory tag created by MicroPython.\n"
                    "# For information about cache directory tags see https://bford.info/cachedir/\n"
                )
        with open(self._get_fn(key), "wb") as f:
            pickle.dump(data, f)

    def load(self, key):
        with open(self._get_fn(key), "rb") as f:
            return pickle.load(f)


# Decorator: memoize the result of `func` in `cache`, keyed by key(*args, **kwargs).
# Cache failures are non-fatal: on any error the wrapped function runs normally.
def cached(key, cache):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = key(*args, **kwargs)
            try:
                d = cache.load(cache_key)
                if d["key"] != cache_key:
                    raise Exception("Cache key mismatch")
                return d["data"]
            except Exception:
                res = func(*args, **kwargs)
                try:
                    cache.store(
                        cache_key,
                        {
                            "key": cache_key,
                            "data": res,
                        },
                    )
                except Exception:
                    pass
                return res

        return wrapper

    return decorator


# Wrapper around a static library (.a) that caches its symbol tables on disk
class CachedArFile:
    def __init__(self, fn):
        if not Archive:
            raise RuntimeError("Please run 'pip install ar' to link .a files")
        self.fn = fn
        self._archive = Archive(open(fn, "rb"))
        info = self.load_symbols()
        self.objs = info["objs"]
        self.symbols = info["symbols"]

    def open(self, obj):
        return self._archive.open(obj, "rb")

    def _cache_key(self):
        sha = hashlib.sha256()
        with open(self.fn, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                sha.update(chunk)
        # Change this salt if the cache data format changes
        sha.update(bytes.fromhex("00000000000000000000000000000001"))
        return sha.hexdigest()

    @cached(key=_cache_key, cache=PickleCache(path=".mpy_ld_cache", prefix="ar_"))
    def load_symbols(self):
        print("Loading", self.fn)
        objs = defaultdict(lambda: {"def": set(), "undef": set(), "weak": set()})
        symbols = {}
        for entry in self._archive:
            obj_name = entry.name
            elf = elffile.ELFFile(self.open(obj_name))
            symtab = elf.get_section_by_name(".symtab")
            if not symtab:
                continue

            obj = objs[obj_name]
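            # Classify each global or weak symbol in this object's symbol table:
            # a symbol with a real section index (st_shndx != SHN_UNDEF) is a
            # definition provided by this object; anything else is an undefined
            # reference, with weak references tracked separately so that
            # resolve() can skip them.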
            for symbol in symtab.iter_symbols():
                sym_name = symbol.name
                sym_bind = symbol["st_info"]["bind"]

                if sym_bind in ("STB_GLOBAL", "STB_WEAK"):
                    if symbol.entry["st_shndx"] != "SHN_UNDEF":
                        obj["def"].add(sym_name)
                        symbols[sym_name] = obj_name
                    else:
                        obj["undef"].add(sym_name)
                        if sym_bind == "STB_WEAK":
                            obj["weak"].add(sym_name)

        return {"objs": dict(objs), "symbols": symbols}


def resolve(archives, symbols):
    resolved_objs = []  # Object files needed to resolve symbols
    unresolved_symbols = set()
    provided_symbols = {}  # Which symbol is provided by which object
    symbol_stack = list(symbols)

    # A helper function to handle symbol resolution from a particular object
    def add_obj(archive, symbol):
        obj_name = archive.symbols[symbol]
        obj_info = archive.objs[obj_name]

        obj_tuple = (archive, obj_name)
        if obj_tuple in resolved_objs:
            return  # Already processed this object

        resolved_objs.append(obj_tuple)

        # Add the symbols this object defines
        for defined_symbol in obj_info["def"]:
            if defined_symbol in provided_symbols and not defined_symbol.startswith(
                "__x86.get_pc_thunk."
            ):
                if defined_symbol in obj_info["weak"]:
                    continue
                else:
                    raise RuntimeError(f"Multiple definitions for {defined_symbol}")
            provided_symbols[defined_symbol] = obj_name  # TODO: mark weak if needed

        # Recursively add undefined symbols from this object
        for undef_symbol in obj_info["undef"]:
            if undef_symbol in obj_info["weak"]:
                print(f"Skipping weak dependency: {undef_symbol}")
                continue
            if undef_symbol not in provided_symbols:
                symbol_stack.append(undef_symbol)  # Add undefined symbol to resolve

    while symbol_stack:
        symbol = symbol_stack.pop(0)

        if symbol in provided_symbols:
            continue  # Symbol is already resolved

        found = False
        for archive in archives:
            if symbol in archive.symbols:
                add_obj(archive, symbol)
                found = True
                break

        if not found:
            unresolved_symbols.add(symbol)

    return resolved_objs, list(unresolved_symbols)


def expand_ld_script(fn):
    # This function parses a subset of ld scripts
    # Typically these are just groups of static lib references
    group_pattern = re.compile(r"GROUP\s*\(\s*([^\)]+)\s*\)", re.MULTILINE)
    output_format_pattern = re.compile(r"OUTPUT_FORMAT\s*\(\s*([^\)]+)\s*\)", re.MULTILINE)
    comment_pattern = re.compile(r"/\*.*?\*/", re.MULTILINE | re.DOTALL)

    with open(fn, "r") as f:
        content = f.read()

    content = comment_pattern.sub("", content).strip()

    # Ensure no unrecognized instructions are left over
    leftovers = content
    for pattern in (group_pattern, output_format_pattern):
        leftovers = pattern.sub("", leftovers)
    if leftovers.strip():
        raise ValueError("Invalid instruction found in the ld script: " + leftovers)

    # Extract files from GROUP instructions
    files = []
    for match in group_pattern.findall(content):
        files.extend([file.strip() for file in re.split(r"[,\s]+", match) if file.strip()])

    return files


def load_archive(fn):
    # Detect a real ar archive by its magic header ("!<arch>\n"); otherwise
    # treat the file as an ld script (e.g. a GROUP(...) wrapper) and expand it.
    ar_header = b"!<arch>\012"
    with open(fn, "rb") as f:
        is_ar_file = f.read(len(ar_header)) == ar_header
    if is_ar_file:
        return [CachedArFile(fn)]
    else:
        return [CachedArFile(item) for item in expand_ld_script(fn)]
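
# Example usage (an illustrative sketch; the library paths and symbol names
# below are hypothetical):
#
#   archives = load_archive("libm.a") + load_archive("libgcc.a")
#   needed, unresolved = resolve(archives, ["__adddf3", "sqrt"])
#   for archive, obj_name in needed:
#       data = archive.open(obj_name).read()  # raw ELF object to link in
#   if unresolved:
#       print("Unresolved symbols:", unresolved)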