summaryrefslogtreecommitdiff
path: root/tools/ar_util.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/ar_util.py')
-rw-r--r--tools/ar_util.py236
1 files changed, 236 insertions, 0 deletions
diff --git a/tools/ar_util.py b/tools/ar_util.py
new file mode 100644
index 000000000..b90d37903
--- /dev/null
+++ b/tools/ar_util.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+#
+# This file is part of the MicroPython project, http://micropython.org/
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2024 Volodymyr Shymanskyy
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import os
+import re
+import hashlib
+import functools
+import pickle
+
+from elftools.elf import elffile
+from collections import defaultdict
+
+try:
+ from ar import Archive
+except:
+ Archive = None
+
+
+class PickleCache:
+ def __init__(self, path, prefix=""):
+ self.path = path
+ self._get_fn = lambda key: os.path.join(path, prefix + key[:24])
+
+ def store(self, key, data):
+ os.makedirs(self.path, exist_ok=True)
+ # See also https://bford.info/cachedir/
+ cachedir_tag_path = os.path.join(self.path, "CACHEDIR.TAG")
+ if not os.path.exists(cachedir_tag_path):
+ with open(cachedir_tag_path, "w") as f:
+ f.write(
+ "Signature: 8a477f597d28d172789f06886806bc55\n"
+ "# This file is a cache directory tag created by MicroPython.\n"
+ "# For information about cache directory tags see https://bford.info/cachedir/\n"
+ )
+ with open(self._get_fn(key), "wb") as f:
+ pickle.dump(data, f)
+
+ def load(self, key):
+ with open(self._get_fn(key), "rb") as f:
+ return pickle.load(f)
+
+
+def cached(key, cache):
+ def decorator(func):
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ cache_key = key(*args, **kwargs)
+ try:
+ d = cache.load(cache_key)
+ if d["key"] != cache_key:
+ raise Exception("Cache key mismatch")
+ return d["data"]
+ except Exception:
+ res = func(*args, **kwargs)
+ try:
+ cache.store(
+ cache_key,
+ {
+ "key": cache_key,
+ "data": res,
+ },
+ )
+ except Exception:
+ pass
+ return res
+
+ return wrapper
+
+ return decorator
+
+
+class CachedArFile:
+ def __init__(self, fn):
+ if not Archive:
+ raise RuntimeError("Please run 'pip install ar' to link .a files")
+ self.fn = fn
+ self._archive = Archive(open(fn, "rb"))
+ info = self.load_symbols()
+ self.objs = info["objs"]
+ self.symbols = info["symbols"]
+
+ def open(self, obj):
+ return self._archive.open(obj, "rb")
+
+ def _cache_key(self):
+ sha = hashlib.sha256()
+ with open(self.fn, "rb") as f:
+ for chunk in iter(lambda: f.read(4096), b""):
+ sha.update(chunk)
+ # Change this salt if the cache data format changes
+ sha.update(bytes.fromhex("00000000000000000000000000000001"))
+ return sha.hexdigest()
+
+ @cached(key=_cache_key, cache=PickleCache(path=".mpy_ld_cache", prefix="ar_"))
+ def load_symbols(self):
+ print("Loading", self.fn)
+ objs = defaultdict(lambda: {"def": set(), "undef": set(), "weak": set()})
+ symbols = {}
+ for entry in self._archive:
+ obj_name = entry.name
+ elf = elffile.ELFFile(self.open(obj_name))
+ symtab = elf.get_section_by_name(".symtab")
+ if not symtab:
+ continue
+
+ obj = objs[obj_name]
+
+ for symbol in symtab.iter_symbols():
+ sym_name = symbol.name
+ sym_bind = symbol["st_info"]["bind"]
+
+ if sym_bind in ("STB_GLOBAL", "STB_WEAK"):
+ if symbol.entry["st_shndx"] != "SHN_UNDEF":
+ obj["def"].add(sym_name)
+ symbols[sym_name] = obj_name
+ else:
+ obj["undef"].add(sym_name)
+
+ if sym_bind == "STB_WEAK":
+ obj["weak"].add(sym_name)
+
+ return {"objs": dict(objs), "symbols": symbols}
+
+
+def resolve(archives, symbols):
+ resolved_objs = [] # Object files needed to resolve symbols
+ unresolved_symbols = set()
+ provided_symbols = {} # Which symbol is provided by which object
+ symbol_stack = list(symbols)
+
+ # A helper function to handle symbol resolution from a particular object
+ def add_obj(archive, symbol):
+ obj_name = archive.symbols[symbol]
+ obj_info = archive.objs[obj_name]
+
+ obj_tuple = (archive, obj_name)
+ if obj_tuple in resolved_objs:
+ return # Already processed this object
+
+ resolved_objs.append(obj_tuple)
+
+ # Add the symbols this object defines
+ for defined_symbol in obj_info["def"]:
+ if defined_symbol in provided_symbols and not defined_symbol.startswith(
+ "__x86.get_pc_thunk."
+ ):
+ if defined_symbol in obj_info["weak"]:
+ continue
+ else:
+ raise RuntimeError(f"Multiple definitions for {defined_symbol}")
+ provided_symbols[defined_symbol] = obj_name # TODO: mark weak if needed
+
+ # Recursively add undefined symbols from this object
+ for undef_symbol in obj_info["undef"]:
+ if undef_symbol in obj_info["weak"]:
+ print(f"Skippping weak dependency: {undef_symbol}")
+ continue
+ if undef_symbol not in provided_symbols:
+ symbol_stack.append(undef_symbol) # Add undefined symbol to resolve
+
+ while symbol_stack:
+ symbol = symbol_stack.pop(0)
+
+ if symbol in provided_symbols:
+ continue # Symbol is already resolved
+
+ found = False
+ for archive in archives:
+ if symbol in archive.symbols:
+ add_obj(archive, symbol)
+ found = True
+ break
+
+ if not found:
+ unresolved_symbols.add(symbol)
+
+ return resolved_objs, list(unresolved_symbols)
+
+
+def expand_ld_script(fn):
+ # This function parses a subset of ld scripts
+ # Typically these are just groups of static lib references
+ group_pattern = re.compile(r"GROUP\s*\(\s*([^\)]+)\s*\)", re.MULTILINE)
+ output_format_pattern = re.compile(r"OUTPUT_FORMAT\s*\(\s*([^\)]+)\s*\)", re.MULTILINE)
+ comment_pattern = re.compile(r"/\*.*?\*/", re.MULTILINE | re.DOTALL)
+
+ with open(fn, "r") as f:
+ content = f.read()
+ content = comment_pattern.sub("", content).strip()
+
+ # Ensure no unrecognized instructions
+ leftovers = content
+ for pattern in (group_pattern, output_format_pattern):
+ leftovers = pattern.sub("", leftovers)
+ if leftovers.strip():
+ raise ValueError("Invalid instruction found in the ld script:" + leftovers)
+
+ # Extract files from GROUP instructions
+ files = []
+ for match in group_pattern.findall(content):
+ files.extend([file.strip() for file in re.split(r"[,\s]+", match) if file.strip()])
+
+ return files
+
+
+def load_archive(fn):
+ ar_header = b"!<arch>\012"
+ with open(fn, "rb") as f:
+ is_ar_file = f.read(len(ar_header)) == ar_header
+ if is_ar_file:
+ return [CachedArFile(fn)]
+ else:
+ return [CachedArFile(item) for item in expand_ld_script(fn)]