diff options
| author | Alessandro Gatti <a.gatti@frob.it> | 2025-04-07 22:25:00 +0200 |
|---|---|---|
| committer | Damien George <damien@micropython.org> | 2025-06-04 22:35:39 +1000 |
| commit | bf2005de9e3d9e75a26a6bb98e82be8511266604 (patch) | |
| tree | 629317f80574ea1cb3893aa180a7dcd40443bfae /tools | |
| parent | 9174cffc47a5c317dab174fee4e76678a0554068 (diff) | |
tools/mpy_ld.py: Resolve fixed-address symbols if requested.
This commit lets mpy_ld.py resolve symbols not only from the object
files involved in the linking process, or from compiler-supplied static
libraries, but also from a list of symbols referenced by an absolute
address (usually provided by the system's ROM).
This is needed for ESP8266 targets as some C stdlib functions are
provided by the MCU's own ROM code to reduce the final code footprint,
and therefore those functions' implementation was removed from the
compiler's support libraries. This means that unless `LINK_RUNTIME` is
set (which lets tooling look at more libraries to resolve symbols) the
build process will fail as tooling is unaware of the ROM symbols'
existence. With this change, fixed-address symbols can be exposed to
the symbol resolution step when performing natmod linking.
If a symbol is provided both by a fixed-address symbols list and by
internal code or external libraries, the fixed-address symbol's address
will take precedence in all cases.
Although this should - in theory - also work for the whole range of ESP32
MCUs, testing is currently limited to Xtensa processors, and the example
natmods' makefiles only make use of this commit's changes for the
ESP8266 target.
Natmod builds can set the MPY_EXTERN_SYM_FILE variable pointing to a
linkerscript file containing a series of symbols (weak or strong) at a
fixed address; these symbols will then be used by the MicroPython
linker when packaging the natmod. If a different natmod build method is
used (eg. custom CMake scripts), `tools/mpy_ld.py` can now accept a
command line parameter called `--externs` (or its short variant `-e`)
that contains the path of a linkerscript file with the fixed-address
symbols to use when performing the linking process.
The linkerscript file parser can handle a very limited subset of
binutils's linkerscript syntax, namely just block comments, strong
symbols, and weak symbols. Each symbol must be on its own line for the
parser to succeed; empty lines and comment blocks are skipped. For an
example of what this parser was meant to handle, you can look at
`ports/esp8266/boards/eagle.rom.addr.v6.ld` and follow its format.
The natmod developer documentation is also updated to reflect the new
command line argument accepted by `mpy_ld.py` and the use cases for the
changes introduced by this commit.
Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
Diffstat (limited to 'tools')
| -rwxr-xr-x | tools/mpy_ld.py | 82 |
1 files changed, 78 insertions, 4 deletions
diff --git a/tools/mpy_ld.py b/tools/mpy_ld.py index a47653f90..6518037f2 100755 --- a/tools/mpy_ld.py +++ b/tools/mpy_ld.py @@ -402,6 +402,7 @@ class LinkEnv: self.known_syms = {} # dict of symbols that are defined self.unresolved_syms = [] # list of unresolved symbols self.mpy_relocs = [] # list of relocations needed in the output .mpy file + self.externs = {} # dict of externally-defined symbols def check_arch(self, arch_name): if arch_name != self.arch.name: @@ -491,10 +492,14 @@ def populate_got(env): sym = got_entry.sym if hasattr(sym, "resolved"): sym = sym.resolved - sec = sym.section - addr = sym["st_value"] - got_entry.sec_name = sec.name - got_entry.link_addr += sec.addr + addr + if sym.name in env.externs: + got_entry.sec_name = ".external.fixed_addr" + got_entry.link_addr = env.externs[sym.name] + else: + sec = sym.section + addr = sym["st_value"] + got_entry.sec_name = sec.name + got_entry.link_addr += sec.addr + addr # Get sorted GOT, sorted by external, text, rodata, bss so relocations can be combined got_list = sorted( @@ -520,6 +525,9 @@ def populate_got(env): dest = int(got_entry.name.split("+")[1], 16) // env.arch.word_size elif got_entry.sec_name == ".external.mp_fun_table": dest = got_entry.sym.mp_fun_table_offset + elif got_entry.sec_name == ".external.fixed_addr": + # Fixed-address symbols should not be relocated. + continue elif got_entry.sec_name.startswith(".text"): dest = ".text" elif got_entry.sec_name.startswith(".rodata"): @@ -1207,6 +1215,9 @@ def link_objects(env, native_qstr_vals_len): sym.section = env.obj_table_section elif sym.name in env.known_syms: sym.resolved = env.known_syms[sym.name] + elif sym.name in env.externs: + # Fixed-address symbols do not need pre-processing. 
+ continue else: if sym.name in fun_table: sym.section = mp_fun_table_sec @@ -1214,6 +1225,15 @@ def link_objects(env, native_qstr_vals_len): else: undef_errors.append("{}: undefined symbol: {}".format(sym.filename, sym.name)) + for sym in env.externs: + if sym in env.known_syms: + log( + LOG_LEVEL_1, + "Symbol {} is a fixed-address symbol at {:08x} and is also provided from an object file".format( + sym, env.externs[sym] + ), + ) + if undef_errors: raise LinkError("\n".join(undef_errors)) @@ -1456,6 +1476,9 @@ def do_link(args): log(LOG_LEVEL_2, "qstr vals: " + ", ".join(native_qstr_vals)) env = LinkEnv(args.arch) try: + if args.externs: + env.externs = parse_linkerscript(args.externs) + # Load object files for fn in args.files: with open(fn, "rb") as f: @@ -1484,6 +1507,50 @@ def do_link(args): sys.exit(1) +def parse_linkerscript(source): + # This extracts fixed-address symbol lists from linkerscripts, only parsing + # a small subset of all possible directives. Right now the only + # linkerscript file this is really tested against is the ESP8266's builtin + # ROM functions list ($SDK/ld/eagle.rom.addr.v6.ld). + # + # The parser should be able to handle symbol entries inside ESP-IDF's ROM + # symbol lists for the ESP32 range of MCUs as well (see *.ld files in + # $SDK/components/esp_rom/<name>/). + + symbols = {} + + LINE_REGEX = re.compile( + r'^(?P<weak>PROVIDE\()?' 
# optional weak marker start + r'(?P<symbol>[a-zA-Z_]\w*)' # symbol name + r'=0x(?P<address>[\da-fA-F]{1,8})*' # symbol address + r'(?(weak)\));$', # optional weak marker end and line terminator + re.ASCII, + ) + + inside_comment = False + for line in (line.strip() for line in source.readlines()): + if line.startswith('/*') and not inside_comment: + if not line.endswith('*/'): + inside_comment = True + continue + if inside_comment: + if line.endswith('*/'): + inside_comment = False + continue + if line.startswith('//'): + continue + match = LINE_REGEX.match(''.join(line.split())) + if not match: + continue + tokens = match.groupdict() + symbol = tokens['symbol'] + address = int(tokens['address'], 16) + if symbol in symbols: + raise ValueError(f"Symbol {symbol} already defined") + symbols[symbol] = address + return symbols + + def main(): import argparse @@ -1500,6 +1567,13 @@ def main(): cmd_parser.add_argument( "--output", "-o", default=None, help="output .mpy file (default to input with .o->.mpy)" ) + cmd_parser.add_argument( + "--externs", + "-e", + type=argparse.FileType("rt"), + default=None, + help="linkerscript providing fixed-address symbols to augment symbol resolution", + ) cmd_parser.add_argument("files", nargs="+", help="input files") args = cmd_parser.parse_args() |
