summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorAlessandro Gatti <a.gatti@frob.it>2025-04-07 22:25:00 +0200
committerDamien George <damien@micropython.org>2025-06-04 22:35:39 +1000
commitbf2005de9e3d9e75a26a6bb98e82be8511266604 (patch)
tree629317f80574ea1cb3893aa180a7dcd40443bfae /tools
parent9174cffc47a5c317dab174fee4e76678a0554068 (diff)
tools/mpy_ld.py: Resolve fixed-address symbols if requested.
This commit lets mpy_ld.py resolve symbols not only from the object files involved in the linking process, or from compiler-supplied static libraries, but also from a list of symbols referenced by an absolute address (usually provided by the system's ROM). This is needed for ESP8266 targets as some C stdlib functions are provided by the MCU's own ROM code to reduce the final code footprint, and therefore those functions' implementation was removed from the compiler's support libraries. This means that unless `LINK_RUNTIME` is set (which lets tooling look at more libraries to resolve symbols) the build process will fail as tooling is unaware of the ROM symbols' existence. With this change, fixed-address symbols can be exposed to the symbol resolution step when performing natmod linking. If there are symbols coming in from a fixed-address symbols list and internal code or external libraries, the fixed-address symbol address will take precedence in all cases. Although this is - in theory - also working for the whole range of ESP32 MCUs, testing is currently limited to Xtensa processors and the example natmods' makefiles only make use of this commit's changes for the ESP8266 target. Natmod builds can set the MPY_EXTERN_SYM_FILE variable pointing to a linkerscript file containing a series of symbols (weak or strong) at a fixed address; these symbols will then be used by the MicroPython linker when packaging the natmod. If a different natmod build method is used (eg. custom CMake scripts), `tools/mpy_ld.py` can now accept a command line parameter called `--externs` (or its short variant `-e`) that contains the path of a linkerscript file with the fixed-address symbols to use when performing the linking process. The linkerscript file parser can handle a very limited subset of binutils's linkerscript syntax, namely just block comments, strong symbols, and weak symbols. Each symbol must be in its own line for the parser to succeed, empty lines or comment blocks are skipped. For an example of what this parser was meant to handle, you can look at `ports/esp8266/boards/eagle.rom.addr.v6.ld` and follow its format. The natmod developer documentation is also updated to reflect the new command line argument accepted by `mpy_ld.py` and the use cases for the changes introduced by this commit. Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
Diffstat (limited to 'tools')
-rwxr-xr-xtools/mpy_ld.py82
1 files changed, 78 insertions, 4 deletions
diff --git a/tools/mpy_ld.py b/tools/mpy_ld.py
index a47653f90..6518037f2 100755
--- a/tools/mpy_ld.py
+++ b/tools/mpy_ld.py
@@ -402,6 +402,7 @@ class LinkEnv:
self.known_syms = {} # dict of symbols that are defined
self.unresolved_syms = [] # list of unresolved symbols
self.mpy_relocs = [] # list of relocations needed in the output .mpy file
+ self.externs = {} # dict of externally-defined symbols
def check_arch(self, arch_name):
if arch_name != self.arch.name:
@@ -491,10 +492,14 @@ def populate_got(env):
sym = got_entry.sym
if hasattr(sym, "resolved"):
sym = sym.resolved
- sec = sym.section
- addr = sym["st_value"]
- got_entry.sec_name = sec.name
- got_entry.link_addr += sec.addr + addr
+ if sym.name in env.externs:
+ got_entry.sec_name = ".external.fixed_addr"
+ got_entry.link_addr = env.externs[sym.name]
+ else:
+ sec = sym.section
+ addr = sym["st_value"]
+ got_entry.sec_name = sec.name
+ got_entry.link_addr += sec.addr + addr
# Get sorted GOT, sorted by external, text, rodata, bss so relocations can be combined
got_list = sorted(
@@ -520,6 +525,9 @@ def populate_got(env):
dest = int(got_entry.name.split("+")[1], 16) // env.arch.word_size
elif got_entry.sec_name == ".external.mp_fun_table":
dest = got_entry.sym.mp_fun_table_offset
+ elif got_entry.sec_name == ".external.fixed_addr":
+ # Fixed-address symbols should not be relocated.
+ continue
elif got_entry.sec_name.startswith(".text"):
dest = ".text"
elif got_entry.sec_name.startswith(".rodata"):
@@ -1207,6 +1215,9 @@ def link_objects(env, native_qstr_vals_len):
sym.section = env.obj_table_section
elif sym.name in env.known_syms:
sym.resolved = env.known_syms[sym.name]
+ elif sym.name in env.externs:
+ # Fixed-address symbols do not need pre-processing.
+ continue
else:
if sym.name in fun_table:
sym.section = mp_fun_table_sec
@@ -1214,6 +1225,15 @@ def link_objects(env, native_qstr_vals_len):
else:
undef_errors.append("{}: undefined symbol: {}".format(sym.filename, sym.name))
+ for sym in env.externs:
+ if sym in env.known_syms:
+ log(
+ LOG_LEVEL_1,
+ "Symbol {} is a fixed-address symbol at {:08x} and is also provided from an object file".format(
+ sym, env.externs[sym]
+ ),
+ )
+
if undef_errors:
raise LinkError("\n".join(undef_errors))
@@ -1456,6 +1476,9 @@ def do_link(args):
log(LOG_LEVEL_2, "qstr vals: " + ", ".join(native_qstr_vals))
env = LinkEnv(args.arch)
try:
+ if args.externs:
+ env.externs = parse_linkerscript(args.externs)
+
# Load object files
for fn in args.files:
with open(fn, "rb") as f:
@@ -1484,6 +1507,50 @@ def do_link(args):
sys.exit(1)
+def parse_linkerscript(source):
+ # This extracts fixed-address symbol lists from linkerscripts, only parsing
+ # a small subset of all possible directives. Right now the only
+ # linkerscript file this is really tested against is the ESP8266's builtin
+ # ROM functions list ($SDK/ld/eagle.rom.addr.v6.ld).
+ #
+ # The parser should be able to handle symbol entries inside ESP-IDF's ROM
+ # symbol lists for the ESP32 range of MCUs as well (see *.ld files in
+ # $SDK/components/esp_rom/<name>/).
+
+ symbols = {}
+
+ LINE_REGEX = re.compile(
+ r'^(?P<weak>PROVIDE\()?' # optional weak marker start
+ r'(?P<symbol>[a-zA-Z_]\w*)' # symbol name
+ r'=0x(?P<address>[\da-fA-F]{1,8})*' # symbol address
+ r'(?(weak)\));$', # optional weak marker end and line terminator
+ re.ASCII,
+ )
+
+ inside_comment = False
+ for line in (line.strip() for line in source.readlines()):
+ if line.startswith('/*') and not inside_comment:
+ if not line.endswith('*/'):
+ inside_comment = True
+ continue
+ if inside_comment:
+ if line.endswith('*/'):
+ inside_comment = False
+ continue
+ if line.startswith('//'):
+ continue
+ match = LINE_REGEX.match(''.join(line.split()))
+ if not match:
+ continue
+ tokens = match.groupdict()
+ symbol = tokens['symbol']
+ address = int(tokens['address'], 16)
+ if symbol in symbols:
+ raise ValueError(f"Symbol {symbol} already defined")
+ symbols[symbol] = address
+ return symbols
+
+
def main():
import argparse
@@ -1500,6 +1567,13 @@ def main():
cmd_parser.add_argument(
"--output", "-o", default=None, help="output .mpy file (default to input with .o->.mpy)"
)
+ cmd_parser.add_argument(
+ "--externs",
+ "-e",
+ type=argparse.FileType("rt"),
+ default=None,
+ help="linkerscript providing fixed-address symbols to augment symbol resolution",
+ )
cmd_parser.add_argument("files", nargs="+", help="input files")
args = cmd_parser.parse_args()