blob: 60fc4c7c9b024f3b1bb1b43f8cf84951885905e4 [file] [log] [blame] [edit]
# Copyright 2011 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.
"""Utilities for manipulating WebAssembly binaries from python.
"""
from collections import namedtuple
from enum import IntEnum
import logging
import os
import sys
from . import shared
from .settings import settings
sys.path.append(shared.path_from_root('third_party'))
import leb128
logger = logging.getLogger('shared')
# For the Emscripten-specific WASM metadata section, follows semver, changes
# whenever metadata section changes structure.
# NB: major version 0 implies no compatibility
# NB: when changing the metadata format, we should only append new fields, not
# reorder, modify, or remove existing ones.
EMSCRIPTEN_METADATA_MAJOR, EMSCRIPTEN_METADATA_MINOR = (0, 3)
# For the JS/WASM ABI, specifies the minimum ABI version required of
# the WASM runtime implementation by the generated WASM binary. It follows
# semver and changes whenever C types change size/signedness or
# syscalls change signature. By semver, the maximum ABI version is
# implied to be less than (EMSCRIPTEN_ABI_MAJOR + 1, 0). On an ABI
# change, increment EMSCRIPTEN_ABI_MINOR if EMSCRIPTEN_ABI_MAJOR == 0
# or the ABI change is backwards compatible, otherwise increment
# EMSCRIPTEN_ABI_MAJOR and set EMSCRIPTEN_ABI_MINOR = 0.
EMSCRIPTEN_ABI_MAJOR, EMSCRIPTEN_ABI_MINOR = (0, 29)
WASM_PAGE_SIZE = 65536
HEADER_SIZE = 8
LIMITS_HAS_MAX = 0x1
SEG_IS_PASSIVE = 0x1
def toLEB(num):
  """Return `num` encoded as unsigned LEB128, as produced by `leb128.u.encode`."""
  encoded = leb128.u.encode(num)
  return encoded
def readULEB(iobuf):
  """Decode one unsigned LEB128 integer from the reader `iobuf`.

  Only the first element of the `leb128.u.decode_reader` result is returned;
  the remaining elements are discarded.
  """
  decoded = leb128.u.decode_reader(iobuf)
  return decoded[0]
def readSLEB(iobuf):
  """Decode one signed LEB128 integer from the reader `iobuf`.

  Only the first element of the `leb128.i.decode_reader` result is returned;
  the remaining elements are discarded.
  """
  decoded = leb128.i.decode_reader(iobuf)
  return decoded[0]
def add_emscripten_metadata(wasm_file):
  """Insert an `emscripten_metadata` custom section into `wasm_file`, in place.

  The whole file is read into memory and then rewritten as: the original
  header, the new custom section, and finally everything that followed the
  header in the original file.  Values are taken from the global `settings`
  object (INITIAL_MEMORY, GLOBAL_BASE, STANDALONE_WASM).
  """
  mem_size = settings.INITIAL_MEMORY // WASM_PAGE_SIZE
  global_base = settings.GLOBAL_BASE

  logger.debug('creating wasm emscripten metadata section with mem size %d', mem_size)
  # section name, including prefixed size (0x13 == 19 == len('emscripten_metadata'))
  name = b'\x13emscripten_metadata'
  contents = (
    # metadata section version
    toLEB(EMSCRIPTEN_METADATA_MAJOR) +
    toLEB(EMSCRIPTEN_METADATA_MINOR) +

    # NB: The structure of the following should only be changed
    #     if EMSCRIPTEN_METADATA_MAJOR is incremented
    # Minimum ABI version
    toLEB(EMSCRIPTEN_ABI_MAJOR) +
    toLEB(EMSCRIPTEN_ABI_MINOR) +

    # Wasm backend, always 1 now
    toLEB(1) +

    toLEB(mem_size) +
    # NOTE(review): constant 0 field; presumably a legacy table-size slot - confirm
    toLEB(0) +
    toLEB(global_base) +
    # NOTE(review): constant 0 field; presumably a legacy dynamic-base slot - confirm
    toLEB(0) +
    # dynamictopPtr, always 0 now
    toLEB(0) +

    # tempDoublePtr, always 0 in wasm backend
    toLEB(0) +

    toLEB(int(settings.STANDALONE_WASM))

    # NB: more data can be appended here as long as you increase
    #     the EMSCRIPTEN_METADATA_MINOR
  )

  # Read the original fully, and close the handle, before truncating the file.
  with open(wasm_file, 'rb') as f:
    orig = f.read()

  with open(wasm_file, 'wb') as f:
    f.write(orig[:HEADER_SIZE]) # copy magic number and version
    # write the special section
    f.write(b'\0') # user section is code 0
    # need to find the size of this section
    size = len(name) + len(contents)
    f.write(toLEB(size))
    f.write(name)
    f.write(contents)
    f.write(orig[HEADER_SIZE:])
class SecType(IntEnum):
  """Numeric section ids, matching the type byte at the start of each section."""

  # NOTE(review): members are deliberately kept in their original, non-numeric
  # order (EVENT after MEMORY, DATACOUNT before CODE).  This looks like the
  # order in which sections appear in a binary - confirm before sorting, since
  # reordering would also change the enum's iteration order.
  CUSTOM = 0
  TYPE = 1
  IMPORT = 2
  FUNCTION = 3
  TABLE = 4
  MEMORY = 5
  EVENT = 13
  GLOBAL = 6
  EXPORT = 7
  START = 8
  ELEM = 9
  DATACOUNT = 12
  CODE = 10
  DATA = 11
class ExternType(IntEnum):
  """Kind codes for importable / exportable entities.

  These are the values of the single `kind` byte found in import and export
  entries.
  """

  FUNC = 0
  TABLE = 1
  MEMORY = 2
  GLOBAL = 3
  EVENT = 4
class ValueType(IntEnum):
  """Value type codes, expressed as the (negative) numbers obtained when the
  type byte is decoded as a signed LEB128 integer."""

  # The values are plain ints.  A trailing comma after each value would turn
  # it into a 1-tuple, which only yields the intended int because Enum unpacks
  # tuples as constructor arguments.
  I32 = -0x01
  I64 = -0x02
  F32 = -0x03
  F64 = -0x04
class OpCode(IntEnum):
  """The few instruction opcodes needed to decode constant init expressions."""

  GLOBAL_GET = 0x23
  I32_CONST = 0x41
  I64_CONST = 0x42
  # Terminates an expression.
  END = 0x0b
# Lightweight immutable records returned by the wasm reader in this file.

# One section header: its type, payload size and payload offset in the file.
Section = namedtuple('Section', ('type', 'size', 'offset'))
# Limits of a table or memory; `maximum` is 0 when the has-max flag is unset.
Limits = namedtuple('Limits', ('flags', 'initial', 'maximum'))
# One import entry; the shape of `info` depends on `kind`.
Import = namedtuple('Import', ('kind', 'module', 'field', 'info'))
# One export entry.
Export = namedtuple('Export', ('name', 'kind', 'index'))
# Parsed contents of the "dylink" custom section.
Dylink = namedtuple(
  'Dylink',
  ('mem_size', 'mem_align', 'table_size', 'table_align', 'section_end', 'needed'),
)
# One table-section entry.
Table = namedtuple('Table', ('type', 'limits'))
# One global-section entry; `init` is an (opcode, value) pair.
Global = namedtuple('Global', ('type', 'mutable', 'init'))
# One data-section entry.
Segment = namedtuple('Segment', ('flags', 'init', 'data'))
class Module:
  """Extremely minimal wasm module reader.

  Opens `filename` for binary reading, checks the 8-byte header (magic and
  version) and then parses individual sections on demand: tables, exports,
  imports, globals and data segments.  The underlying file stays open until
  the object is destroyed.
  """

  def __init__(self, filename):
    """Open `filename` and assert that it starts with a wasm version-1 header."""
    # Bind the attribute first so that __del__ is safe even if a statement
    # below raises before the file has been opened.
    self.buf = None
    self.size = os.path.getsize(filename)
    self.buf = open(filename, 'rb')
    magic = self.buf.read(4)
    version = self.buf.read(4)
    assert magic == b'\0asm'
    assert version == b'\x01\0\0\0'

  def __del__(self):
    # `buf` may be missing or None when construction failed part-way.
    buf = getattr(self, 'buf', None)
    if buf is not None:
      buf.close()

  def readByte(self):
    """Read a single byte and return it as an int."""
    return self.buf.read(1)[0]

  def readULEB(self):
    """Read one unsigned LEB128 integer at the current position."""
    return readULEB(self.buf)

  def readSLEB(self):
    """Read one signed LEB128 integer at the current position."""
    return readSLEB(self.buf)

  def readString(self):
    """Read a ULEB-length-prefixed UTF-8 string."""
    size = self.readULEB()
    return self.buf.read(size).decode('utf-8')

  def readLimits(self):
    """Read a limits record; `maximum` is 0 when the has-max flag is unset."""
    flags = self.readByte()
    initial = self.readULEB()
    maximum = 0
    if flags & LIMITS_HAS_MAX:
      maximum = self.readULEB()
    return Limits(flags, initial, maximum)

  def readInitExpr(self):
    """Read a single-instruction init expression as an (opcode, value) pair.

    Only the opcodes listed in `OpCode` are accepted, and the instruction
    must be immediately followed by END.
    """
    opcode = OpCode(self.readByte())
    if opcode == OpCode.GLOBAL_GET:
      # The immediate of global.get is a global index, i.e. unsigned LEB128.
      # Decoding it as signed would misread any index >= 64.
      value = self.readULEB()
    else:
      # i32.const / i64.const carry a signed LEB128 immediate.
      value = self.readSLEB()
    end = OpCode(self.readByte())
    assert end == OpCode.END
    return (opcode, value)

  def seek(self, offset):
    """Move the read position to the absolute file `offset`."""
    self.buf.seek(offset)

  def sections(self):
    """Generator that lazily returns sections from the wasm file."""
    offset = HEADER_SIZE
    while offset < self.size:
      self.seek(offset)
      section_type = SecType(self.readByte())
      section_size = self.readULEB()
      # The payload starts right after the size field.
      section_offset = self.buf.tell()
      yield Section(section_type, section_size, section_offset)
      offset = section_offset + section_size

  def _find_section(self, section_type):
    """Return the first section of `section_type`, or None if there is none."""
    return next((s for s in self.sections() if s.type == section_type), None)

  def tables(self):
    """Return a list of `Table` records from the table section ([] if absent)."""
    sec = self._find_section(SecType.TABLE)
    if sec is None:
      return []

    self.seek(sec.offset)
    num_tables = self.readULEB()
    tables = []
    for _ in range(num_tables):
      kind = self.readByte()
      limits = self.readLimits()
      tables.append(Table(kind, limits))

    return tables

  def exports(self):
    """Return a list of `Export` records from the export section ([] if absent)."""
    sec = self._find_section(SecType.EXPORT)
    if sec is None:
      return []

    self.seek(sec.offset)
    num_exports = self.readULEB()
    exports = []
    for _ in range(num_exports):
      name = self.readString()
      kind = ExternType(self.readByte())
      index = self.readULEB()
      exports.append(Export(name, kind, index))

    return exports

  def imports(self):
    """Return a list of `Import` records from the import section ([] if absent).

    The `info` field depends on the import kind: a signature index for
    functions, a (type, mutable) pair for globals, a `Limits` for memories and
    a (type, limits) pair for tables.  Any other kind raises AssertionError.
    """
    sec = self._find_section(SecType.IMPORT)
    if sec is None:
      return []

    self.seek(sec.offset)
    num_imports = self.readULEB()
    imports = []
    for _ in range(num_imports):
      mod = self.readString()
      field = self.readString()
      kind = ExternType(self.readByte())
      if kind == ExternType.FUNC:
        info = self.readULEB()  # sig
      elif kind == ExternType.GLOBAL:
        info = (
          self.readSLEB(),  # global type
          self.readByte()   # mutable
        )
      elif kind == ExternType.MEMORY:
        info = self.readLimits()  # limits
      elif kind == ExternType.TABLE:
        info = (
          self.readSLEB(),   # table type
          self.readLimits()  # limits
        )
      else:
        # Raised explicitly (rather than `assert False`) so the check still
        # fires under `python -O` instead of leaving `info` unbound or stale.
        raise AssertionError('unsupported import kind: %s' % kind)
      imports.append(Import(kind, mod, field, info))

    return imports

  def globals(self):
    """Return a list of `Global` records from the global section ([] if absent)."""
    sec = self._find_section(SecType.GLOBAL)
    if sec is None:
      return []

    self.seek(sec.offset)
    num_globals = self.readULEB()
    globals_ = []
    for _ in range(num_globals):
      t = ValueType(self.readSLEB())
      mutable = self.readByte()
      init = self.readInitExpr()
      globals_.append(Global(t, mutable, init))

    return globals_

  def data_segments(self):
    """Return a list of `Segment` records from the data section ([] if absent).

    For segments with the passive flag set there is no init expression to
    read, and `Segment.init` is None.
    """
    sec = self._find_section(SecType.DATA)
    if sec is None:
      return []

    self.seek(sec.offset)
    num_segments = self.readULEB()
    segments = []
    for _ in range(num_segments):
      flags = self.readULEB()
      # Reset on every iteration: a passive segment must not raise
      # UnboundLocalError nor inherit the previous segment's expression.
      init = None
      if not (flags & SEG_IS_PASSIVE):
        init = self.readInitExpr()
      data_size = self.readULEB()
      data = self.buf.read(data_size)
      segments.append(Segment(flags, init, data))

    return segments
def parse_dylink_section(wasm_file):
  """Parse the leading "dylink" custom section of `wasm_file`.

  The first section in the file must be a custom section named 'dylink';
  otherwise an assertion fails.  Returns a `Dylink` record whose
  `section_end` is the file offset just past the section payload and whose
  `needed` is the list of library names stored in the section.
  """
  module = Module(wasm_file)

  first = next(module.sections())
  assert first.type == SecType.CUSTOM
  end_offset = first.offset + first.size

  module.seek(first.offset)
  # Read the section name unconditionally so that the stream still advances
  # past it when asserts are disabled.
  name = module.readString()
  assert name == 'dylink'

  mem_size = module.readULEB()
  mem_align = module.readULEB()
  table_size = module.readULEB()
  table_align = module.readULEB()

  # A count followed by that many length-prefixed library names.
  count = module.readULEB()
  needed = [module.readString() for _ in range(count)]

  return Dylink(mem_size, mem_align, table_size, table_align, end_offset, needed)
def get_exports(wasm_file):
  """Return the `Export` records of `wasm_file` ([] if it has no export section)."""
  module = Module(wasm_file)
  return module.exports()
def get_imports(wasm_file):
  """Return the `Import` records of `wasm_file` ([] if it has no import section)."""
  module = Module(wasm_file)
  return module.imports()
def update_dylink_section(wasm_file, extra_dynlibs):
  """Rewrite the "dylink" section of `wasm_file`, adding `extra_dynlibs`.

  A wasm shared library has a special "dylink" section, see tools-conventions
  repo.  This function parses the existing section, appends the names in
  `extra_dynlibs` to its list of needed libraries and rewrites the file in
  place with the regenerated section followed by the untouched remainder of
  the binary.
  """
  mem_size, mem_align, table_size, table_align, section_end, needed = parse_dylink_section(wasm_file)

  section_name = b'\06dylink' # section name, including prefixed size
  # Write back every parsed field unchanged, including table_align; only the
  # list of needed libraries is meant to change.
  parts = [toLEB(mem_size), toLEB(mem_align),
           toLEB(table_size), toLEB(table_align)]

  # we extend "dylink" section with information about which shared libraries
  # our shared library needs. This is similar to DT_NEEDED entries in ELF.
  #
  # In theory we could avoid doing this, since every import in wasm has
  # "module" and "name" attributes, but currently emscripten almost always
  # uses just "env" for "module". This way we have to embed information about
  # required libraries for the dynamic linker somewhere, and "dylink" section
  # seems to be the most relevant place.
  #
  # Binary format of the extension:
  #
  #   needed_dynlibs_count        varuint32       ; number of needed shared libraries
  #   needed_dynlibs_entries      dynlib_entry*   ; repeated dynamic library entries as described below
  #
  # dynlib_entry:
  #
  #   dynlib_name_len             varuint32       ; length of dynlib_name_str in bytes
  #   dynlib_name_str             bytes           ; name of a needed dynamic library: valid UTF-8 byte sequence
  #
  # a proposal has been filed to include the extension into "dylink" specification:
  # https://github.com/WebAssembly/tool-conventions/pull/77
  all_needed = list(needed)
  all_needed.extend(extra_dynlibs)
  parts.append(toLEB(len(all_needed)))
  for dyn_needed in all_needed:
    encoded = dyn_needed.encode('utf-8')
    parts.append(toLEB(len(encoded)))
    parts.append(encoded)
  contents = b''.join(parts)

  # Read the original fully, and close the handle, before truncating the file.
  with open(wasm_file, 'rb') as f:
    orig = f.read()

  file_header = orig[:HEADER_SIZE]
  file_remainder = orig[section_end:]

  section_size = len(section_name) + len(contents)
  with open(wasm_file, 'wb') as f:
    # copy magic number and version
    f.write(file_header)
    # write the special section
    f.write(b'\0') # user section is code 0
    f.write(toLEB(section_size))
    f.write(section_name)
    f.write(contents)
    # copy rest of binary
    f.write(file_remainder)