tools/webassembly.py - external/github.com/kripken/emscripten - Git at Google

 # Copyright 2011 The Emscripten Authors.  All rights reserved.
 # Emscripten is available under two separate licenses, the MIT license and the
 # University of Illinois/NCSA Open Source License.  Both these licenses can be
 # found in the LICENSE file.

 """Utilties for manipulating WebAssembly binaries from python.
 """

 from collections import namedtuple
 from enum import IntEnum
 import logging
 import os
 import sys

 from . import shared
 from .settings import settings

 sys.path.append(shared.path_from_root('third_party'))

 import leb128

 logger = logging.getLogger('shared')


 # For the Emscripten-specific WASM metadata section, follows semver, changes
 # whenever metadata section changes structure.
 # NB: major version 0 implies no compatibility
 # NB: when changing the metadata format, we should only append new fields, not
 #     reorder, modify, or remove existing ones.
 EMSCRIPTEN_METADATA_MAJOR, EMSCRIPTEN_METADATA_MINOR = (0, 3)
 # For the JS/WASM ABI, specifies the minimum ABI version required of
 # the WASM runtime implementation by the generated WASM binary. It follows
 # semver and changes whenever C types change size/signedness or
 # syscalls change signature. By semver, the maximum ABI version is
 # implied to be less than (EMSCRIPTEN_ABI_MAJOR + 1, 0). On an ABI
 # change, increment EMSCRIPTEN_ABI_MINOR if EMSCRIPTEN_ABI_MAJOR == 0
 # or the ABI change is backwards compatible, otherwise increment
 # EMSCRIPTEN_ABI_MAJOR and set EMSCRIPTEN_ABI_MINOR = 0.
 EMSCRIPTEN_ABI_MAJOR, EMSCRIPTEN_ABI_MINOR = (0, 29)

 WASM_PAGE_SIZE = 65536

 HEADER_SIZE = 8

 LIMITS_HAS_MAX = 0x1

 SEG_IS_PASSIVE = 0x1


 def toLEB(num):
   return leb128.u.encode(num)


 def readULEB(iobuf):
   return leb128.u.decode_reader(iobuf)[0]


 def readSLEB(iobuf):
   return leb128.i.decode_reader(iobuf)[0]


 def add_emscripten_metadata(wasm_file):
   mem_size = settings.INITIAL_MEMORY // WASM_PAGE_SIZE
   global_base = settings.GLOBAL_BASE

   logger.debug('creating wasm emscripten metadata section with mem size %d' % mem_size)
   name = b'\x13emscripten_metadata' # section name, including prefixed size
   contents = (
     # metadata section version
     toLEB(EMSCRIPTEN_METADATA_MAJOR) +
     toLEB(EMSCRIPTEN_METADATA_MINOR) +

     # NB: The structure of the following should only be changed
     #     if EMSCRIPTEN_METADATA_MAJOR is incremented
     # Minimum ABI version
     toLEB(EMSCRIPTEN_ABI_MAJOR) +
     toLEB(EMSCRIPTEN_ABI_MINOR) +

     # Wasm backend, always 1 now
     toLEB(1) +

     toLEB(mem_size) +
     toLEB(0) +
     toLEB(global_base) +
     toLEB(0) +
     # dynamictopPtr, always 0 now
     toLEB(0) +

     # tempDoublePtr, always 0 in wasm backend
     toLEB(0) +

     toLEB(int(settings.STANDALONE_WASM))

     # NB: more data can be appended here as long as you increase
     #     the EMSCRIPTEN_METADATA_MINOR
   )

   orig = open(wasm_file, 'rb').read()
   with open(wasm_file, 'wb') as f:
     f.write(orig[0:8]) # copy magic number and version
     # write the special section
     f.write(b'\0') # user section is code 0
     # need to find the size of this section
     size = len(name) + len(contents)
     f.write(toLEB(size))
     f.write(name)
     f.write(contents)
     f.write(orig[8:])


 class SecType(IntEnum):
   CUSTOM = 0
   TYPE = 1
   IMPORT = 2
   FUNCTION = 3
   TABLE = 4
   MEMORY = 5
   EVENT = 13
   GLOBAL = 6
   EXPORT = 7
   START = 8
   ELEM = 9
   DATACOUNT = 12
   CODE = 10
   DATA = 11


 class ExternType(IntEnum):
   FUNC = 0
   TABLE = 1
   MEMORY = 2
   GLOBAL = 3
   EVENT = 4


 class ValueType(IntEnum):
   I32 = -0x01,
   I64 = -0x02,
   F32 = -0x03,
   F64 = -0x04,


 class OpCode(IntEnum):
   GLOBAL_GET = 0x23
   I32_CONST = 0x41
   I64_CONST = 0x42
   END = 0x0b


 Section = namedtuple('Section', ['type', 'size', 'offset'])
 Limits = namedtuple('Limits', ['flags', 'initial', 'maximum'])
 Import = namedtuple('Import', ['kind', 'module', 'field', 'info'])
 Export = namedtuple('Export', ['name', 'kind', 'index'])
 Dylink = namedtuple('Dylink', ['mem_size', 'mem_align', 'table_size', 'table_align', 'section_end', 'needed'])
 Table = namedtuple('Table', ['type', 'limits'])
 Global = namedtuple('Global', ['type', 'mutable', 'init'])
 Segment = namedtuple('Segment', ['flags', 'init', 'data'])


 class Module:
   """Extremely minimal wasm module reader.  Currently only used
   for parsing the dylink section."""
   def __init__(self, filename):
     self.size = os.path.getsize(filename)
     self.buf = open(filename, 'rb')
     magic = self.buf.read(4)
     version = self.buf.read(4)
     assert magic == b'\0asm'
     assert version == b'\x01\0\0\0'

   def __del__(self):
     self.buf.close()

   def readByte(self):
     return self.buf.read(1)[0]

   def readULEB(self):
     return readULEB(self.buf)

   def readSLEB(self):
     return readSLEB(self.buf)

   def readString(self):
     size = self.readULEB()
     return self.buf.read(size).decode('utf-8')

   def readLimits(self):
     flags = self.readByte()
     initial = self.readULEB()
     maximum = 0
     if flags & LIMITS_HAS_MAX:
       maximum = self.readULEB()
     return Limits(flags, initial, maximum)

   def readInitExpr(self):
     opcode = OpCode(self.readByte())
     value = self.readSLEB()
     end = OpCode(self.readByte())
     assert end == OpCode.END
     return (opcode, value)

   def seek(self, offset):
     self.buf.seek(offset)

   def sections(self):
     """Generator that lazily returns sections from the wasm file."""
     offset = HEADER_SIZE
     while offset < self.size:
       self.seek(offset)
       section_type = SecType(self.readByte())
       section_size = self.readULEB()
       section_offset = self.buf.tell()
       yield Section(section_type, section_size, section_offset)
       offset = section_offset + section_size

   def tables(self):
     sec = next((s for s in self.sections() if s.type == SecType.TABLE), None)
     if not sec:
       return []

     self.seek(sec.offset)
     num_tables = self.readULEB()
     tables = []
     for i in range(num_tables):
       kind = self.readByte()
       limits = self.readLimits()
       tables.append(Table(kind, limits))

     return tables

   def exports(self):
     sec = next((s for s in self.sections() if s.type == SecType.EXPORT), None)
     if not sec:
       return []

     self.seek(sec.offset)
     num_exports = self.readULEB()
     exports = []
     for i in range(num_exports):
       name = self.readString()
       kind = ExternType(self.readByte())
       index = self.readULEB()
       exports.append(Export(name, kind, index))

     return exports

   def imports(self):
     sec = next((s for s in self.sections() if s.type == SecType.IMPORT), None)
     if not sec:
       return []

     self.seek(sec.offset)
     num_imports = self.readULEB()
     imports = []
     for i in range(num_imports):
       mod = self.readString()
       field = self.readString()
       kind = ExternType(self.readByte())
       if kind == ExternType.FUNC:
         info = self.readULEB()  # sig
       elif kind == ExternType.GLOBAL:
         info = (
           self.readSLEB(),  # global type
           self.readByte()   # mutable
         )
       elif kind == ExternType.MEMORY:
         info = self.readLimits()  # limits
       elif kind == ExternType.TABLE:
         info = (
           self.readSLEB(),   # table type
           self.readLimits()  # limits
         )
       else:
         assert False
       imports.append(Import(kind, mod, field, info))

     return imports

   def globals(self):
     sec = next((s for s in self.sections() if s.type == SecType.GLOBAL), None)
     if not sec:
       return []

     self.seek(sec.offset)
     num_globals = self.readULEB()
     globals_ = []
     for i in range(num_globals):
       t = ValueType(self.readSLEB())
       mutable = self.readByte()
       init = self.readInitExpr()
       g = Global(t, mutable, init)
       globals_.append(g)
     return globals_


   def data_segments(self):
     sec = next((s for s in self.sections() if s.type == SecType.DATA), None)
     if not sec:
       return []

     self.seek(sec.offset)
     num_segments = self.readULEB()
     segments = []
     for i in range(num_segments):
       flags = self.readULEB()
       if not (flags & SEG_IS_PASSIVE):
         init = self.readInitExpr()
       data_size = self.readULEB()
       data = self.buf.read(data_size)
       segments.append(Segment(flags, init, data))


     return segments


 def parse_dylink_section(wasm_file):
   module = Module(wasm_file)

   dylink_section = next(module.sections())
   assert dylink_section.type == SecType.CUSTOM
   section_size = dylink_section.size
   section_offset = dylink_section.offset
   section_end = section_offset + section_size
   module.seek(section_offset)
   # section name
   section_name = module.readString()
   assert section_name == 'dylink'
   mem_size = module.readULEB()
   mem_align = module.readULEB()
   table_size = module.readULEB()
   table_align = module.readULEB()

   needed = []
   needed_count = module.readULEB()
   while needed_count:
     libname = module.readString()
     needed.append(libname)
     needed_count -= 1

   return Dylink(mem_size, mem_align, table_size, table_align, section_end, needed)


 def get_exports(wasm_file):
   return Module(wasm_file).exports()


 def get_imports(wasm_file):
   return Module(wasm_file).imports()


 def update_dylink_section(wasm_file, extra_dynlibs):
   # A wasm shared library has a special "dylink" section, see tools-conventions repo.
   # This function updates this section, adding extra dynamic library dependencies.

   mem_size, mem_align, table_size, table_align, section_end, needed = parse_dylink_section(wasm_file)

   section_name = b'\06dylink' # section name, including prefixed size
   contents = (toLEB(mem_size) + toLEB(mem_align) +
               toLEB(table_size) + toLEB(0))

   # we extend "dylink" section with information about which shared libraries
   # our shared library needs. This is similar to DT_NEEDED entries in ELF.
   #
   # In theory we could avoid doing this, since every import in wasm has
   # "module" and "name" attributes, but currently emscripten almost always
   # uses just "env" for "module". This way we have to embed information about
   # required libraries for the dynamic linker somewhere, and "dylink" section
   # seems to be the most relevant place.
   #
   # Binary format of the extension:
   #
   #   needed_dynlibs_count        varuint32       ; number of needed shared libraries
   #   needed_dynlibs_entries      dynlib_entry*   ; repeated dynamic library entries as described below
   #
   # dynlib_entry:
   #
   #   dynlib_name_len             varuint32       ; length of dynlib_name_str in bytes
   #   dynlib_name_str             bytes           ; name of a needed dynamic library: valid UTF-8 byte sequence
   #
   # a proposal has been filed to include the extension into "dylink" specification:
   # https://github.com/WebAssembly/tool-conventions/pull/77
   needed += extra_dynlibs
   contents += toLEB(len(needed))
   for dyn_needed in needed:
     dyn_needed = dyn_needed.encode('utf-8')
     contents += toLEB(len(dyn_needed))
     contents += dyn_needed

   orig = open(wasm_file, 'rb').read()
   file_header = orig[:8]
   file_remainder = orig[section_end:]

   section_size = len(section_name) + len(contents)
   with open(wasm_file, 'wb') as f:
     # copy magic number and version
     f.write(file_header)
     # write the special section
     f.write(b'\0') # user section is code 0
     f.write(toLEB(section_size))
     f.write(section_name)
     f.write(contents)
     # copy rest of binary
     f.write(file_remainder)
	# Copyright 2011 The Emscripten Authors. All rights reserved.
	# Emscripten is available under two separate licenses, the MIT license and the
	# University of Illinois/NCSA Open Source License. Both these licenses can be
	# found in the LICENSE file.

	"""Utilties for manipulating WebAssembly binaries from python.
	"""

	from collections import namedtuple
	from enum import IntEnum
	import logging
	import os
	import sys

	from . import shared
	from .settings import settings

	sys.path.append(shared.path_from_root('third_party'))

	import leb128

	logger = logging.getLogger('shared')


	# For the Emscripten-specific WASM metadata section, follows semver, changes
	# whenever metadata section changes structure.
	# NB: major version 0 implies no compatibility
	# NB: when changing the metadata format, we should only append new fields, not
	# reorder, modify, or remove existing ones.
	EMSCRIPTEN_METADATA_MAJOR, EMSCRIPTEN_METADATA_MINOR = (0, 3)
	# For the JS/WASM ABI, specifies the minimum ABI version required of
	# the WASM runtime implementation by the generated WASM binary. It follows
	# semver and changes whenever C types change size/signedness or
	# syscalls change signature. By semver, the maximum ABI version is
	# implied to be less than (EMSCRIPTEN_ABI_MAJOR + 1, 0). On an ABI
	# change, increment EMSCRIPTEN_ABI_MINOR if EMSCRIPTEN_ABI_MAJOR == 0
	# or the ABI change is backwards compatible, otherwise increment
	# EMSCRIPTEN_ABI_MAJOR and set EMSCRIPTEN_ABI_MINOR = 0.
	EMSCRIPTEN_ABI_MAJOR, EMSCRIPTEN_ABI_MINOR = (0, 29)

	WASM_PAGE_SIZE = 65536

	HEADER_SIZE = 8

	LIMITS_HAS_MAX = 0x1

	SEG_IS_PASSIVE = 0x1


	def toLEB(num):
	return leb128.u.encode(num)


	def readULEB(iobuf):
	return leb128.u.decode_reader(iobuf)[0]


	def readSLEB(iobuf):
	return leb128.i.decode_reader(iobuf)[0]


	def add_emscripten_metadata(wasm_file):
	mem_size = settings.INITIAL_MEMORY // WASM_PAGE_SIZE
	global_base = settings.GLOBAL_BASE

	logger.debug('creating wasm emscripten metadata section with mem size %d' % mem_size)
	name = b'\x13emscripten_metadata' # section name, including prefixed size
	contents = (
	# metadata section version
	toLEB(EMSCRIPTEN_METADATA_MAJOR) +
	toLEB(EMSCRIPTEN_METADATA_MINOR) +

	# NB: The structure of the following should only be changed
	# if EMSCRIPTEN_METADATA_MAJOR is incremented
	# Minimum ABI version
	toLEB(EMSCRIPTEN_ABI_MAJOR) +
	toLEB(EMSCRIPTEN_ABI_MINOR) +

	# Wasm backend, always 1 now
	toLEB(1) +

	toLEB(mem_size) +
	toLEB(0) +
	toLEB(global_base) +
	toLEB(0) +
	# dynamictopPtr, always 0 now
	toLEB(0) +

	# tempDoublePtr, always 0 in wasm backend
	toLEB(0) +

	toLEB(int(settings.STANDALONE_WASM))

	# NB: more data can be appended here as long as you increase
	# the EMSCRIPTEN_METADATA_MINOR
	)

	orig = open(wasm_file, 'rb').read()
	with open(wasm_file, 'wb') as f:
	f.write(orig[0:8]) # copy magic number and version
	# write the special section
	f.write(b'\0') # user section is code 0
	# need to find the size of this section
	size = len(name) + len(contents)
	f.write(toLEB(size))
	f.write(name)
	f.write(contents)
	f.write(orig[8:])


	class SecType(IntEnum):
	CUSTOM = 0
	TYPE = 1
	IMPORT = 2
	FUNCTION = 3
	TABLE = 4
	MEMORY = 5
	EVENT = 13
	GLOBAL = 6
	EXPORT = 7
	START = 8
	ELEM = 9
	DATACOUNT = 12
	CODE = 10
	DATA = 11


	class ExternType(IntEnum):
	FUNC = 0
	TABLE = 1
	MEMORY = 2
	GLOBAL = 3
	EVENT = 4


	class ValueType(IntEnum):
	I32 = -0x01,
	I64 = -0x02,
	F32 = -0x03,
	F64 = -0x04,


	class OpCode(IntEnum):
	GLOBAL_GET = 0x23
	I32_CONST = 0x41
	I64_CONST = 0x42
	END = 0x0b


	Section = namedtuple('Section', ['type', 'size', 'offset'])
	Limits = namedtuple('Limits', ['flags', 'initial', 'maximum'])
	Import = namedtuple('Import', ['kind', 'module', 'field', 'info'])
	Export = namedtuple('Export', ['name', 'kind', 'index'])
	Dylink = namedtuple('Dylink', ['mem_size', 'mem_align', 'table_size', 'table_align', 'section_end', 'needed'])
	Table = namedtuple('Table', ['type', 'limits'])
	Global = namedtuple('Global', ['type', 'mutable', 'init'])
	Segment = namedtuple('Segment', ['flags', 'init', 'data'])


	class Module:
	"""Extremely minimal wasm module reader. Currently only used
	for parsing the dylink section."""
	def __init__(self, filename):
	self.size = os.path.getsize(filename)
	self.buf = open(filename, 'rb')
	magic = self.buf.read(4)
	version = self.buf.read(4)
	assert magic == b'\0asm'
	assert version == b'\x01\0\0\0'

	def __del__(self):
	self.buf.close()

	def readByte(self):
	return self.buf.read(1)[0]

	def readULEB(self):
	return readULEB(self.buf)

	def readSLEB(self):
	return readSLEB(self.buf)

	def readString(self):
	size = self.readULEB()
	return self.buf.read(size).decode('utf-8')

	def readLimits(self):
	flags = self.readByte()
	initial = self.readULEB()
	maximum = 0
	if flags & LIMITS_HAS_MAX:
	maximum = self.readULEB()
	return Limits(flags, initial, maximum)

	def readInitExpr(self):
	opcode = OpCode(self.readByte())
	value = self.readSLEB()
	end = OpCode(self.readByte())
	assert end == OpCode.END
	return (opcode, value)

	def seek(self, offset):
	self.buf.seek(offset)

	def sections(self):
	"""Generator that lazily returns sections from the wasm file."""
	offset = HEADER_SIZE
	while offset < self.size:
	self.seek(offset)
	section_type = SecType(self.readByte())
	section_size = self.readULEB()
	section_offset = self.buf.tell()
	yield Section(section_type, section_size, section_offset)
	offset = section_offset + section_size

	def tables(self):
	sec = next((s for s in self.sections() if s.type == SecType.TABLE), None)
	if not sec:
	return []

	self.seek(sec.offset)
	num_tables = self.readULEB()
	tables = []
	for i in range(num_tables):
	kind = self.readByte()
	limits = self.readLimits()
	tables.append(Table(kind, limits))

	return tables

	def exports(self):
	sec = next((s for s in self.sections() if s.type == SecType.EXPORT), None)
	if not sec:
	return []

	self.seek(sec.offset)
	num_exports = self.readULEB()
	exports = []
	for i in range(num_exports):
	name = self.readString()
	kind = ExternType(self.readByte())
	index = self.readULEB()
	exports.append(Export(name, kind, index))

	return exports

	def imports(self):
	sec = next((s for s in self.sections() if s.type == SecType.IMPORT), None)
	if not sec:
	return []

	self.seek(sec.offset)
	num_imports = self.readULEB()
	imports = []
	for i in range(num_imports):
	mod = self.readString()
	field = self.readString()
	kind = ExternType(self.readByte())
	if kind == ExternType.FUNC:
	info = self.readULEB() # sig
	elif kind == ExternType.GLOBAL:
	info = (
	self.readSLEB(), # global type
	self.readByte() # mutable
	)
	elif kind == ExternType.MEMORY:
	info = self.readLimits() # limits
	elif kind == ExternType.TABLE:
	info = (
	self.readSLEB(), # table type
	self.readLimits() # limits
	)
	else:
	assert False
	imports.append(Import(kind, mod, field, info))

	return imports

	def globals(self):
	sec = next((s for s in self.sections() if s.type == SecType.GLOBAL), None)
	if not sec:
	return []

	self.seek(sec.offset)
	num_globals = self.readULEB()
	globals_ = []
	for i in range(num_globals):
	t = ValueType(self.readSLEB())
	mutable = self.readByte()
	init = self.readInitExpr()
	g = Global(t, mutable, init)
	globals_.append(g)
	return globals_


	def data_segments(self):
	sec = next((s for s in self.sections() if s.type == SecType.DATA), None)
	if not sec:
	return []

	self.seek(sec.offset)
	num_segments = self.readULEB()
	segments = []
	for i in range(num_segments):
	flags = self.readULEB()
	if not (flags & SEG_IS_PASSIVE):
	init = self.readInitExpr()
	data_size = self.readULEB()
	data = self.buf.read(data_size)
	segments.append(Segment(flags, init, data))


	return segments




	def parse_dylink_section(wasm_file):
	module = Module(wasm_file)

	dylink_section = next(module.sections())
	assert dylink_section.type == SecType.CUSTOM
	section_size = dylink_section.size
	section_offset = dylink_section.offset
	section_end = section_offset + section_size
	module.seek(section_offset)
	# section name
	section_name = module.readString()
	assert section_name == 'dylink'
	mem_size = module.readULEB()
	mem_align = module.readULEB()
	table_size = module.readULEB()
	table_align = module.readULEB()

	needed = []
	needed_count = module.readULEB()
	while needed_count:
	libname = module.readString()
	needed.append(libname)
	needed_count -= 1

	return Dylink(mem_size, mem_align, table_size, table_align, section_end, needed)


	def get_exports(wasm_file):
	return Module(wasm_file).exports()


	def get_imports(wasm_file):
	return Module(wasm_file).imports()


	def update_dylink_section(wasm_file, extra_dynlibs):
	# A wasm shared library has a special "dylink" section, see tools-conventions repo.
	# This function updates this section, adding extra dynamic library dependencies.

	mem_size, mem_align, table_size, table_align, section_end, needed = parse_dylink_section(wasm_file)

	section_name = b'\06dylink' # section name, including prefixed size
	contents = (toLEB(mem_size) + toLEB(mem_align) +
	toLEB(table_size) + toLEB(0))

	# we extend "dylink" section with information about which shared libraries
	# our shared library needs. This is similar to DT_NEEDED entries in ELF.
	#
	# In theory we could avoid doing this, since every import in wasm has
	# "module" and "name" attributes, but currently emscripten almost always
	# uses just "env" for "module". This way we have to embed information about
	# required libraries for the dynamic linker somewhere, and "dylink" section
	# seems to be the most relevant place.
	#
	# Binary format of the extension:
	#
	# needed_dynlibs_count varuint32 ; number of needed shared libraries
	# needed_dynlibs_entries dynlib_entry* ; repeated dynamic library entries as described below
	#
	# dynlib_entry:
	#
	# dynlib_name_len varuint32 ; length of dynlib_name_str in bytes
	# dynlib_name_str bytes ; name of a needed dynamic library: valid UTF-8 byte sequence
	#
	# a proposal has been filed to include the extension into "dylink" specification:
	# https://github.com/WebAssembly/tool-conventions/pull/77
	needed += extra_dynlibs
	contents += toLEB(len(needed))
	for dyn_needed in needed:
	dyn_needed = dyn_needed.encode('utf-8')
	contents += toLEB(len(dyn_needed))
	contents += dyn_needed

	orig = open(wasm_file, 'rb').read()
	file_header = orig[:8]
	file_remainder = orig[section_end:]

	section_size = len(section_name) + len(contents)
	with open(wasm_file, 'wb') as f:
	# copy magic number and version
	f.write(file_header)
	# write the special section
	f.write(b'\0') # user section is code 0
	f.write(toLEB(section_size))
	f.write(section_name)
	f.write(contents)
	# copy rest of binary
	f.write(file_remainder)