blob: d4c327f6b189c8951c6ca8234b12165ca9b6a0f4 [file] [log] [blame] [edit]
//===-- NaClCompress.cpp - Bitcode (abbrev) compression -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Analyzes the data in memory buffer, and determines what
// abbreviations can be added to compress the bitcode file. The result
// is written to an output stream.
//
// A bitcode file has two types of abbreviations. The first are Global
// abbreviations that apply to all instances of a particular type of
// block. These abbreviations appear in the BlockInfo block of the
// bitcode file.
//
// The second type of abbreviations are local to a particular instance
// of a block.
//
// For simplicity, we will only add global abbreviations. Local
// abbreviations are converted to corresponding global abbreviations,
// so that they can be added as global abbreviations.
//
// The process of compressing is done by reading the input file
// twice. In the first round, the records are read and analyzed,
// generating a set of (global) abbreviations to use to generate the
// compressed output file. Then, the input file is read again, and for
// each record, the best fitting global abbreviation is found (or it
// figures out that leaving the record unabbreviated is best) and
// writes the record out accordingly.
//
// TODO(kschimpf): The current implementation does a trivial solution
// for the first round. It just converts all abbreviations (local and
// global) into global abbreviations. Future refinements of this file
// will generate better (and more intelligent) global abbreviations,
// based on the records found on the first read of the bitcode file.
//
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/NaCl/NaClCompress.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Bitcode/NaCl/AbbrevTrieNode.h"
#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"
#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h"
#include "llvm/Bitcode/NaCl/NaClCompressBlockDist.h"
#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
namespace {
using namespace llvm;
// Generates an error message when outside parsing, and no
// corresponding bit position is known.
static bool Error(const std::string &Err) {
errs() << Err << "\n";
return true;
}
// Prints out the abbreviation in readable form to the given Stream.
static void printAbbrev(raw_ostream &Stream, unsigned BlockID,
const NaClBitCodeAbbrev *Abbrev) {
Stream << "Abbrev(block " << BlockID << "): ";
Abbrev->Print(Stream);
}
/// type map from bitstream abbreviation index to corresponding
/// internal abbreviation index.
typedef std::map<unsigned, unsigned> BitstreamToInternalAbbrevMapType;
/// Defines a mapping from bitstream abbreviation indices, to
/// corresponding internal abbreviation indices.
class AbbrevBitstreamToInternalMap {
public:
AbbrevBitstreamToInternalMap()
: NextBitstreamAbbrevIndex(0) {}
/// Returns the bitstream abbreviaion index that will be associated
/// with the next internal abbreviation index.
unsigned getNextBitstreamAbbrevIndex() {
return NextBitstreamAbbrevIndex;
}
/// Changes the next bitstream abbreviation index to the given value.
void setNextBitstreamAbbrevIndex(unsigned NextIndex) {
NextBitstreamAbbrevIndex = NextIndex;
}
/// Returns true if there is an internal abbreviation index for the
/// given bitstream abbreviation.
bool definesBitstreamAbbrevIndex(unsigned Index) {
return BitstreamToInternalAbbrevMap.find(Index) !=
BitstreamToInternalAbbrevMap.end();
}
/// Returns the internal abbreviation index for the given bitstream
/// abbreviation index.
unsigned getInternalAbbrevIndex(unsigned Index) {
return BitstreamToInternalAbbrevMap.at(Index);
}
/// Installs the given internal abbreviation index using the next
/// available bitstream abbreviation index.
void installNewBitstreamAbbrevIndex(unsigned InternalIndex) {
BitstreamToInternalAbbrevMap[NextBitstreamAbbrevIndex++] = InternalIndex;
}
private:
// The index of the next bitstream abbreviation to be defined.
unsigned NextBitstreamAbbrevIndex;
// Map from bitstream abbreviation index to corresponding internal
// abbreviation index.
BitstreamToInternalAbbrevMapType BitstreamToInternalAbbrevMap;
};
/// Defines the list of abbreviations associated with a block.
class BlockAbbrevs {
public:
// Vector to hold the (ordered) list of abbreviations.
typedef SmallVector<NaClBitCodeAbbrev*, 32> AbbrevVector;
BlockAbbrevs(unsigned BlockID)
: BlockID(BlockID) {
// backfill internal indices that don't correspond to bitstream
// application abbreviations, so that added abbreviations have
// valid abbreviation indices.
for (unsigned i = 0; i < naclbitc::FIRST_APPLICATION_ABBREV; ++i) {
// Make all non-application abbreviations look like the default
// abbreviation.
NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6));
Abbrevs.push_back(Abbrev);
}
GlobalAbbrevBitstreamToInternalMap.
setNextBitstreamAbbrevIndex(Abbrevs.size());
}
~BlockAbbrevs() {
for (AbbrevVector::const_iterator
Iter = Abbrevs.begin(), IterEnd = Abbrevs.end();
Iter != IterEnd; ++Iter) {
(*Iter)->dropRef();
}
for (AbbrevLookupSizeMap::const_iterator
Iter = LookupMap.begin(), IterEnd = LookupMap.end();
Iter != IterEnd; ++Iter) {
delete Iter->second;
}
}
// Constant used to denote that a given abbreviation is not in the
// set of abbreviations defined by the block.
static const int NO_SUCH_ABBREVIATION = -1;
// Returns the index to the corresponding application abbreviation,
// if it exists. Otherwise return NO_SUCH_ABBREVIATION;
int findAbbreviation(const NaClBitCodeAbbrev *Abbrev) const {
for (unsigned i = getFirstApplicationAbbreviation();
i < getNumberAbbreviations(); ++i) {
if (*Abbrevs[i] == *Abbrev) return i;
}
return NO_SUCH_ABBREVIATION;
}
/// Adds the given abbreviation to the set of global abbreviations
/// defined for the block. Guarantees that duplicate abbreviations
/// are not added to the block. Note: Code takes ownership of
/// the given abbreviation. Returns true if new abbreviation.
/// Updates Index to the index where the abbreviation is located
/// in the set of abbreviations.
bool addAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) {
Index = findAbbreviation(Abbrev);
if (Index != NO_SUCH_ABBREVIATION) {
// Already defined, don't install.
Abbrev->dropRef();
return false;
}
// New abbreviation. Add.
Index = Abbrevs.size();
Abbrevs.push_back(Abbrev);
return true;
}
/// Adds the given abbreviation to the set of global abbreviations
/// defined for the block. Guarantees that duplicate abbreviations
/// are not added to the block. Note: Code takes ownership of
/// the given abbreviation. Returns true if new abbreviation.
bool addAbbreviation(NaClBitCodeAbbrev *Abbrev) {
int Index;
return addAbbreviation(Abbrev, Index);
}
/// The block ID associated with the block.
unsigned getBlockID() const {
return BlockID;
}
/// Returns the index of the frist application abbreviation for the
/// block.
unsigned getFirstApplicationAbbreviation() const {
return naclbitc::FIRST_APPLICATION_ABBREV;
}
// Returns the number of abbreviations associated with the block.
unsigned getNumberAbbreviations() const {
return Abbrevs.size();
}
// Returns true if there is an application abbreviation.
bool hasApplicationAbbreviations() const {
return Abbrevs.size() > naclbitc::FIRST_APPLICATION_ABBREV;
}
/// Returns the abbreviation associated with the given abbreviation
/// index.
NaClBitCodeAbbrev *getIndexedAbbrev(unsigned index) {
if (index >= Abbrevs.size()) return 0;
return Abbrevs[index];
}
// Builds the corresponding fast lookup map for finding abbreviations
// that applies to abbreviations in the block
void buildAbbrevLookupSizeMap(
const NaClBitcodeCompressor::CompressFlags &Flags) {
NaClBuildAbbrevLookupMap(getLookupMap(),
getAbbrevs(),
getFirstApplicationAbbreviation());
if (Flags.ShowAbbrevLookupTries) printLookupMap(errs());
}
AbbrevBitstreamToInternalMap &getGlobalAbbrevBitstreamToInternalMap() {
return GlobalAbbrevBitstreamToInternalMap;
}
AbbrevLookupSizeMap &getLookupMap() {
return LookupMap;
}
// Returns lower level vector of abbreviations.
const AbbrevVector &getAbbrevs() const {
return Abbrevs;
}
// Returns the abbreviation (index) to use for the corresponding
// record, based on the abbreviations of this block. Note: Assumes
// that buildAbbrevLookupSizeMap has already been called.
unsigned getRecordAbbrevIndex(const NaClBitcodeRecordData &Record) {
unsigned BestIndex = 0; // Ignored unless found candidate.
unsigned BestScore = 0; // Number of bits associated with BestIndex.
bool FoundCandidate = false;
NaClBitcodeValues Values(Record);
size_t Size = Values.size();
if (Size > NaClValueIndexCutoff) Size = NaClValueIndexCutoff+1;
AbbrevLookupSizeMap::const_iterator Pos = LookupMap.find(Size);
if (Pos != LookupMap.end()) {
if (const AbbrevTrieNode *Node = Pos->second) {
if (const AbbrevTrieNode *MatchNode =
Node->MatchRecord(Record)) {
const std::set<AbbrevIndexPair> &Abbreviations =
MatchNode->GetAbbreviations();
for (std::set<AbbrevIndexPair>::const_iterator
Iter = Abbreviations.begin(),
IterEnd = Abbreviations.end();
Iter != IterEnd; ++Iter) {
uint64_t NumBits = 0;
if (canUseAbbreviation(Values, Iter->second, NumBits)) {
if (!FoundCandidate || NumBits < BestScore) {
// Use this as candidate.
BestIndex = Iter->first;
BestScore = NumBits;
FoundCandidate = true;
}
}
}
}
}
}
if (FoundCandidate && BestScore <= unabbreviatedSize(Record)) {
return BestIndex;
}
return naclbitc::UNABBREV_RECORD;
}
// Computes the number of bits that will be generated by the
// corresponding read record, if no abbreviation is used.
static uint64_t unabbreviatedSize(const NaClBitcodeRecordData &Record) {
uint64_t NumBits = matchVBRBits(Record.Code, DefaultVBRBits);
size_t NumValues = Record.Values.size();
NumBits += matchVBRBits(NumValues, DefaultVBRBits);
for (size_t Index = 0; Index < NumValues; ++Index) {
NumBits += matchVBRBits(Record.Values[Index], DefaultVBRBits);
}
return NumBits;
}
// Returns true if the given abbreviation can be used to represent the
// record. Sets NumBits to the number of bits the abbreviation will
// generate. Note: Value of NumBits is undefined if this function
// return false.
static bool canUseAbbreviation(NaClBitcodeValues &Values,
NaClBitCodeAbbrev *Abbrev, uint64_t &NumBits) {
NumBits = 0;
unsigned OpIndex = 0;
unsigned OpIndexEnd = Abbrev->getNumOperandInfos();
size_t ValueIndex = 0;
size_t ValueIndexEnd = Values.size();
while (ValueIndex < ValueIndexEnd && OpIndex < OpIndexEnd) {
const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(OpIndex);
switch (Op.getEncoding()) {
case NaClBitCodeAbbrevOp::Literal:
if (canUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {
++ValueIndex;
++OpIndex;
continue;
} else {
return false;
}
case NaClBitCodeAbbrevOp::Array: {
assert(OpIndex+2 == OpIndexEnd);
const NaClBitCodeAbbrevOp &ElmtOp =
Abbrev->getOperandInfo(OpIndex+1);
// Add size of array.
NumBits += matchVBRBits(Values.size()-ValueIndex, DefaultVBRBits);
// Add size of each field.
for (; ValueIndex != ValueIndexEnd; ++ValueIndex) {
uint64_t FieldBits=0;
if (!canUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) {
return false;
}
NumBits += FieldBits;
}
return true;
}
default: {
if (canUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) {
++ValueIndex;
++OpIndex;
break;
}
return false;
}
}
}
return ValueIndex == ValueIndexEnd && OpIndex == OpIndexEnd;
}
// Returns true if the given abbreviation Op defines a single value,
// and can be applied to the given Val. Adds the number of bits the
// abbreviation Op will generate to NumBits if Op applies.
static bool canUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op,
uint64_t Val,
uint64_t &NumBits) {
switch (Op.getEncoding()) {
case NaClBitCodeAbbrevOp::Literal:
return Val == Op.getValue();
case NaClBitCodeAbbrevOp::Array:
return false;
case NaClBitCodeAbbrevOp::Fixed: {
uint64_t Width = Op.getValue();
if (!matchFixedBits(Val, Width))
return false;
NumBits += Width;
return true;
}
case NaClBitCodeAbbrevOp::VBR:
if (unsigned Width = matchVBRBits(Val, Op.getValue())) {
NumBits += Width;
return true;
} else {
return false;
}
case NaClBitCodeAbbrevOp::Char6:
if (!NaClBitCodeAbbrevOp::isChar6(Val)) return false;
NumBits += 6;
return true;
}
llvm_unreachable("unhandled NaClBitCodeAbbrevOp encoding");
}
// Returns true if the given Val can be represented by abbreviation
// operand Fixed(Width).
static bool matchFixedBits(uint64_t Val, unsigned Width) {
// Note: The reader only allows up to 32 bits for fixed values.
if (Val & Mask32) return false;
if (Val & ~(~0U >> (32-Width))) return false;
return true;
}
// Returns the number of bits needed to represent Val by abbreviation
// operand VBR(Width). Note: Returns 0 if Val can't be represented
// by VBR(Width).
static unsigned matchVBRBits(uint64_t Val, unsigned Width) {
if (Width == 0) return 0;
unsigned NumBits = 0;
while (1) {
// values emitted Width-1 bits at a time (plus a continue bit).
NumBits += Width;
if ((Val & (1U << (Width-1))) == 0)
return NumBits;
Val >>= Width-1;
}
}
private:
// Defines the number of bits used to print VBR array field values.
static const unsigned DefaultVBRBits = 6;
// Masks out the top-32 bits of a uint64_t value.
static const uint64_t Mask32 = 0xFFFFFFFF00000000;
// The block ID for which abbreviations are being associated.
unsigned BlockID;
// The list of abbreviations defined for the block.
AbbrevVector Abbrevs;
// The mapping from global bitstream abbreviations to the corresponding
// block abbreviation index (in Abbrevs).
AbbrevBitstreamToInternalMap GlobalAbbrevBitstreamToInternalMap;
// A fast lookup map for finding the abbreviation that applies
// to a record.
AbbrevLookupSizeMap LookupMap;
void printLookupMap(raw_ostream &Stream) const {
Stream << "------------------------------\n";
Stream << "Block " << getBlockID() << " abbreviation tries:\n";
bool IsFirstIteration = true;
for (AbbrevLookupSizeMap::const_iterator
Iter = LookupMap.begin(), IterEnd = LookupMap.end();
Iter != IterEnd; ++Iter) {
if (IsFirstIteration)
IsFirstIteration = false;
else
Stream << "-----\n";
if (Iter->second) {
Stream << "Index " << Iter->first << ":\n";
Iter->second->Print(Stream, " ");
}
}
Stream << "------------------------------\n";
}
};
/// Defines a map from block ID's to the corresponding abbreviation
/// map to use.
typedef DenseMap<unsigned, BlockAbbrevs*> BlockAbbrevsMapType;
typedef std::pair<unsigned, BlockAbbrevs*> BlockAbbrevsMapElmtType;
/// Gets the corresponding block abbreviations for a block ID.
static BlockAbbrevs *getAbbrevs(BlockAbbrevsMapType &AbbrevsMap,
unsigned BlockID) {
BlockAbbrevs *Abbrevs = AbbrevsMap[BlockID];
if (Abbrevs == nullptr) {
Abbrevs = new BlockAbbrevs(BlockID);
AbbrevsMap[BlockID] = Abbrevs;
}
return Abbrevs;
}
/// Parses the bitcode file, analyzes it, and generates the
/// corresponding lists of global abbreviations to use in the
/// generated (compressed) bitcode file.
class NaClAnalyzeParser : public NaClBitcodeParser {
NaClAnalyzeParser(const NaClAnalyzeParser&)
LLVM_DELETED_FUNCTION;
void operator=(const NaClAnalyzeParser&)
LLVM_DELETED_FUNCTION;
public:
// Creates the analysis parser, which will fill the given
// BlockAbbrevsMap with appropriate abbreviations, after
// analyzing the bitcode file defined by Cursor.
NaClAnalyzeParser(const NaClBitcodeCompressor::CompressFlags &Flags,
NaClBitstreamCursor &Cursor,
BlockAbbrevsMapType &BlockAbbrevsMap)
: NaClBitcodeParser(Cursor), Flags(Flags),
BlockAbbrevsMap(BlockAbbrevsMap),
BlockDist(&NaClCompressBlockDistElement::Sentinel),
AbbrevListener(this)
{
SetListener(&AbbrevListener);
}
virtual ~NaClAnalyzeParser() {}
virtual bool ParseBlock(unsigned BlockID);
const NaClBitcodeCompressor::CompressFlags &Flags;
// Mapping from block ID's to the corresponding list of abbreviations
// associated with that block.
BlockAbbrevsMapType &BlockAbbrevsMap;
// Nested distribution capturing distribution of records in bitcode file.
NaClBitcodeBlockDist BlockDist;
// Listener used to get abbreviations as they are read.
NaClBitcodeParserListener AbbrevListener;
};
class NaClBlockAnalyzeParser : public NaClBitcodeParser {
NaClBlockAnalyzeParser(const NaClBlockAnalyzeParser&)
LLVM_DELETED_FUNCTION;
void operator=(NaClBlockAnalyzeParser&)
LLVM_DELETED_FUNCTION;
public:
/// Top-level constructor to build the top-level block with the
/// given BlockID, and collect data (for compression) in that block.
NaClBlockAnalyzeParser(unsigned BlockID,
NaClAnalyzeParser *Context)
: NaClBitcodeParser(BlockID, Context), Context(Context) {
init();
}
virtual ~NaClBlockAnalyzeParser() {
Context->BlockDist.AddBlock(GetBlock());
}
protected:
/// Nested constructor to parse a block within a block. Creates a
/// block parser to parse a block with the given BlockID, and
/// collect data (for compression) in that block.
NaClBlockAnalyzeParser(unsigned BlockID,
NaClBlockAnalyzeParser *EnclosingParser)
: NaClBitcodeParser(BlockID, EnclosingParser),
Context(EnclosingParser->Context) {
init();
}
public:
virtual bool ParseBlock(unsigned BlockID) {
NaClBlockAnalyzeParser Parser(BlockID, this);
return Parser.ParseThisBlock();
}
virtual void ProcessRecord() {
// Before processing the record, we need to rename the abbreviation
// index, so that we can look it up in the set of block abbreviations
// being defined.
if (Record.UsedAnAbbreviation()) {
unsigned AbbrevIndex = Record.GetAbbreviationIndex();
if (LocalAbbrevBitstreamToInternalMap.
definesBitstreamAbbrevIndex(AbbrevIndex)) {
Record.SetAbbreviationIndex(
LocalAbbrevBitstreamToInternalMap.
getInternalAbbrevIndex(AbbrevIndex));
} else {
AbbrevBitstreamToInternalMap &GlobalAbbrevBitstreamToInternalMap =
GlobalBlockAbbrevs->getGlobalAbbrevBitstreamToInternalMap();
if (GlobalAbbrevBitstreamToInternalMap.
definesBitstreamAbbrevIndex(AbbrevIndex)) {
Record.SetAbbreviationIndex(
GlobalAbbrevBitstreamToInternalMap.
getInternalAbbrevIndex(AbbrevIndex));
} else {
report_fatal_error("Bad abbreviation index in file");
}
}
}
cast<NaClCompressBlockDistElement>(
Context->BlockDist.GetElement(Record.GetBlockID()))
->GetAbbrevDist().AddRecord(Record);
}
virtual void ProcessAbbreviation(unsigned BlockID,
NaClBitCodeAbbrev *Abbrev,
bool IsLocal) {
int Index;
addAbbreviation(BlockID, Abbrev->Simplify(), Index);
if (IsLocal) {
LocalAbbrevBitstreamToInternalMap.installNewBitstreamAbbrevIndex(Index);
} else {
getGlobalAbbrevs(BlockID)->getGlobalAbbrevBitstreamToInternalMap().
installNewBitstreamAbbrevIndex(Index);
}
}
protected:
// The context (i.e. top-level) parser.
NaClAnalyzeParser *Context;
// The global block abbreviations associated with this block.
BlockAbbrevs *GlobalBlockAbbrevs;
// The local abbreviations associated with this block.
AbbrevBitstreamToInternalMap LocalAbbrevBitstreamToInternalMap;
/// Returns the set of (global) block abbreviations defined for the
/// given block ID.
BlockAbbrevs *getGlobalAbbrevs(unsigned BlockID) {
return getAbbrevs(Context->BlockAbbrevsMap, BlockID);
}
// Adds the abbreviation to the list of abbreviations for the given
// block.
void addAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev,
int &Index) {
// Get block abbreviations.
BlockAbbrevs* Abbrevs = getGlobalAbbrevs(BlockID);
// Read abbreviation and add as a global abbreviation.
if (Abbrevs->addAbbreviation(Abbrev, Index)
&& Context->Flags.TraceGeneratedAbbreviations) {
printAbbrev(errs(), BlockID, Abbrev);
}
}
/// Finds the index to the corresponding internal block abbreviation
/// for the given abbreviation.
int findAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) {
return getGlobalAbbrevs(BlockID)->findAbbreviation(Abbrev);
}
void init() {
GlobalBlockAbbrevs = getGlobalAbbrevs(GetBlockID());
LocalAbbrevBitstreamToInternalMap.setNextBitstreamAbbrevIndex(
GlobalBlockAbbrevs->
getGlobalAbbrevBitstreamToInternalMap().getNextBitstreamAbbrevIndex());
}
};
bool NaClAnalyzeParser::ParseBlock(unsigned BlockID) {
NaClBlockAnalyzeParser Parser(BlockID, this);
return Parser.ParseThisBlock();
}
/// Models the unrolling of an abbreviation into its sequence of
/// individual operators. That is, unrolling arrays to match the width
/// of the abbreviation.
///
/// For example, consider the abbreviation [Array(VBR(6))]. If the
/// distribution map has data for records of size 3, and the
/// distribution map suggests that a constant 4 appears as the second
/// element in the record, it is nontrivial to figure out how to
/// encorporate this into this abbrevation. Hence, we unroll the array
/// (3 times) to get [VBR(6), VBR(6), VBR(6), Array(VBR(6))]. To
/// update the second element to match the literal 4, we only need to
/// replace the second element in the unrolled abbreviation resulting
/// in [VBR(6), Lit(4), VBR(6), Array(VBR(6))].
///
/// After we have done appropriate substitutions, we can simplify the
/// unrolled abbreviation by calling method Restore.
///
/// Note: We unroll in the form that best matches the distribution
/// map. Hence, the code is stored as a separate operator. We also
/// keep the array abbreviation op, for untracked elements within the
/// distribution maps.
class UnrolledAbbreviation {
void operator=(const UnrolledAbbreviation&) LLVM_DELETED_FUNCTION;
public:
/// Unroll the given abbreviation, assuming it has the given size
/// (as specified in the distribution maps).
///
/// If argument CanBeBigger is true, then we do not assume that we
/// can remove the trailing array when expanding, because the
/// actual size of the corresponding record using this abbreviation
/// may be bigger.
UnrolledAbbreviation(NaClBitCodeAbbrev *Abbrev, unsigned Size,
bool CanBeBigger = false)
: CodeOp(0) {
unsigned NextOp = 0;
unrollAbbrevOp(CodeOp, Abbrev, NextOp);
--Size;
for (unsigned i = 0; i < Size; ++i) {
// Create slot and then fill with appropriate operator.
AbbrevOps.push_back(CodeOp);
unrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp);
}
if (CanBeBigger) {
for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) {
MoreOps.push_back(Abbrev->getOperandInfo(NextOp));
}
} else if (NextOp < Abbrev->getNumOperandInfos() &&
!Abbrev->getOperandInfo(NextOp).isArrayOp()) {
errs() << (Size+1) << ": ";
Abbrev->Print(errs());
llvm_unreachable("Malformed abbreviation/size pair");
}
}
explicit UnrolledAbbreviation(const UnrolledAbbreviation &Abbrev)
: CodeOp(Abbrev.CodeOp),
AbbrevOps(Abbrev.AbbrevOps),
MoreOps(Abbrev.MoreOps) {
}
/// Prints out the abbreviation modeled by the unrolled
/// abbreviation.
void print(raw_ostream &Stream) const {
NaClBitCodeAbbrev *Abbrev = restore(false);
Abbrev->Print(Stream);
Abbrev->dropRef();
}
/// Converts the unrolled abbreviation back into a regular
/// abbreviation. If Simplify is true, we simplify the
/// unrolled abbreviation as well.
NaClBitCodeAbbrev *restore(bool Simplify=true) const {
NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
Abbrev->Add(CodeOp);
for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
Iter = AbbrevOps.begin(), IterEnd = AbbrevOps.end();
Iter != IterEnd; ++Iter) {
Abbrev->Add(*Iter);
}
for (std::vector<NaClBitCodeAbbrevOp>::const_iterator
Iter = MoreOps.begin(), IterEnd = MoreOps.end();
Iter != IterEnd; ++Iter) {
Abbrev->Add(*Iter);
}
if (Simplify) {
NaClBitCodeAbbrev *SimpAbbrev = Abbrev->Simplify();
Abbrev->dropRef();
return SimpAbbrev;
} else {
return Abbrev;
}
}
// The abbreviation used for the record code.
NaClBitCodeAbbrevOp CodeOp;
// The abbreviations used for each tracked value index.
std::vector<NaClBitCodeAbbrevOp> AbbrevOps;
private:
// Any remaining abbreviation operators not part of the unrolling.
std::vector<NaClBitCodeAbbrevOp> MoreOps;
// Extracts out the next abbreviation operator from the abbreviation
// Abbrev, given the index NextOp, and assigns it to AbbrevOp
void unrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp,
NaClBitCodeAbbrev *Abbrev,
unsigned &NextOp) {
assert(NextOp < Abbrev->getNumOperandInfos());
const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(NextOp);
if (Op.isArrayOp()) {
// Do not advance. The array operator assumes that all remaining
// elements should match its argument.
AbbrevOp = Abbrev->getOperandInfo(NextOp+1);
} else {
AbbrevOp = Op;
NextOp++;
}
}
};
/// Models a candidate block abbreviation, which is a blockID, and the
/// corresponding abbreviation to be considered for addition. Note:
/// Constructors and assignment take ownership of the abbreviation.
class CandBlockAbbrev {
public:
CandBlockAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbrev)
: BlockID(BlockID), Abbrev(Abbrev) {
}
CandBlockAbbrev(const CandBlockAbbrev &BlockAbbrev)
: BlockID(BlockAbbrev.BlockID),
Abbrev(BlockAbbrev.Abbrev) {
Abbrev->addRef();
}
void operator=(const CandBlockAbbrev &BlockAbbrev) {
Abbrev->dropRef();
BlockID = BlockAbbrev.BlockID;
Abbrev = BlockAbbrev.Abbrev;
Abbrev->addRef();
}
~CandBlockAbbrev() {
Abbrev->dropRef();
}
/// The block ID of the candidate abbreviation.
unsigned getBlockID() const {
return BlockID;
}
/// The abbreviation of the candidate abbreviation.
const NaClBitCodeAbbrev *getAbbrev() const {
return Abbrev;
}
/// orders this against the candidate abbreviation.
int compare(const CandBlockAbbrev &CandAbbrev) const {
unsigned diff = BlockID - CandAbbrev.BlockID;
if (diff) return diff;
return Abbrev->Compare(*CandAbbrev.Abbrev);
}
/// Prints the candidate abbreviation to the given stream.
void print(raw_ostream &Stream) const {
printAbbrev(Stream, BlockID, Abbrev);
}
private:
// The block the abbreviation applies to.
unsigned BlockID;
// The candidate abbreviation.
NaClBitCodeAbbrev *Abbrev;
};
static inline bool operator<(const CandBlockAbbrev &A1,
const CandBlockAbbrev &A2) {
return A1.compare(A2) < 0;
}
/// Models the set of candidate abbreviations being considered, and
/// the number of abbreviations associated with each candidate
/// Abbreviation.
///
/// Note: Because we may have abbreviation refinements of A->B->C and
/// A->D->C, we need to accumulate instance counts in such cases.
class CandidateAbbrevs {
public:
// Map from candidate abbreviations to the corresponding number of
// instances.
typedef std::map<CandBlockAbbrev, unsigned> AbbrevCountMap;
typedef AbbrevCountMap::const_iterator const_iterator;
/// Creates an empty set of candidate abbreviations, to be
/// (potentially) added to the given set of block abbreviations.
/// Argument is the (global) block abbreviations map, which is
/// used to determine if it already exists.
CandidateAbbrevs(BlockAbbrevsMapType &BlockAbbrevsMap)
: BlockAbbrevsMap(BlockAbbrevsMap)
{}
/// Adds the given (unrolled) abbreviation as a candidate
/// abbreviation to the given block. NumInstances is the number of
/// instances expected to use this candidate abbreviation. Returns
/// true if the corresponding candidate abbreviation is added to this
/// set of candidate abbreviations.
bool add(unsigned BlockID,
UnrolledAbbreviation &UnrolledAbbrev,
unsigned NumInstances);
/// Returns the list of candidate abbreviations in this set.
const AbbrevCountMap &getAbbrevsMap() const {
return AbbrevsMap;
}
/// Prints out the current contents of this set.
void print(raw_ostream &Stream, const char *Title = "Candidates") const {
Stream << "-- " << Title << ": \n";
for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end();
Iter != IterEnd; ++Iter) {
Stream << format("%12u", Iter->second) << ": ";
Iter->first.print(Stream);
}
Stream << "--\n";
}
private:
// The set of abbreviations and corresponding number instances.
AbbrevCountMap AbbrevsMap;
// The map of (global) abbreviations already associated with each block.
BlockAbbrevsMapType &BlockAbbrevsMap;
};
bool CandidateAbbrevs::add(unsigned BlockID,
UnrolledAbbreviation &UnrolledAbbrev,
unsigned NumInstances) {
// Drop if it corresponds to an existing global abbreviation.
NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.restore();
if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) {
if (Abbrevs->findAbbreviation(Abbrev) !=
BlockAbbrevs::NO_SUCH_ABBREVIATION) {
Abbrev->dropRef();
return false;
}
}
CandBlockAbbrev CandAbbrev(BlockID, Abbrev);
AbbrevCountMap::iterator Pos = AbbrevsMap.find(CandAbbrev);
if (Pos == AbbrevsMap.end()) {
AbbrevsMap[CandAbbrev] = NumInstances;
} else {
Pos->second += NumInstances;
}
return true;
}
// Look for new abbreviations in block BlockID, considering it was
// read with the given abbreviation Abbrev, and considering changing
// the abbreviation opererator for value Index. ValueDist is how
// values at Index are distributed. Any found abbreviations are added
// to the candidate abbreviations CandAbbrevs. Returns true only if we
// have added new candidate abbreviations to CandAbbrevs.
static bool addNewAbbreviations(unsigned BlockID,
const UnrolledAbbreviation &Abbrev,
unsigned Index,
NaClBitcodeValueDist &ValueDist,
CandidateAbbrevs &CandAbbrevs) {
// TODO(kschimpf): Add code to try and find a better encoding for
// the values, based on the distribution.
// If this index is already a literal abbreviation, no improvements can
// be made.
const NaClBitCodeAbbrevOp Op = Abbrev.AbbrevOps[Index];
if (Op.isLiteral()) return false;
// Search based on sorted distribution, which sorts by number of
// instances. Start by trying to find possible constants to use.
const NaClBitcodeDist::Distribution *
Distribution = ValueDist.GetDistribution();
for (NaClBitcodeDist::Distribution::const_iterator
Iter = Distribution->begin(), IterEnd = Distribution->end();
Iter != IterEnd; ++Iter) {
NaClValueRangeType Range = GetNaClValueRange(Iter->second);
if (Range.first != Range.second) continue; // not a constant.
// Defines a constant. Try as new candidate range. In addition,
// don't try any more constant values, since this is the one with
// the greatest number of instances.
NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first);
UnrolledAbbreviation CandAbbrev(Abbrev);
CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first);
return CandAbbrevs.add(BlockID, CandAbbrev, Elmt->GetNumInstances());
}
return false;
}
// Look for new abbreviations in block BlockID, considering it was
// read with the given abbreviation Abbrev. IndexDist is the
// corresponding distribution of value indices associated with the
// abbreviation. Any found abbreviations are added to the candidate
// abbreviations CandAbbrevs.
static void addNewAbbreviations(
const NaClBitcodeCompressor::CompressFlags &Flags,
unsigned BlockID, const UnrolledAbbreviation &Abbrev,
NaClBitcodeDist &IndexDist, CandidateAbbrevs &CandAbbrevs) {
// Search based on sorted distribution, which sorts based on heuristic
// of best index to fix first.
const NaClBitcodeDist::Distribution *
IndexDistribution = IndexDist.GetDistribution();
for (NaClBitcodeDist::Distribution::const_iterator
IndexIter = IndexDistribution->begin(),
IndexIterEnd = IndexDistribution->end();
IndexIter != IndexIterEnd; ++IndexIter) {
unsigned Index = static_cast<unsigned>(IndexIter->second);
if (addNewAbbreviations(
BlockID, Abbrev, Index,
cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index))
->GetValueDist(),
CandAbbrevs)) {
return;
}
}
}
// Look for new abbreviations in block BlockID, considering it was
// read with the given abbreviation Abbrev, and the given record Code.
// SizeDist is the corresponding distribution of sizes associated with
// the abbreviation. Any found abbreviations are added to the
// candidate abbreviations CandAbbrevs.
static void addNewAbbreviations(
const NaClBitcodeCompressor::CompressFlags &Flags,
unsigned BlockID, NaClBitCodeAbbrev *Abbrev, unsigned Code,
NaClBitcodeDist &SizeDist, CandidateAbbrevs &CandAbbrevs) {
const NaClBitcodeDist::Distribution *
SizeDistribution = SizeDist.GetDistribution();
for (NaClBitcodeDist::Distribution::const_iterator
SizeIter = SizeDistribution->begin(),
SizeIterEnd = SizeDistribution->end();
SizeIter != SizeIterEnd; ++SizeIter) {
unsigned Size = static_cast<unsigned>(SizeIter->second);
UnrolledAbbreviation UnrolledAbbrev(Abbrev, Size+1 /* Add code! */,
Size >= NaClValueIndexCutoff);
if (!UnrolledAbbrev.CodeOp.isLiteral()) {
// Try making the code a literal.
UnrolledAbbreviation CandAbbrev(UnrolledAbbrev);
CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code);
CandAbbrevs.add(BlockID, CandAbbrev,
SizeDist.at(Size)->GetNumInstances());
}
// Now process value indices to find candidate abbreviations.
addNewAbbreviations(Flags, BlockID, UnrolledAbbrev,
cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size))
->GetValueIndexDist(),
CandAbbrevs);
}
}
// Look for new abbreviations in block BlockID. Abbrevs is the map of
// read (globally defined) abbreviations associated with the
// BlockID. AbbrevDist is the distribution map of abbreviations
// associated with BlockID. Any found abbreviations are added to the
// candidate abbreviations CandAbbrevs.
static void addNewAbbreviations(
const NaClBitcodeCompressor::CompressFlags &Flags, unsigned BlockID,
BlockAbbrevs *Abbrevs, NaClBitcodeDist &AbbrevDist,
CandidateAbbrevs &CandAbbrevs) {
const NaClBitcodeDist::Distribution *
AbbrevDistribution = AbbrevDist.GetDistribution();
for (NaClBitcodeDist::Distribution::const_iterator
AbbrevIter = AbbrevDistribution->begin(),
AbbrevIterEnd = AbbrevDistribution->end();
AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second;
NaClBitCodeAbbrev *Abbrev = Abbrevs->getIndexedAbbrev(AbbrevIndex);
NaClBitcodeAbbrevDistElement *AbbrevElmt =
cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex));
NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist();
const NaClBitcodeDist::Distribution *
CodeDistribution = CodeDist.GetDistribution();
for (NaClBitcodeDist::Distribution::const_iterator
CodeIter = CodeDistribution->begin(),
CodeIterEnd = CodeDistribution->end();
CodeIter != CodeIterEnd; ++CodeIter) {
unsigned Code = static_cast<unsigned>(CodeIter->second);
addNewAbbreviations(
Flags, BlockID, Abbrev, Code,
cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second))
->GetSizeDist(),
CandAbbrevs);
}
}
}
typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType;
// Look for new abbreviations in the given block distribution map
// BlockDist. BlockAbbrevsMap contains the set of read global
// abbreviations. Adds found candidate abbreviations to the set of
// known global abbreviations.
static void addNewAbbreviations(
const NaClBitcodeCompressor::CompressFlags &Flags,
NaClBitcodeBlockDist &BlockDist, BlockAbbrevsMapType &BlockAbbrevsMap) {
CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap);
// Start by collecting candidate abbreviations.
const NaClBitcodeDist::Distribution *
Distribution = BlockDist.GetDistribution();
for (NaClBitcodeDist::Distribution::const_iterator
Iter = Distribution->begin(), IterEnd = Distribution->end();
Iter != IterEnd; ++Iter) {
unsigned BlockID = static_cast<unsigned>(Iter->second);
addNewAbbreviations(
Flags, BlockID, BlockAbbrevsMap[BlockID],
cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))
->GetAbbrevDist(),
CandAbbrevs);
}
// Install candidate abbreviations.
//
// Sort the candidate abbreviations by number of instances, so that
// if multiple abbreviations apply, the one with the largest number
// of instances will be chosen when compressing a file.
//
// For example, we may have just generated two abbreviations. The
// first has replaced the 3rd element with the constant 4 while the
// second replaced the 4th element with the constant 5. The first
// abbreviation can apply to 300 records while the second can apply
// to 1000 records. Assuming that both abbreviations shrink the
// record by the same number of bits, we clearly want the tool to
// choose the second abbreviation when selecting the abbreviation
// index to choose (via method getRecordAbbrevIndex).
//
// Selecting the second is important in that abbreviation are
// refined by successive calls to this tool. We do not want to
// restrict downstream refinements prematurely. Hence, we want the
// tool to choose the abbreviation with the most candidates first.
//
// Since method getRecordAbbrevIndex chooses the first abbreviation
// that generates the least number of bits, we clearly want to make
// sure that the second abbreviation occurs before the first.
std::vector<CountedAbbrevType> Candidates;
for (CandidateAbbrevs::const_iterator
Iter = CandAbbrevs.getAbbrevsMap().begin(),
IterEnd = CandAbbrevs.getAbbrevsMap().end();
Iter != IterEnd; ++Iter) {
Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first));
}
std::sort(Candidates.begin(), Candidates.end());
std::vector<CountedAbbrevType>::const_reverse_iterator
Iter = Candidates.rbegin(), IterEnd = Candidates.rend();
if (Iter == IterEnd) return;
if (Flags.TraceGeneratedAbbreviations) {
errs() << "-- New abbrevations:\n";
}
unsigned Min = (Iter->first >> 2);
for (; Iter != IterEnd; ++Iter) {
if (Iter->first < Min) break;
unsigned BlockID = Iter->second.getBlockID();
BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID];
NaClBitCodeAbbrev *Abbrev = Iter->second.getAbbrev()->Copy();
if (Flags.TraceGeneratedAbbreviations) {
errs() <<format("%12u", Iter->first) << ": ";
printAbbrev(errs(), BlockID, Abbrev);
}
Abbrevs->addAbbreviation(Abbrev);
}
if (Flags.TraceGeneratedAbbreviations) {
errs() << "--\n";
}
}
// Walks the block distribution (BlockDist), sorting entries based
// on the distribution of blocks and abbreviations, and then
// prints out the frequency of each abbreviation used.
static void displayAbbreviationFrequencies(
raw_ostream &Output,
const NaClBitcodeBlockDist &BlockDist,
const BlockAbbrevsMapType &BlockAbbrevsMap) {
const NaClBitcodeDist::Distribution *BlockDistribution =
BlockDist.GetDistribution();
for (NaClBitcodeDist::Distribution::const_iterator
BlockIter = BlockDistribution->begin(),
BlockIterEnd = BlockDistribution->end();
BlockIter != BlockIterEnd; ++BlockIter) {
unsigned BlockID = static_cast<unsigned>(BlockIter->second);
BlockAbbrevsMapType::const_iterator BlockPos = BlockAbbrevsMap.find(BlockID);
if (BlockPos == BlockAbbrevsMap.end()) continue;
Output << "Block " << BlockID << "\n";
if (NaClCompressBlockDistElement *BlockElement =
dyn_cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID))) {
NaClBitcodeDist &AbbrevDist = BlockElement->GetAbbrevDist();
const NaClBitcodeDist::Distribution *AbbrevDistribution =
AbbrevDist.GetDistribution();
unsigned Total = AbbrevDist.GetTotal();
for (NaClBitcodeDist::Distribution::const_iterator
AbbrevIter = AbbrevDistribution->begin(),
AbbrevIterEnd = AbbrevDistribution->end();
AbbrevIter != AbbrevIterEnd; ++AbbrevIter) {
unsigned Index = static_cast<unsigned>(AbbrevIter->second);
unsigned Count = AbbrevDist.at(Index)->GetNumInstances();
Output << format("%8u (%6.2f%%): ", Count,
(double) Count/Total*100.0);
BlockPos->second->getIndexedAbbrev(Index)->Print(Output);
}
}
Output << '\n';
}
}
// Read in bitcode, analyze data, and figure out set of abbreviations
// to use, from memory buffer MemBuf containing the input bitcode file.
// If ShowAbbreviationFrequencies or Flag.ShowValueDistributions are
// defined, the corresponding results is printed to Output.
static bool analyzeBitcode(
const NaClBitcodeCompressor::CompressFlags &Flags,
MemoryBuffer *MemBuf,
raw_ostream &Output,
BlockAbbrevsMapType &BlockAbbrevsMap) {
// TODO(kschimpf): The current code only extracts abbreviations
// defined in the bitcode file. This code needs to be updated to
// collect data distributions and figure out better (global)
// abbreviations to use.
const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
// First read header and verify it is good.
NaClBitcodeHeader Header;
if (Header.Read(BufPtr, EndBufPtr) || !Header.IsSupported())
return Error("Invalid PNaCl bitcode header");
// Create a bitstream reader to read the bitcode file.
NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
NaClBitstreamCursor Stream(StreamFile);
// Parse the the bitcode file.
NaClAnalyzeParser Parser(Flags, Stream, BlockAbbrevsMap);
while (!Stream.AtEndOfStream()) {
if (Parser.Parse()) return true;
}
if (Flags.ShowAbbreviationFrequencies)
displayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap);
if (Flags.ShowValueDistributions)
Parser.BlockDist.Print(Output);
addNewAbbreviations(Flags, Parser.BlockDist, BlockAbbrevsMap);
return false;
}
/// Models a queue of selected abbreviation indices, for each record
/// in all instances of a given block. Elements are added in the order
/// they appear in the bitcode file.
///
/// The goal is to remove abbreviations that are not really used, from
/// the list of candidate abbrevations, reducing the number of
/// abbreviations needed. This is important because as the number of
/// abbreviations increase, the number of bits needed to store the
/// abbreviations also increase. By removing unnecessary
/// abbreviations, this improves the ability of this executable to
/// compress the bitcode file.
class SelectedAbbrevsQueue {
SelectedAbbrevsQueue(const SelectedAbbrevsQueue &) = delete;
SelectedAbbrevsQueue &operator=(const SelectedAbbrevsQueue &) = delete;
// The minimum number of times an abbreviation must be used in the
// compressed version of the bitcode file, if it is going to be used
// at all.
static const uint32_t MinUsageCount = 5;
public:
SelectedAbbrevsQueue() : AbbrevsIndexQueueFront(0) {}
/// Adds the given selected abbreviation index to the end of the
/// queue.
void addIndex(unsigned Index) { AbbrevsIndexQueue.push_back(Index); }
/// Removes the next selected abbreviation index from the
/// queue.
unsigned removeIndex() {
assert(AbbrevsIndexQueueFront < AbbrevsIndexQueue.size());
return AbbrevsIndexQueue[AbbrevsIndexQueueFront++];
}
/// Takes the abbreviation indices on the queue, determines what
/// subset of abbreviations should be kept, and puts them on the
/// list of abbreviations defined by getKeptAbbrevs. Updates the
/// abbreviation idices on the queue to match the corresponding kept
/// abbreviations.
///
/// Should be called after the last call to AddIndex, and before
/// calling removeIndex.
void installFrequentlyUsedAbbrevs(BlockAbbrevs *Abbrevs);
/// Return the list of kept abbreviations, for the corresponding
/// block, in the order they should be defined.
const std::vector<NaClBitCodeAbbrev *> &getKeptAbbrevs() const {
return KeptAbbrevs;
}
/// Returns the maximum abbreviation index used by the kept
/// abbreviations.
unsigned getMaxKeptAbbrevIndex() const {
return KeptAbbrevs.size() + naclbitc::DEFAULT_MAX_ABBREV;
}
protected:
// Defines a queue of abbreviations indices using a
// vector. AbbrevsIndexQueueFront is used to point to the front of
// the queue.
std::vector<unsigned> AbbrevsIndexQueue;
unsigned AbbrevsIndexQueueFront;
// The list of abbreviations that should be defined for the block,
// in the order they should be defined.
std::vector<NaClBitCodeAbbrev *> KeptAbbrevs;
};
void SelectedAbbrevsQueue::installFrequentlyUsedAbbrevs(BlockAbbrevs *Abbrevs) {
// Start by collecting usage counts for each abbreviation.
assert(AbbrevsIndexQueueFront == 0);
assert(KeptAbbrevs.empty());
std::map<unsigned, uint32_t> UsageMap;
for (unsigned Index : AbbrevsIndexQueue) {
if (Index != naclbitc::UNABBREV_RECORD)
++UsageMap[Index];
}
// Install results
std::map<unsigned, unsigned> KeepIndexMap;
for (const std::pair<unsigned, uint32_t> &Pair : UsageMap) {
if (Pair.second >= MinUsageCount) {
KeepIndexMap[Pair.first] =
KeptAbbrevs.size() + naclbitc::FIRST_APPLICATION_ABBREV;
KeptAbbrevs.push_back(Abbrevs->getIndexedAbbrev(Pair.first));
}
}
// Update the queue of selected abbreviation indices to match kept
// abbreviations.
for (unsigned &AbbrevIndex : AbbrevsIndexQueue) {
std::map<unsigned, unsigned>::const_iterator NewPos =
KeepIndexMap.find(AbbrevIndex);
AbbrevIndex = NewPos == KeepIndexMap.end() ? naclbitc::UNABBREV_RECORD
: NewPos->second;
}
}
/// Models the kept queue of abbreviations associated with each block ID.
typedef std::map<unsigned, SelectedAbbrevsQueue *> BlockAbbrevsQueueMap;
typedef std::pair<unsigned, SelectedAbbrevsQueue *> BlockAbbrevsQueueMapElmt;
/// Installs frequently used abbreviations for each of the blocks
/// defined in AbbrevsQueueMap, based on the abbreviations in the
/// corresponding AbbrevsMap
static void
installFrequentlyUsedAbbrevs(BlockAbbrevsMapType &AbbrevsMap,
BlockAbbrevsQueueMap &AbbrevsQueueMap) {
for (const BlockAbbrevsQueueMapElmt &Pair : AbbrevsQueueMap) {
if (SelectedAbbrevsQueue *SelectedAbbrevs = Pair.second)
SelectedAbbrevs->installFrequentlyUsedAbbrevs(AbbrevsMap[Pair.first]);
}
}
/// Top level parser to queue selected abbreviation indices (within
/// the bitcode file), so that we can remove unused abbreviations from
/// the collected list of abbreviations before generating the final,
/// compressed bitcode file.
class NaClAssignAbbrevsParser : public NaClBitcodeParser {
public:
NaClAssignAbbrevsParser(NaClBitstreamCursor &Cursor,
BlockAbbrevsMapType &AbbrevsMap,
BlockAbbrevsQueueMap &AbbrevsQueueMap)
: NaClBitcodeParser(Cursor), AbbrevsMap(AbbrevsMap),
AbbrevsQueueMap(AbbrevsQueueMap) {}
~NaClAssignAbbrevsParser() final {}
bool ParseBlock(unsigned BlockID) final;
/// The abbreviations to use for the compressed bitcode.
BlockAbbrevsMapType &AbbrevsMap;
/// The corresponding selected abbreviation indices to for each
/// block.
BlockAbbrevsQueueMap &AbbrevsQueueMap;
};
/// Block parser to queue abbreviation indices used by the records in
/// the block.
class NaClAssignAbbrevsBlockParser : public NaClBitcodeParser {
public:
NaClAssignAbbrevsBlockParser(unsigned BlockID,
NaClAssignAbbrevsParser *Context)
: NaClBitcodeParser(BlockID, Context), BlockID(BlockID),
Context(Context) {
init();
}
~NaClAssignAbbrevsBlockParser() final {}
protected:
unsigned BlockID;
NaClAssignAbbrevsParser *Context;
// Cached abbreviations defined for this block,.
BlockAbbrevs *Abbreviations;
// Cached abbreviations queue to add abbreviation indices to.
SelectedAbbrevsQueue *Queue;
NaClAssignAbbrevsBlockParser(unsigned BlockID,
NaClAssignAbbrevsBlockParser *EnclosingParser)
: NaClBitcodeParser(BlockID, EnclosingParser), BlockID(BlockID),
Context(EnclosingParser->Context) {
init();
}
void init() {
Abbreviations = getAbbrevs(Context->AbbrevsMap, BlockID);
Queue = Context->AbbrevsQueueMap[BlockID];
if (Queue == nullptr) {
Queue = new SelectedAbbrevsQueue();
Context->AbbrevsQueueMap[BlockID] = Queue;
}
}
bool ParseBlock(unsigned BlockID) final {
NaClAssignAbbrevsBlockParser Parser(BlockID, this);
return Parser.ParseThisBlock();
}
void ProcessRecord() final {
// Find best fitting abbreviation to use, and save.
Queue->addIndex(
Abbreviations->getRecordAbbrevIndex(Record.GetRecordData()));
}
};
bool NaClAssignAbbrevsParser::ParseBlock(unsigned BlockID) {
NaClAssignAbbrevsBlockParser Parser(BlockID, this);
return Parser.ParseThisBlock();
}
// Reads the bitcode in MemBuf, using the abbreviations in AbbrevsMap,
// and queues the selected abbrevations for each record into
// AbbrevsQueueMap.
static bool chooseAbbrevs(MemoryBuffer *MemBuf, BlockAbbrevsMapType &AbbrevsMap,
BlockAbbrevsQueueMap &AbbrevsQueueMap) {
const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize();
// Read header. No verification is needed since AnalyzeBitcode has
// already checked it.
NaClBitcodeHeader Header;
if (Header.Read(BufPtr, EndBufPtr))
return Error("Invalid PNaCl bitcode header");
// Create the bitcode reader.
NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
NaClBitstreamCursor Stream(StreamFile);
// Set up the parser.
NaClAssignAbbrevsParser Parser(Stream, AbbrevsMap, AbbrevsQueueMap);
// Parse the bitcode and choose abbreviations for records.
while (!Stream.AtEndOfStream()) {
if (Parser.Parse()) {
installFrequentlyUsedAbbrevs(AbbrevsMap, AbbrevsQueueMap);
return true;
}
}
installFrequentlyUsedAbbrevs(AbbrevsMap, AbbrevsQueueMap);
return false;
}
/// Parses the input bitcode file and generates the corresponding
/// compressed bitcode file, by replacing abbreviations in the input
/// file with the corresponding abbreviations defined in
/// BlockAbbrevsMap using the selected abbreviations in AbbrevsQueueMap.
class NaClBitcodeCopyParser : public NaClBitcodeParser {
public:
// Top-level constructor to build the appropriate block parser
// using the given BlockAbbrevsMap to define abbreviations.
NaClBitcodeCopyParser(const NaClBitcodeCompressor::CompressFlags &Flags,
NaClBitstreamCursor &Cursor,
BlockAbbrevsMapType &BlockAbbrevsMap,
BlockAbbrevsQueueMap &AbbrevsQueueMap,
NaClBitstreamWriter &Writer)
: NaClBitcodeParser(Cursor), Flags(Flags),
BlockAbbrevsMap(BlockAbbrevsMap), AbbrevsQueueMap(AbbrevsQueueMap),
Writer(Writer) {}
virtual ~NaClBitcodeCopyParser() {}
bool ParseBlock(unsigned BlockID) final;
const NaClBitcodeCompressor::CompressFlags &Flags;
// The abbreviations to use for the copied bitcode.
BlockAbbrevsMapType &BlockAbbrevsMap;
// Map defining the selected abbreviations for each block.
BlockAbbrevsQueueMap &AbbrevsQueueMap;
// The bitstream to copy the compressed bitcode into.
NaClBitstreamWriter &Writer;
};
class NaClBlockCopyParser : public NaClBitcodeParser {
public:
// Top-level constructor to build the appropriate block parser.
NaClBlockCopyParser(unsigned BlockID, NaClBitcodeCopyParser *Context)
: NaClBitcodeParser(BlockID, Context), Context(Context),
BlockAbbreviations(nullptr), SelectedAbbrevs(nullptr) {
init();
}
virtual ~NaClBlockCopyParser() {}
protected:
// The context defining state associated with the block parser.
NaClBitcodeCopyParser *Context;
// The block abbreviations defined for this block (initialized by
// EnterBlock).
BlockAbbrevs *BlockAbbreviations;
// The corresonding selected abbreviations.
SelectedAbbrevsQueue *SelectedAbbrevs;
/// Constructor to parse nested blocks. Creates a block parser to
/// parse in a block with the given BlockID, and write the block
/// back out using the abbreviations in BlockAbbrevsMap.
NaClBlockCopyParser(unsigned BlockID,
NaClBlockCopyParser *EnclosingParser)
: NaClBitcodeParser(BlockID, EnclosingParser),
Context(EnclosingParser->Context), BlockAbbreviations(nullptr),
SelectedAbbrevs(nullptr) {
init();
}
void init() {
unsigned BlockID = GetBlockID();
BlockAbbreviations = getGlobalAbbrevs(BlockID);
SelectedAbbrevs = Context->AbbrevsQueueMap[BlockID];
assert(SelectedAbbrevs);
}
/// Returns the set of (global) block abbreviations defined for the
/// given block ID.
BlockAbbrevs *getGlobalAbbrevs(unsigned BlockID) {
return getAbbrevs(Context->BlockAbbrevsMap, BlockID);
}
virtual bool ParseBlock(unsigned BlockID) {
NaClBlockCopyParser Parser(BlockID, this);
return Parser.ParseThisBlock();
}
virtual void EnterBlock(unsigned NumWords) {
// Enter the subblock.
NaClBitcodeSelectorAbbrev Selector(
SelectedAbbrevs->getMaxKeptAbbrevIndex());
if (Context->Flags.RemoveAbbreviations)
Selector = NaClBitcodeSelectorAbbrev();
unsigned BlockID = GetBlockID();
Context->Writer.EnterSubblock(BlockID, Selector);
if (BlockID != naclbitc::MODULE_BLOCK_ID
|| Context->Flags.RemoveAbbreviations)
return;
// To keep things simple, we dump all abbreviations immediately
// inside the module block. Start by dumping module abbreviations
// as local abbreviations.
for (auto Abbrev : SelectedAbbrevs->getKeptAbbrevs()) {
Context->Writer.EmitAbbrev(Abbrev->Copy());
}
// Insert the block info block, if needed, so that nested blocks
// will have defined abbreviations.
bool HasNonmoduleAbbrevs = false;
for (const BlockAbbrevsQueueMapElmt &Pair : Context->AbbrevsQueueMap) {
if (Pair.second->getKeptAbbrevs().empty())
continue;
HasNonmoduleAbbrevs = true;
break;
}
if (!HasNonmoduleAbbrevs)
return;
Context->Writer.EnterBlockInfoBlock();
for (const BlockAbbrevsMapElmtType &Pair : Context->BlockAbbrevsMap) {
unsigned BlockID = Pair.first;
// Don't emit module abbreviations, since they have been
// emitted as local abbreviations.
if (BlockID == naclbitc::MODULE_BLOCK_ID)
continue;
BlockAbbrevs *Abbrevs = Pair.second;
if (Abbrevs == nullptr)
continue;
if (SelectedAbbrevsQueue *SelectedAbbrevs =
Context->AbbrevsQueueMap[BlockID]) {
for (auto Abbrev : SelectedAbbrevs->getKeptAbbrevs()) {
Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev->Copy());
}
}
}
Context->Writer.ExitBlock();
}
virtual void ExitBlock() {
Context->Writer.ExitBlock();
}
virtual void ProcessRecord() {
const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
if (Context->Flags.RemoveAbbreviations) {
Context->Writer.EmitRecord(Record.GetCode(), Values, 0);
return;
}
// Find best fitting abbreviation to use, and print out the record
// using that abbreviation.
unsigned AbbrevIndex = SelectedAbbrevs->removeIndex();
if (AbbrevIndex == naclbitc::UNABBREV_RECORD) {
Context->Writer.EmitRecord(Record.GetCode(), Values, 0);
} else {
Context->Writer.EmitRecord(Record.GetCode(), Values, AbbrevIndex);
}
}
};
bool NaClBitcodeCopyParser::ParseBlock(unsigned BlockID) {
NaClBlockCopyParser Parser(BlockID, this);
return Parser.ParseThisBlock();
}
// Read in bitcode, and write it back out using the abbreviations in
// BlockAbbrevsMap, from memory buffer MemBuf containing the input
// bitcode file. The bitcode is copied to Output.
static bool copyBitcode(const NaClBitcodeCompressor::CompressFlags &Flags,
MemoryBuffer *MemBuf, raw_ostream &Output,
BlockAbbrevsMapType &BlockAbbrevsMap,
BlockAbbrevsQueueMap &AbbrevsQueueMap) {
const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize();
// Read header. No verification is needed since AnalyzeBitcode has
// already checked it.
NaClBitcodeHeader Header;
if (Header.Read(BufPtr, EndBufPtr))
return Error("Invalid PNaCl bitcode header");
// Create the bitcode reader.
NaClBitstreamReader StreamFile(BufPtr, EndBufPtr);
NaClBitstreamCursor Stream(StreamFile);
// Create the bitcode writer.
SmallVector<char, 0> OutputBuffer;
OutputBuffer.reserve(256 * 1024);
NaClBitstreamWriter StreamWriter(OutputBuffer);
// Emit the file header.
NaClWriteHeader(Header, StreamWriter);
// Set up the parser.
NaClBitcodeCopyParser Parser(Flags, Stream, BlockAbbrevsMap, AbbrevsQueueMap,
StreamWriter);
// Parse the bitcode and copy.
while (!Stream.AtEndOfStream()) {
if (Parser.Parse())
return true;
}
// Write out the copied results.
Output.write((char *)&OutputBuffer.front(), OutputBuffer.size());
return false;
}
// Build fast lookup abbreviation maps for each of the abbreviation blocks
// defined in AbbrevsMap.
static void buildAbbrevLookupMaps(
const NaClBitcodeCompressor::CompressFlags &Flags,
BlockAbbrevsMapType &AbbrevsMap) {
for (BlockAbbrevsMapType::const_iterator
Iter = AbbrevsMap.begin(),
IterEnd = AbbrevsMap.end();
Iter != IterEnd; ++Iter) {
Iter->second->buildAbbrevLookupSizeMap(Flags);
}
}
} // namespace
bool NaClBitcodeCompressor::analyze(MemoryBuffer *MemBuf, raw_ostream &Output) {
BlockAbbrevsMapType BlockAbbrevsMap;
return !analyzeBitcode(Flags, MemBuf, Output, BlockAbbrevsMap);
}
bool NaClBitcodeCompressor::compress(MemoryBuffer *MemBuf,
raw_ostream &BitcodeOutput,
raw_ostream &ShowOutput) {
BlockAbbrevsMapType BlockAbbrevsMap;
if (analyzeBitcode(Flags, MemBuf, ShowOutput, BlockAbbrevsMap))
return false;
buildAbbrevLookupMaps(Flags, BlockAbbrevsMap);
BlockAbbrevsQueueMap AbbrevsQueueMap;
bool Result = true;
if (chooseAbbrevs(MemBuf, BlockAbbrevsMap, AbbrevsQueueMap))
Result = false;
else if (copyBitcode(Flags, MemBuf, BitcodeOutput, BlockAbbrevsMap,
AbbrevsQueueMap))
Result = false;
DeleteContainerSeconds(AbbrevsQueueMap);
return Result;
}