diff --git a/basilmill/basildocs.mill b/basilmill/basildocs.mill index a6bc1abf3..a98901f7c 100644 --- a/basilmill/basildocs.mill +++ b/basilmill/basildocs.mill @@ -48,9 +48,9 @@ trait BasilDocs extends ScalaModule { def scalaDocExternalMappingOptions = Task { val defaultExternals = Seq( - ".*scala/.*::scaladoc3::https://scala-lang.org/api/3.3_LTS/", - "java/.*::javadoc::https://docs.oracle.com/en/java/javase/17/docs/api/java.base/", - ".*com/google/protobuf.*::javadoc::https://protobuf.dev/reference/java/api-docs/", + ".*/scala/.*::scaladoc3::https://scala-lang.org/api/3.3_LTS/", + ".*/java/.*::javadoc::https://docs.oracle.com/en/java/javase/17/docs/api/java.base/", + ".*/com/google/protobuf.*::javadoc::https://protobuf.dev/reference/java/api-docs/", ) val externals = defaultExternals ++ docsRegexes().map { case (path, regex) => s"$regex::$baseUrl/$path" diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 25af89725..bba6fb892 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -10,14 +10,17 @@ import util.{ BoogieMemoryAccessMode, DSAConfig, DebugDumpIRLogger, + FrontendMode, ILLoadingConfig, + IRLoading, LogLevel, Logger, MemoryRegionsMode, PCTrackingOption, ProcRelyVersion, RunUtils, - StaticAnalysisConfig + StaticAnalysisConfig, + writeToFile } import scala.language.postfixOps @@ -144,6 +147,12 @@ object Main { doc = "Switch version of procedure rely/guarantee checks to emit. 
(function|ifblock)" ) procedureRG: Option[String], + @arg( + name = "gts-relf", + doc = + "Use .gts file for obtaining ELF symbol information (overrides --relf) (defaults to true if using GTIRB input and no --relf)" + ) + useGTIRBReadELF: Flag, @arg(name = "verbose", short = 'v', doc = "Show extra debugging logs (the same as -vl log)") verbose: Flag, @arg( @@ -157,6 +166,8 @@ object Main { interpret: Flag, @arg(name = "dump-il", doc = "Dump the Intermediate Language to text.") dumpIL: Option[String], + @arg(name = "dump-relf", doc = "Dump Basil's representation of the readelf information to the given file and exit.") + dumpRelf: Option[String], @arg(name = "main-procedure-name", short = 'm', doc = "Name of the main procedure to begin analysis at.") mainProcedureName: String = "main", @arg( @@ -335,7 +346,7 @@ object Main { val boogieGeneratorConfig = BoogieGeneratorConfig(boogieMemoryAccessMode, true, rely, conf.threadSplit.value, conf.noif.value) - val loadingInputs = if (conf.bapInputDirName.isDefined) then { + var loadingInputs = if (conf.bapInputDirName.isDefined) then { loadDirectory(ChooseInput.Bap, conf.bapInputDirName.get) } else if (conf.gtirbInputDirName.isDefined) then { @@ -350,6 +361,56 @@ object Main { ) } + val isGTIRB = loadingInputs.frontendMode == FrontendMode.Gtirb + + // NOTE: --dump-relf ignores --gts-relf, to ensure that the output ELF files are correctly named + conf.dumpRelf match { + case None => () + case Some(relfOut) => + + val gtirbRelfFile = Some(loadingInputs.inputFile).filter(_ => isGTIRB) + val realRelfFile = loadingInputs.relfFile + + Logger.setLevel(LogLevel.DEBUG) + val (relf, gtirb) = (realRelfFile, gtirbRelfFile) match { + case (Some(relfFile), _) => + val (a, b) = IRLoading.loadReadELFWithGTIRB(relfFile, loadingInputs) + (Some(a), b) + case (None, Some(_)) => (None, Some(IRLoading.loadGTIRBReadELF(loadingInputs))) + case _ => throw IllegalArgumentException("--dump-relf requires either --relf or a GTIRB input") + } + + // skip 
writing files if the given path is an empty string. this checks compatibility and exits. + if (relfOut.trim.isEmpty) + return + + relf match { + case Some(relf) => + writeToFile( + relf.sorted.toScala + .replace("@GLIBC_2.17", "") + .replace("@GLIBC_2.38", "") + .replace("@GLIBC_2.34", ""), + relfOut + "-readelf.scala" + ) + case None => Logger.warn(s"Failed to load .relf information, $relfOut-readelf.scala not written") + } + gtirb match { + case Some(relf) => writeToFile(relf.sorted.toScala, relfOut + "-gtsrelf.scala") + case None => Logger.warn(s"Failed to load GTIRB information, $relfOut-gtsrelf.scala not written") + } + return + } + + // patch in gtirb-as-relf if directed or if relf is omitted but we are using gtirb. + // NOTE: this must be done early, because lots of later places make checks about loadingInputs.relfFile. + if (conf.useGTIRBReadELF.value || (isGTIRB && loadingInputs.relfFile.isEmpty)) { + if (!isGTIRB) { + throw IllegalArgumentException("--gts-relf requires a GTIRB input") + } + loadingInputs = loadingInputs.copy(relfFile = Some(loadingInputs.inputFile)) + } + if (loadingInputs.specFile.isDefined && loadingInputs.relfFile.isEmpty) { throw IllegalArgumentException("--spec requires --relf") } diff --git a/src/main/scala/boogie/BExpr.scala b/src/main/scala/boogie/BExpr.scala index 471b48e6f..701fca508 100644 --- a/src/main/scala/boogie/BExpr.scala +++ b/src/main/scala/boogie/BExpr.scala @@ -1,5 +1,6 @@ package boogie import ir.* +import ir.dsl.given import specification.* import util.assertion.* @@ -788,7 +789,7 @@ case class SpecGlobal( arraySize: Option[Int], override val address: BigInt ) extends SymbolTableEntry, - SpecGlobalOrAccess { + SpecGlobalOrAccess derives ir.dsl.ToScala { override def specGlobals: Set[SpecGlobalOrAccess] = Set(this) def sanitisedName = util.StringEscape.escape(name) diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index cd478537c..1cf6ac8ab 100644 --- 
a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -29,8 +29,9 @@ object AuxDecoder { /** * [[AuxKind]] provides pre-defined decoders for some official AuxData fields. An [[AuxKind]] can be - * passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB - * [[com.grammatech.gtirb.proto.Module.Module]]. + * passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[com.grammatech.gtirb.proto.Module.Module]]. + * See the [Standard AuxData Schemata](https://grammatech.github.io/gtirb/md__aux_data.html) for a list of official AuxData fields + * and their types. */ enum AuxKind[T](val name: String, val decoder: Decoder[T]) { case ElfSymbolTabIdxInfo @@ -40,13 +41,16 @@ object AuxDecoder { "elfSymbolInfo", readMap(readUuid, readTuple(readUint(64), readString, readString, readString, readUint(64))) ) + case SectionProperties + extends AuxKind("sectionProperties", readMap(readUuid, readTuple(readUint(64), readUint(64)))) case FunctionEntries extends AuxKind("functionEntries", readMap(readUuid, readSet(readUuid))) case FunctionBlocks extends AuxKind("functionBlocks", readMap(readUuid, readSet(readUuid))) case FunctionNames extends AuxKind("functionNames", readMap(readUuid, readUuid)) + case SymbolForwarding extends AuxKind("symbolForwarding", readMap(readUuid, readUuid)) } type Input = ByteArrayInputStream - type Decoder[T] = Input => T + type Decoder[T] = ByteArrayInputStream => T def decodeAux[T](known: AuxKind[T])(mod: Module) = decode(known.decoder)(mod.auxData(known.name)) @@ -145,6 +149,17 @@ object AuxDecoder { val x6 = r6(bs) (x1, x2, x3, x4, x5, x6) + // type ReadTuple[T <: Tuple] <: Tuple = T match + // case Reader[out] *: rest => out *: ReadTuple[rest] + // case EmptyTuple => EmptyTuple + // + // inline def readTuple[T <: Tuple](xs: T)(bs: Input): ReadTuple[T] = + // inline xs match + // case xs: (Reader[o] *: rest) => + // xs match + // case h *: t => h(bs) *: 
readTuple[rest](t)(bs) + // case _: EmptyTuple => EmptyTuple + def readUuid(bs: Input) = // ByteString.copyFrom(readBytes(16)(bs)) Base64.getEncoder().encodeToString(readBytes(16)(bs)) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala new file mode 100644 index 000000000..73abb0c79 --- /dev/null +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -0,0 +1,275 @@ +package gtirb + +import boogie.SpecGlobal +import com.google.protobuf.ByteString +import com.grammatech.gtirb.proto.CFG.EdgeType.* +import specification.{ExternalFunction, FuncEntry} +import translating.{ELFBind, ELFNDX, ELFSymType, ELFSymbol, ELFVis, ReadELFData} +import util.Logger + +import java.io.ByteArrayInputStream +import scala.util.chaining.scalaUtilChainingOps + +/** + * Responsible for interpreting the GTIRB's symbol information + * and producing ELF information in a format matching [[translating.ReadELFLoader]]. + * + * **Useful links:** + * + * - Full ELF64 specification, useful for symbol kinds/visibility/binding: https://irix7.com/techpubs/007-4658-001.pdf + * - Full ELF32 specification: https://refspecs.linuxfoundation.org/elf/elf.pdf + * - ELF relocation specification, for relocation struct definition: https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html + * - Aarch64 ELF supplement, for relocation types: https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types + * - An ELF cheatsheet: https://gist.github.com/x0nu11byt3/bcb35c3de461e5fb66173071a2379779 + * - elf man page, extra details: https://www.man7.org/linux/man-pages/man5/elf.5.html + * + */ +class GTIRBReadELF(protected val gtirb: GTIRBResolver) { + + /** + * An `Elf64_Rela` structure, as described by the [System V ABI](https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.reloc.html). + * The three fields `r_offset`, `r_info`, and `r_addend` are as described in the struct. + * The last two fields, `r_sym` and `r_type`, are extracted from the `r_info` value. 
+ * + * The [ABI supplement for AArch64](https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types) + * provides information about the interpretation of the `r_type` values. + */ + case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Long) + + /** + * An Aarch64 relocation type, with constants from: + * https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types + */ + sealed trait Elf64RelaType(val value: Long) + case object R_AARCH64_COPY extends Elf64RelaType(1024) + case object R_AARCH64_GLOB_DAT extends Elf64RelaType(1025) + case object R_AARCH64_JUMP_SLOT extends Elf64RelaType(1026) + case object R_AARCH64_RELATIVE extends Elf64RelaType(1027) + + protected def readRela(bs: AuxDecoder.Input) = + import AuxDecoder.* + val (r_offset, r_info, r_addend) = readTuple(readUint(64), readUint(64), readUint(64))(bs) + val r_sym = r_info >> 32 + val r_type = r_info & 0xffffffffL + Elf64Rela(r_offset, r_info, r_addend, r_sym.toLong, r_type.toLong) + + protected def parseRelaTab(bstr: ByteString) = + val bs = ByteArrayInputStream(bstr.toByteArray) + List.unfold(bs) { + case bs if bs.available() > 0 => Some(readRela(bs), bs) + case _ => None + } + + /** + * Parses an Aarch64 relocation type integer. See [[Elf64RelaType]] for constants. + */ + protected def parseAarch64RelaType(x: Long) = x match { + case R_AARCH64_COPY.value => R_AARCH64_COPY + case R_AARCH64_GLOB_DAT.value => R_AARCH64_GLOB_DAT + case R_AARCH64_JUMP_SLOT.value => R_AARCH64_JUMP_SLOT + case R_AARCH64_RELATIVE.value => R_AARCH64_RELATIVE + } + + /** + * https://refspecs.linuxfoundation.org/elf/elf.pdf. + * Figure 1-7. 
Special Section Indexes + */ + protected def parseElfNdx(n: BigInt) = n.toInt match { + case 0 => ELFNDX.UND + case 0xfff1 => ELFNDX.ABS + case i => + if (i >= 0xff00) + Logger.warn("unhandled special elf section index: " + i) + ELFNDX.Section(i) + } + + def parseRela(kind: R_AARCH64_JUMP_SLOT.type | R_AARCH64_GLOB_DAT.type, rela: Elf64Rela): ExternalFunction = + val sym = gtirb.getDynSym(rela.r_sym.toInt).get + ExternalFunction(sym.name, rela.r_offset) + + def parseRela(kind: R_AARCH64_RELATIVE.type, rela: Elf64Rela): (BigInt, BigInt) = + (rela.r_offset, rela.r_addend) + + def parseRela(kind: R_AARCH64_COPY.type, rela: Elf64Rela): gtirb.SymbolRef = + gtirb.getDynSym(rela.r_sym.toInt) + + def getAllSymbols(): List[ELFSymbol] = { + val normalsyms = gtirb.symbolEntriesByUuid.view + .flatMap { case (k, pos) => + val sym = k.get + + val idx = k.symTabIdx.collectFirst { case (".symtab", i) => + i.toInt + } + + val addr = k.getReferentAddress + val value = k.getScalarValue + val combinedValue = addr.orElse(value).getOrElse(0L) + + val (size, ty, bind, vis, shndx) = k.symEntry + + val name = sym.name + + (ty, idx) match { + case ("NONE", _) => None + case (_, None) => None + case (ty, Some(idx)) => + Some( + ELFSymbol( + idx, + combinedValue, + size.toInt, + ELFSymType.valueOf(ty), + ELFBind.valueOf(bind), + ELFVis.valueOf(vis), + parseElfNdx(shndx), + name + ) + ) + } + } + + val sectionsyms = gtirb.mod.sections.view.zipWithIndex.map { case (sec, i) => + val addr = sec.byteIntervals.head.address + val num = i + 1 + + ELFSymbol(num, addr, 0, ELFSymType.SECTION, ELFBind.LOCAL, ELFVis.DEFAULT, ELFNDX.Section(num), sec.name) + } + + (normalsyms ++ sectionsyms).toList + .sortBy(x => x.num) + } + + def getRelocations(): (Map[BigInt, BigInt], Set[ExternalFunction]) = { + def getSectionBytes(sectionName: String) = + gtirb.sectionsByName(sectionName).byteIntervals.head.contents + + val relaDyns = getSectionBytes(".rela.dyn").pipe(parseRelaTab) + val relaPlts = 
getSectionBytes(".rela.plt").pipe(parseRelaTab) + + val relas = (relaDyns ++ relaPlts) + .groupBy(x => parseAarch64RelaType(x.r_type)) + .withDefaultValue(Nil) + + val offs = relas(R_AARCH64_RELATIVE).map(parseRela(R_AARCH64_RELATIVE, _)) + val exts = (relas(R_AARCH64_GLOB_DAT) ++ relas(R_AARCH64_JUMP_SLOT)).map(parseRela(R_AARCH64_JUMP_SLOT, _)) + + (offs.toMap, exts.toSet) + } + + def getGlobals(): Set[SpecGlobal] = + gtirb.symbolEntriesByUuid.view.flatMap { + case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => + + val referentid = symid.getReferentUuid.get + val referent: Option[gtirb.BlockData] = referentid.getOption + referent match { + case Some(blk) => + Some(SpecGlobal(symid.get.name, (size * 8).toInt, None, blk.address)) + + // if the referent is not a real block, then this is a + // forwarding target symbol. discard, because we generate + // the SpecGlobal from the forwarding source symbol. + case None => + assert( + gtirb.symbolForwardingInverse.contains(symid), + "a symbol with a referent that has no data block should be a forwarding target" + ) + None + } + case _ => None + }.toSet + + def getFunctionEntries(): Set[FuncEntry] = + gtirb.symbolEntriesByUuid.view.collect { + case (symid, (size, "FUNC", "GLOBAL", "DEFAULT", idx)) if idx != 0 => + + val nameSymbol = symid.get + val funcUuid = symid.getFunction.get + val entries = funcUuid.getEntries + + assert(entries.size == 1, "function with non-singular entry") + val entry = entries.head + val addr = entry.get.address + + FuncEntry(nameSymbol.name, (size * 8).toInt, addr) + }.toSet + + def getMainAddress(mainProcedureName: String): BigInt = + gtirb.symbolsByName(mainProcedureName).getReferentUuid.get.get.address + + def getReadELFData(mainProcedureName: String): ReadELFData = { + + val (offs, exts) = getRelocations() + val syms = getAllSymbols() + val globs = getGlobals() + val funs = getFunctionEntries() + val main = getMainAddress(mainProcedureName) + + ReadELFData(syms, exts, globs, funs, 
offs, main) + } + +} + +object GTIRBReadELF { + + private val atSuffix = """@[A-Za-z_\d.]+$""".r + + /** + * Strips away some information from [[translating.ReadELFData]] + * which is not so important and causes spurious mismatches between the two + * ELF loaders. + * + * For example, this throws away symbols of type SECTION and symbols beginning with `$`. + * It also strips the `@GLIBC_XX.X` suffix from symbol names. + */ + def normaliseRelf(relf: ReadELFData) = { + val exts = relf.externalFunctions.map(x => x.copy(name = atSuffix.replaceFirstIn(x.name, ""))) + val syms = relf.symbolTable.flatMap { + case ELFSymbol(_, 0, 0, ELFSymType.FILE, ELFBind.LOCAL, ELFVis.DEFAULT, ELFNDX.ABS, "crtstuff.c") => None + case sym if sym.etype != ELFSymType.SECTION && sym.num != -1 && !sym.name.startsWith("$") => + Some(sym.copy(name = atSuffix.replaceFirstIn(sym.name, ""))) + case _ => None + } + val globs = relf.globalVariables.map { x => + x.copy(name = atSuffix.replaceFirstIn(x.name, "")) + } + + relf.copy(externalFunctions = exts, symbolTable = syms, globalVariables = globs) + } + + /** + * Determines whether the current ReadELFData is compatible with + * a given reference ReadELFData. That is, whether the two ELF datas are + * equivalent when normalised ([[normaliseRelf]]). + */ + def checkReadELFCompatibility(gtirbRelf: ReadELFData, referenceRelf: ReadELFData): Boolean = { + var ok = true + + inline def check(b: Boolean, s: String) = { + if (!b) { + Logger.warn("PLEASE REPORT THIS ISSUE! include the gts and relf files. 
gtirb relf discrepancy, " + s) + ok = false + } + } + + inline def checkSet[T](x: Set[T], y: Set[T], s: String) = + check(x == y, s"$s:\ngtirb - relf = ${x -- y}\nrelf - gtirb = ${y -- x}") + + inline def checkEq(x: Any, y: Any, s: String) = + check(x == y, s"$s: gtirb: $x, readelf: $y}") + + val g = normaliseRelf(gtirbRelf) + val o = normaliseRelf(referenceRelf) + checkEq(g.mainAddress, o.mainAddress, "main address differs") + checkSet(g.functionEntries, o.functionEntries, "function entries differ") + checkSet(g.relocationOffsets.toSet, o.relocationOffsets.toSet, "relocations differ") + checkSet(g.globalVariables, o.globalVariables, "global variables differ") + checkSet(g.externalFunctions, o.externalFunctions, "external functions differ") + checkSet(g.symbolTable.toSet, o.symbolTable.toSet, "symbol tables differ") + + Logger.debug("gtirb relf and readelf relf compatible: " + ok) + ok + } + +} diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala new file mode 100644 index 000000000..35062b786 --- /dev/null +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -0,0 +1,215 @@ +package gtirb + +import com.google.protobuf.ByteString +import com.grammatech.gtirb.proto.ByteInterval.{Block, ByteInterval} +import com.grammatech.gtirb.proto.CFG.EdgeType.* +import com.grammatech.gtirb.proto.CodeBlock.CodeBlock +import com.grammatech.gtirb.proto.DataBlock.DataBlock +import com.grammatech.gtirb.proto.Module.Module +import com.grammatech.gtirb.proto.Section.Section +import com.grammatech.gtirb.proto.Symbol.Symbol +import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload +import gtirb.AuxDecoder.{AuxKind, decodeAux} + +import scala.collection.immutable.SortedMap + +/** + * A class for querying the GTIRB IR, abstracting away common operations of + * searching for symbols, functions, blocks, and their relations. The inner + * type [[GTIRBRef]] wraps a Base64 UUID. 
A number of UUID subtypes are defined to + * distinguish different kinds of GTIRB UUID, for example [[GTIRBRef.BlockRef]] + * and [[GTIRBRef.FunctionRef]]. + * + * Each Uuid specialisation defines a number of extension methods for common + * query operations. For example, given a [[GTIRBRef.SymbolRef]], you can get the symbol + * itself via the `.get` method, and you can get its symbol table entry with the + * [[symEntry]] method. Internally, the [[GTIRBResolver]] is indexing into the GTIRB + * protobuf and parsing the AuxData, but this is all neatly hidden away. + */ +case class GTIRBResolver(val mod: Module) { + + sealed trait GTIRBRef(val kind: String, val uuid: String) { + override def toString = s"$kind:$uuid" + override def equals(o: Any) = o match { + case x: GTIRBRef => x.kind == kind && x.uuid == uuid + case _ => false + } + override def hashCode = (kind, uuid).hashCode + } + + object GTIRBRef { + + private def b64(bs: String | ByteString) = bs match { + case s: String => s + case bs: ByteString => java.util.Base64.getEncoder().encodeToString(bs.toByteArray) + } + + class BlockRef(xs: String | ByteString) extends GTIRBRef("blok", b64(xs)) + class FunctionRef(xs: String | ByteString) extends GTIRBRef("func", b64(xs)) + class SymbolRef(xs: String | ByteString) extends GTIRBRef("symb", b64(xs)) + class SectionRef(xs: String | ByteString) extends GTIRBRef("sect", b64(xs)) + } + + export GTIRBRef.* + + /** + * Represents a GTIRB code/data block and its parents. In GTIRB, a block + * occurs within a byte interval, which occurs within a section. Desirable information, + * such as offset and address, is spread across these levels, so it is useful to bundle + * them all together. + * + * See the [GTIRB structure diagram](https://github.com/GrammaTech/gtirb#structure). 
+ */ + case class BlockData(inner: DataBlock | CodeBlock, block: Block, interval: ByteInterval, section: Section) { + val uuid = inner match { + case x: DataBlock => BlockRef(x.uuid) + case x: CodeBlock => BlockRef(x.uuid) + } + val size = inner match { + case x: DataBlock => x.size + case x: CodeBlock => x.size + } + val address = block.offset + interval.address + } + + extension (x: BlockRef) + def get = blocksByUuid(x) + def getOption = blocksByUuid.get(x) + def isProxyBlock = proxyBlockUuids.contains(x) + + extension (x: SymbolRef) + def get: Symbol = { + val sym = symbolsByUuid(x) + + // XXX: forwarded object symbols correspond to R_AARCH64_COPY relocations. + // for these, ddisasm produces a `*_copy` symbol. get the original symbol + // name by following the forwarding. + (getForwardingTarget, symEntry) match { + case (Some(fwd), (_, "OBJECT", "GLOBAL", "DEFAULT", _)) => + sym.copy(name = fwd.get.name) + case _ => sym + } + } + + /** + * Returns the list of symbol table indices where this symbol can be found. + * Each index is a tuple of table name and index within that table. + */ + def symTabIdx = symbolTabIdxByUuid.getOrElse(x, Nil) + + /** + * Returns the `.symtab` entry for the given symbol. + * This is a 5-tuple made up of size, type, binding, visibility, and section index. + * Every symbol table entry is in relation to some section. The section index is the + * index of the relevant section's section header. + */ + def symEntry = symbolEntriesByUuid(x) + + /** + * Gets the [[GTIRBRef.BlockRef]] referred to by this symbol, for example + * a data block or code block. + * This is mutually-exclusive with [[getScalarValue]], + * only one of these can be non-None. + */ + def getReferentUuid = for { + uuid <- x.get.optionalPayload.referentUuid + } yield BlockRef(uuid) + + /** + * Gets the address referred to by this symbol, if the referent is a + * block. Correctly takes into account the `atEnd` field of [[Symbol]]. 
+ */ + def getReferentAddress = for { + uuid <- x.getReferentUuid + block <- uuid.getOption + atEndOffset = if x.get.atEnd then block.size else 0 + } yield block.address + atEndOffset + + /** + * Gets the scalar value associated with this symbol. + * This is mutually-exclusive with [[getReferentUuid]], + * only one of these can be non-None. + */ + def getScalarValue = x.get.optionalPayload._value + + /** + * Gets the [[GTIRBRef.FunctionRef]] associated with this symbol, + * or None if this is not a function name symbol. + */ + def getFunction = funcNamesInverse.get(x) + + def getForwardingTarget = symbolForwarding.get(x) + + extension (x: FunctionRef) + /** + * Gets the set of entry block UUIDs for the given function. + */ + def getEntries = funcEntries(x) + + /** + * Gets the [[GTIRBRef.SymbolRef]] for the given function. + */ + def getName = funcNames(x) + + private def mapFirst[T, T2, U](f: T => T2)(x: (T, U)) = (f(x._1), x._2) + + val proxyBlockUuids = mod.proxies.map(x => BlockRef(x.uuid)).toSet + val symbolsByUuid = mod.symbols.map(x => SymbolRef(x.uuid) -> x).toMap + val symbolsByName = mod.symbols.map(x => x.name -> SymbolRef(x.uuid)).toMap + + val blocksByUuid = (for { + sec <- mod.sections.toList + interval <- sec.byteIntervals + (uuid, innerb, outerb) <- interval.blocks.collect { + case b @ Block(_, Block.Value.Data(dat), _) => (dat.uuid, (dat: DataBlock | CodeBlock), b) + case b @ Block(_, Block.Value.Code(cod), _) => (cod.uuid, (cod: DataBlock | CodeBlock), b) + } + id = BlockRef(uuid) + } yield id -> BlockData(innerb, outerb, interval, sec)).toMap + + val sectionsByName = mod.sections.map(x => x.name -> x).toMap + + val symbolTabIdxByUuid: Map[SymbolRef, List[(String, BigInt)]] = + decodeAux(AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(SymbolRef(_))) + + /** + * A nested map indexed by section name, then symbol index, and returning a symbol uuid. + * For example, `symbolTables(".symtab")(63)`. 
+ */ + val symbolTables = symbolTabIdxByUuid + .flatMap { case (sym, idxs) => + idxs.map(_ -> sym) + } + .groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_ ++ _) + + val symbolEntriesByUuid = decodeAux(AuxKind.ElfSymbolInfo)(mod) + .map(mapFirst(SymbolRef(_))) + + val funcNames = decodeAux(AuxKind.FunctionNames)(mod).map { case (fun, sym) => + FunctionRef(fun) -> SymbolRef(sym) + } + val funcNamesInverse = funcNames.map(_.swap) + val funcEntries = decodeAux(AuxKind.FunctionEntries)(mod).map { case (a, b) => + FunctionRef(a) -> b.map(BlockRef(_)) + } + + val entryPoint = BlockRef(mod.entryPoint) + + def getDynSym(i: Int) = + symbolTables(".dynsym")(i) + + /** + * Symbol forwarding. Keys are "forwarding" symbols which are dynamically-bound to their associated value symbol. + */ + val symbolForwarding = + decodeAux(AuxKind.SymbolForwarding)(mod).map(SymbolRef(_) -> SymbolRef(_)) + + /** + * Inverse symbol forwarding. + * Keys are "target" symbols. + * At runtime, a target's associated symbols will point to the key symbol. 
+ */ + val symbolForwardingInverse = + symbolForwarding.map(_.swap) + +} diff --git a/src/main/scala/ir/dsl/ToScala.scala b/src/main/scala/ir/dsl/ToScala.scala index 1565d1707..6932410f3 100644 --- a/src/main/scala/ir/dsl/ToScala.scala +++ b/src/main/scala/ir/dsl/ToScala.scala @@ -81,7 +81,20 @@ given ToScalaString[BigInt] with given [T](using ToScala[T]): ToScalaLines[Seq[T]] with extension (x: Seq[T]) def toScalaLines = - Twine.indentNested("Seq(", x.map(_.toScalaLines), ")") + Twine.indentNested("Seq(", x.view.map(_.toScalaLines), ")") + +given [T](using ToScala[T]): ToScalaLines[Set[T]] with + extension (x: Set[T]) + def toScalaLines = + Twine.indentNested("Set(", x.view.map(_.toScalaLines), ")") + +given [K, V](using ToScala[K], ToScala[V]): ToScalaLines[Map[K, V]] with + extension (x: Map[K, V]) + def toScalaLines = + val pairs = x.view.map { case (k, v) => + Twine(k.toScalaLines, " -> ", v.toScalaLines) + } + Twine.indentNested("Map(", pairs, ")") given [T](using ToScala[T]): ToScalaString[Some[T]] with extension (x: Some[T]) diff --git a/src/main/scala/ir/dsl/ToScalaDeriving.scala b/src/main/scala/ir/dsl/ToScalaDeriving.scala index 646e9fa5f..c16d55272 100644 --- a/src/main/scala/ir/dsl/ToScalaDeriving.scala +++ b/src/main/scala/ir/dsl/ToScalaDeriving.scala @@ -303,16 +303,22 @@ object ToScalaDeriving { inline isSingleton: Boolean, x: T ): Twine = - val args: Twine = inline isSingleton match - case true => Twine.empty - case false => - val elems = x.asInstanceOf[Product].productIterator - val args = (instances.iterator zip elems) - .map((f, x) => f.asInstanceOf[ToScala[Any]].toScalaLines(x)) - .toList - Twine("(" :: args.intersperse(", ") ::: List(")")) - Twine(name, args) + val (open, close) = inline isSingleton match + case true => ("", "") + case false => ("(", ")") + + val args = + val elems = x.asInstanceOf[Product].productIterator + (instances.iterator zip elems) + .map((f, x) => f.asInstanceOf[ToScala[Any]].toScalaLines(x)) + .toList + + if 
(args.exists(Twine.shallowIsMultiline)) { + Twine.indentNested(name + open, args, close) + } else { + Twine(name + open, Twine(args.intersperse(", ")), close) + } /** * Helper class for wrapping a lambda function into a ToScala instance, diff --git a/src/main/scala/specification/Specification.scala b/src/main/scala/specification/Specification.scala index 19e060e42..2b4667376 100644 --- a/src/main/scala/specification/Specification.scala +++ b/src/main/scala/specification/Specification.scala @@ -1,6 +1,8 @@ package specification import boogie.* +import ir.* +import ir.dsl.given trait SymbolTableEntry { val name: String @@ -9,7 +11,7 @@ trait SymbolTableEntry { } case class FuncEntry(override val name: String, override val size: Int, override val address: BigInt) - extends SymbolTableEntry + extends SymbolTableEntry derives ir.dsl.ToScala case class Specification( funcs: Set[FuncEntry], @@ -52,4 +54,4 @@ case class SubroutineSpec( } } -case class ExternalFunction(name: String, offset: BigInt) +case class ExternalFunction(name: String, offset: BigInt) derives ir.dsl.ToScala diff --git a/src/main/scala/translating/ReadELFLoader.scala b/src/main/scala/translating/ReadELFLoader.scala index f0ba6000d..27102f986 100644 --- a/src/main/scala/translating/ReadELFLoader.scala +++ b/src/main/scala/translating/ReadELFLoader.scala @@ -2,15 +2,17 @@ package translating import Parsers.ReadELFParser.* import boogie.* +import ir.dsl.given import specification.* import util.{ILLoadingConfig, Logger} +import scala.collection.immutable.{SortedMap, SortedSet} import scala.jdk.CollectionConverters.* /** https://refspecs.linuxfoundation.org/elf/elf.pdf */ -enum ELFSymType: +enum ELFSymType derives ir.dsl.ToScala: case NOTYPE /* absolute symbol or similar */ case SECTION /* memory section */ case FILE @@ -18,17 +20,17 @@ enum ELFSymType: case FUNC /* code function */ case TLS /* ??? 
*/ -enum ELFBind: +enum ELFBind derives ir.dsl.ToScala: case LOCAL /* local to the translation unit */ case GLOBAL /* global to the program */ case WEAK /* multiple versions of symbol may be exposed to the linker, and the last definition is used. */ -enum ELFVis: +enum ELFVis derives ir.dsl.ToScala: case HIDDEN case DEFAULT case PROTECTED -enum ELFNDX: +enum ELFNDX derives ir.dsl.ToScala: case Section(num: Int) /* Section containing the symbol */ case UND /* Undefined */ case ABS /* Absolute, unaffected by relocation */ @@ -42,13 +44,30 @@ case class ELFSymbol( vis: ELFVis, ndx: ELFNDX, /* The section containing the symbol */ name: String -) +) derives ir.dsl.ToScala + +case class ReadELFData( + symbolTable: List[ELFSymbol], + externalFunctions: Set[ExternalFunction], + globalVariables: Set[SpecGlobal], + functionEntries: Set[FuncEntry], + relocationOffsets: Map[BigInt, BigInt], + mainAddress: BigInt +) derives ir.dsl.ToScala { + + def sorted = ReadELFData( + symbolTable, + SortedSet.from(externalFunctions)(Ordering.by(Tuple.fromProductTyped(_))), + SortedSet.from(globalVariables)(Ordering.by(Tuple.fromProductTyped(_))), + SortedSet.from(functionEntries)(Ordering.by(Tuple.fromProductTyped(_))), + SortedMap.from(relocationOffsets), + mainAddress + ) + +} object ReadELFLoader { - def visitSyms( - ctx: SymsContext, - config: ILLoadingConfig - ): (List[ELFSymbol], Set[ExternalFunction], Set[SpecGlobal], Set[FuncEntry], Map[BigInt, BigInt], BigInt) = { + def visitSyms(ctx: SymsContext, config: ILLoadingConfig): ReadELFData = { val externalFunctions = ctx.relocationTable.asScala .filter(_.relocationTableHeader != null) .flatMap(r => visitRelocationTableExtFunc(r)) @@ -74,7 +93,7 @@ object ReadELFLoader { if (mainAddress.isEmpty) { throw Exception(s"no ${config.mainProcedureName} function in symbol table") } - (symbolTable, externalFunctions, globalVariables, functionEntries, relocationOffsets, mainAddress.head) + ReadELFData(symbolTable, externalFunctions, 
globalVariables, functionEntries, relocationOffsets, mainAddress.head) } def visitRelocationTableExtFunc(ctx: RelocationTableContext): Set[ExternalFunction] = { diff --git a/src/main/scala/util/BASILConfig.scala b/src/main/scala/util/BASILConfig.scala index 678492616..f606368a2 100644 --- a/src/main/scala/util/BASILConfig.scala +++ b/src/main/scala/util/BASILConfig.scala @@ -27,7 +27,20 @@ case class ILLoadingConfig( trimEarly: Boolean = false, gtirbLiftOffline: Boolean = false, pcTracking: PCTrackingOption = PCTrackingOption.None -) +) { + lazy val frontendMode = + if inputFile.endsWith(".gts") then { + FrontendMode.Gtirb + } else if inputFile.endsWith(".gtirb") then { + FrontendMode.Gtirb + } else if inputFile.endsWith(".adt") then { + FrontendMode.Bap + } else if (inputFile.endsWith(".il")) { + FrontendMode.Basil + } else { + throw Exception(s"input file name ${inputFile} must be an .adt, .gts or .gtirb file") + } +} case class StaticAnalysisConfig( dumpILToPath: Option[String] = None, diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index 5334bf23d..e870d67a6 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -6,6 +6,7 @@ import analysis.{Interval as _, *} import bap.* import boogie.* import com.grammatech.gtirb.proto.IR.IR +import gtirb.{GTIRBReadELF, GTIRBResolver} import ir.* import ir.dsl.given import ir.eval.* @@ -109,26 +110,24 @@ object IRLoading { */ def load(q: ILLoadingConfig): IRContext = { - val mode = if q.inputFile.endsWith(".gts") then { - FrontendMode.Gtirb - } else if q.inputFile.endsWith(".gtirb") then { - if (!q.gtirbLiftOffline) { - throw IllegalArgumentException(".gtirb input requires --lifter") - } - FrontendMode.Gtirb - } else if q.inputFile.endsWith(".adt") then { - FrontendMode.Bap - } else if (q.inputFile.endsWith(".il")) { - FrontendMode.Basil - } else { - throw Exception(s"input file name ${q.inputFile} must be an .adt, .gts or .gtirb file") + val mode = 
q.frontendMode + if (q.inputFile.endsWith(".gtirb") && !q.gtirbLiftOffline) { + throw IllegalArgumentException(".gtirb input requires --lifter") } val (mainAddress, makeContext) = q.relfFile match { case Some(relf) => { - // TODO: this tuple is large, should be a case class - val (symbols, externalFunctions, globals, funcEntries, globalOffsets, mainAddress) = + + // allow loading elf from inputFile if using GTIRB mode. + val relfData = if (relf == q.inputFile && mode == FrontendMode.Gtirb) { + Logger.info("[!] Using ELF data from GTIRB: " + q.inputFile) + IRLoading.loadGTIRBReadELF(q) + } else { + Logger.info("[!] Using ELF data from relf: " + relf) IRLoading.loadReadELF(relf, q) + } + + val ReadELFData(symbols, externalFunctions, globals, funcEntries, globalOffsets, mainAddress) = relfData def continuation(ctx: IRContext) = val specification = IRLoading.loadSpecification(q.specFile, ctx.program, globals) @@ -137,7 +136,9 @@ object IRLoading { (Some(mainAddress), continuation) } case None if mode == FrontendMode.Gtirb => { - Logger.warn("RELF not provided, recommended for GTIRB input") + Logger.warn( + "RELF input not provided, this is not recommended! To provide a RELF input, specify --relf or --gts-relf." + ) (None, (x: IRContext) => x) } case None => { @@ -213,18 +214,48 @@ object IRLoading { GTIRBConverter.createIR() } - def loadReadELF( - fileName: String, - config: ILLoadingConfig - ): (List[ELFSymbol], Set[ExternalFunction], Set[SpecGlobal], Set[FuncEntry], Map[BigInt, BigInt], BigInt) = { + /** Loads ELF data from the GTIRB input file. 
*/ + def loadGTIRBReadELF(config: ILLoadingConfig): ReadELFData = { + val ir = IR.parseFrom(FileInputStream(config.inputFile)) + if (ir.modules.length != 1) { + Logger.warn(s"GTIRB file ${config.inputFile} unexpectedly has ${ir.modules.length} modules") + } + + val gtirb = GTIRBResolver(ir.modules.head) + val gtirbRelfLoader = GTIRBReadELF(gtirb) + gtirbRelfLoader.getReadELFData(config.mainProcedureName) + } + + /** + * Loads ELF data from *both* .relf and .gts (if using GTIRB input). If both + * sources load successfully, compares them and warns on any differences. + */ + def loadReadELFWithGTIRB(fileName: String, config: ILLoadingConfig): (ReadELFData, Option[ReadELFData]) = { val lexer = ReadELFLexer(CharStreams.fromFileName(fileName)) val tokens = CommonTokenStream(lexer) val parser = ReadELFParser(tokens) parser.setErrorHandler(BailErrorStrategy()) parser.setBuildParseTree(true) - ReadELFLoader.visitSyms(parser.syms(), config) + + val relf = ReadELFLoader.visitSyms(parser.syms(), config) + + val gtirbRelf = if (config.inputFile.endsWith(".gts") || config.inputFile.endsWith(".gtirb")) { + val gtirbRelf = loadGTIRBReadELF(config) + GTIRBReadELF.checkReadELFCompatibility(gtirbRelf, relf) + Some(gtirbRelf) + } else { + None + } + + (relf, gtirbRelf) } + /** + * Loads ELF data from .relf. 
+ */ + def loadReadELF(fileName: String, config: ILLoadingConfig) = + loadReadELFWithGTIRB(fileName, config)._1 + def emptySpecification(globals: Set[SpecGlobal]) = Specification(Set(), globals, Map(), List(), List(), List(), Set()) diff --git a/src/main/scala/util/Twine.scala b/src/main/scala/util/Twine.scala index 26a4ff463..03d5d30f7 100644 --- a/src/main/scala/util/Twine.scala +++ b/src/main/scala/util/Twine.scala @@ -172,6 +172,11 @@ object Twine { def lines(parts: (String | Twine)*) = Lines(parts.map(Twine(_))) + def shallowIsMultiline(tw: Twine) = tw match { + case _: Lines | _: Indent => true + case _ => false + } + /** * Indents a nested structure, placing the indented `elems` between `head` and `tail`, * and separating them by `sep` and `newline`. diff --git a/src/test/scala/IrreducibleLoop.scala b/src/test/scala/IrreducibleLoop.scala index 958cfbffe..f200245a4 100644 --- a/src/test/scala/IrreducibleLoop.scala +++ b/src/test/scala/IrreducibleLoop.scala @@ -2,7 +2,7 @@ import analysis.LoopDetector import ir.{Block, Program, dotBlockGraph} import org.scalatest.funsuite.AnyFunSuite import test_util.{BASILTest, CaptureOutput} -import translating.BAPToIR +import translating.{BAPToIR, ReadELFData} import util.{ILLoadingConfig, IRLoading, LogLevel, Logger} import scala.sys.process.* @@ -14,7 +14,7 @@ class IrreducibleLoop extends AnyFunSuite with CaptureOutput { def load(conf: ILLoadingConfig): Program = { val bapProgram = IRLoading.loadBAP(conf.inputFile) - val (_, _, _, _, _, mainAddress) = IRLoading.loadReadELF(conf.relfFile.get, conf) + val ReadELFData(_, _, _, _, _, mainAddress) = IRLoading.loadReadELF(conf.relfFile.get, conf) val IRTranslator = BAPToIR(bapProgram, mainAddress) val IRProgram = IRTranslator.translate IRProgram