From 595756aa6a88b67ae1378d25e193d539afc0a87c Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 20 Jun 2025 14:14:05 +1000 Subject: [PATCH 01/51] case class readelf, toscala readelf, --dump-relf --- build.mill | 2 +- src/main/scala/Main.scala | 11 ++++++++ src/main/scala/boogie/BExpr.scala | 3 ++- src/main/scala/ir/dsl/ToScala.scala | 13 ++++++++++ .../scala/specification/Specification.scala | 5 ++-- .../scala/translating/ReadELFLoader.scala | 25 +++++++++++++------ src/main/scala/util/RunUtils.scala | 4 +-- src/test/scala/IrreducibleLoop.scala | 4 +-- 8 files changed, 52 insertions(+), 15 deletions(-) diff --git a/build.mill b/build.mill index 2db4d87d1b..0ded7486f6 100644 --- a/build.mill +++ b/build.mill @@ -173,7 +173,7 @@ object `package` extends RootModule with ScalaModule { object mdbook extends mdbookBinary { def mdbookSources = Task.Source { - Task.workspace / "docs" + Task.workspace / "docs" } } diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index d6bb8956e7..137e2cfad4 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -143,6 +143,8 @@ object Main { interpret: Flag, @arg(name = "dump-il", doc = "Dump the Intermediate Language to text.") dumpIL: Option[String], + @arg(name = "dump-relf", doc = "Dump Basil's representation of the readelf information to stdout and exit.") + dumpRelf: Flag, @arg(name = "main-procedure-name", short = 'm', doc = "Name of the main procedure to begin analysis at.") mainProcedureName: String = "main", @arg( @@ -327,6 +329,15 @@ object Main { ) } + if (conf.dumpRelf.value) { + val relfFile = loadingInputs.relfFile.getOrElse { + throw IllegalArgumentException("--dump-relf requires --relf") + } + val relfData = IRLoading.loadReadELF(relfFile, loadingInputs) + println(relfData.toScala) + return + } + if (loadingInputs.specFile.isDefined && loadingInputs.relfFile.isEmpty) { throw IllegalArgumentException("--spec requires --relf") } diff --git a/src/main/scala/boogie/BExpr.scala 
b/src/main/scala/boogie/BExpr.scala index 72ed1956ac..02011b5029 100644 --- a/src/main/scala/boogie/BExpr.scala +++ b/src/main/scala/boogie/BExpr.scala @@ -1,5 +1,6 @@ package boogie import ir.* +import ir.dsl.given import specification.* import collection.mutable @@ -750,7 +751,7 @@ case class SpecGlobal( arraySize: Option[Int], override val address: BigInt ) extends SymbolTableEntry, - SpecGlobalOrAccess { + SpecGlobalOrAccess derives ir.dsl.ToScala { override def specGlobals: Set[SpecGlobalOrAccess] = Set(this) override val toAddrVar: BVar = BVariable("$" + s"${name}_addr", BitVecBType(64), Scope.Const) override val toOldVar: BVar = BVariable(s"${name}_old", BitVecBType(size), Scope.Local) diff --git a/src/main/scala/ir/dsl/ToScala.scala b/src/main/scala/ir/dsl/ToScala.scala index 97d9ccbbb6..e9a58e3d3f 100644 --- a/src/main/scala/ir/dsl/ToScala.scala +++ b/src/main/scala/ir/dsl/ToScala.scala @@ -86,6 +86,19 @@ given [T](using ToScala[T]): ToScalaLines[Seq[T]] with def toScalaLines = Twine.indentNested("Seq(", x.map(_.toScalaLines), ")") +given [T](using ToScala[T]): ToScalaLines[Set[T]] with + extension (x: Set[T]) + def toScalaLines = + Twine.indentNested("Set(", x.map(_.toScalaLines), ")") + +given [K, V](using ToScala[K], ToScala[V]): ToScalaLines[Map[K, V]] with + extension (x: Map[K, V]) + def toScalaLines = + val pairs = x.map { + case (k,v) => Twine(k.toScalaLines, " -> ", v.toScalaLines) + } + Twine.indentNested("Map(", pairs, ")") + given [T](using ToScala[T]): ToScalaString[Some[T]] with extension (x: Some[T]) def toScala: String = x match diff --git a/src/main/scala/specification/Specification.scala b/src/main/scala/specification/Specification.scala index 0144866af8..8f138a06b2 100644 --- a/src/main/scala/specification/Specification.scala +++ b/src/main/scala/specification/Specification.scala @@ -2,6 +2,7 @@ package specification import boogie.* import ir.* +import ir.dsl.given import util.Logger trait SymbolTableEntry { @@ -11,7 +12,7 @@ trait 
SymbolTableEntry { } case class FuncEntry(override val name: String, override val size: Int, override val address: BigInt) - extends SymbolTableEntry + extends SymbolTableEntry derives ir.dsl.ToScala case class Specification( funcs: Set[FuncEntry], @@ -54,4 +55,4 @@ case class SubroutineSpec( } } -case class ExternalFunction(name: String, offset: BigInt) +case class ExternalFunction(name: String, offset: BigInt) derives ir.dsl.ToScala diff --git a/src/main/scala/translating/ReadELFLoader.scala b/src/main/scala/translating/ReadELFLoader.scala index 8b4d87a85c..92786b3347 100644 --- a/src/main/scala/translating/ReadELFLoader.scala +++ b/src/main/scala/translating/ReadELFLoader.scala @@ -8,10 +8,12 @@ import util.ILLoadingConfig import scala.jdk.CollectionConverters.* +import ir.dsl.given + /** https://refspecs.linuxfoundation.org/elf/elf.pdf */ -enum ELFSymType: +enum ELFSymType derives ir.dsl.ToScala: case NOTYPE /* absolute symbol or similar */ case SECTION /* memory section */ case FILE @@ -19,17 +21,17 @@ enum ELFSymType: case FUNC /* code function */ case TLS /* ??? */ -enum ELFBind: +enum ELFBind derives ir.dsl.ToScala: case LOCAL /* local to the translation unit */ case GLOBAL /* global to the program */ case WEAK /* multiple versions of symbol may be exposed to the linker, and the last definition is used. 
*/ -enum ELFVis: +enum ELFVis derives ir.dsl.ToScala: case HIDDEN case DEFAULT case PROTECTED -enum ELFNDX: +enum ELFNDX derives ir.dsl.ToScala: case Section(num: Int) /* Section containing the symbol */ case UND /* Undefined */ case ABS /* Absolute, unaffected by relocation */ @@ -43,13 +45,22 @@ case class ELFSymbol( vis: ELFVis, ndx: ELFNDX, /* The section containing the symbol */ name: String -) +) derives ir.dsl.ToScala + +case class ReadELFData( + symbolTable: List[ELFSymbol], + externalFunctions: Set[ExternalFunction], + globalVariables: Set[SpecGlobal], + functionEntries: Set[FuncEntry], + relocationOffsets: Map[BigInt, BigInt], + mainAddress: BigInt +) derives ir.dsl.ToScala object ReadELFLoader { def visitSyms( ctx: SymsContext, config: ILLoadingConfig - ): (List[ELFSymbol], Set[ExternalFunction], Set[SpecGlobal], Set[FuncEntry], Map[BigInt, BigInt], BigInt) = { + ): ReadELFData = { val externalFunctions = ctx.relocationTable.asScala .filter(_.relocationTableHeader != null) .flatMap(r => visitRelocationTableExtFunc(r)) @@ -75,7 +86,7 @@ object ReadELFLoader { if (mainAddress.isEmpty) { throw Exception(s"no ${config.mainProcedureName} function in symbol table") } - (symbolTable, externalFunctions, globalVariables, functionEntries, relocationOffsets, mainAddress.head) + ReadELFData(symbolTable, externalFunctions, globalVariables, functionEntries, relocationOffsets, mainAddress.head) } def visitRelocationTableExtFunc(ctx: RelocationTableContext): Set[ExternalFunction] = { diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index 43fd20e5b4..5b1f927ef4 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -138,7 +138,7 @@ object IRLoading { val (mainAddress, makeContext) = q.relfFile match { case Some(relf) => { // TODO: this tuple is large, should be a case class - val (symbols, externalFunctions, globals, funcEntries, globalOffsets, mainAddress) = + val ReadELFData(symbols, 
externalFunctions, globals, funcEntries, globalOffsets, mainAddress) = IRLoading.loadReadELF(relf, q) def continuation(program: Program) = @@ -202,7 +202,7 @@ object IRLoading { def loadReadELF( fileName: String, config: ILLoadingConfig - ): (List[ELFSymbol], Set[ExternalFunction], Set[SpecGlobal], Set[FuncEntry], Map[BigInt, BigInt], BigInt) = { + ): ReadELFData = { val lexer = ReadELFLexer(CharStreams.fromFileName(fileName)) val tokens = CommonTokenStream(lexer) val parser = ReadELFParser(tokens) diff --git a/src/test/scala/IrreducibleLoop.scala b/src/test/scala/IrreducibleLoop.scala index 10edf1fd6f..1155cbba2a 100644 --- a/src/test/scala/IrreducibleLoop.scala +++ b/src/test/scala/IrreducibleLoop.scala @@ -1,6 +1,6 @@ import org.scalatest.funsuite.AnyFunSuite import util.{ILLoadingConfig, IRLoading, LogLevel, Logger, PerformanceTimer, RunUtils} -import translating.BAPToIR +import translating.{BAPToIR, ReadELFData} import analysis.LoopDetector import analysis.LoopTransform import ir.{Block, Program, dotBlockGraph} @@ -18,7 +18,7 @@ class IrreducibleLoop extends AnyFunSuite with CaptureOutput { def load(conf: ILLoadingConfig): Program = { val bapProgram = IRLoading.loadBAP(conf.inputFile) - val (_, _, _, _, _, mainAddress) = IRLoading.loadReadELF(conf.relfFile.get, conf) + val ReadELFData(_, _, _, _, _, mainAddress) = IRLoading.loadReadELF(conf.relfFile.get, conf) val IRTranslator = BAPToIR(bapProgram, mainAddress) val IRProgram = IRTranslator.translate IRProgram From ba5dec46bfb8271862fdd127832533f924916e10 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 20 Jun 2025 15:21:27 +1000 Subject: [PATCH 02/51] we can get external function symbols. 
TODO: dig into auxdata --- src/main/scala/Main.scala | 5 +++++ src/main/scala/translating/GTIRBReadELF.scala | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 src/main/scala/translating/GTIRBReadELF.scala diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 137e2cfad4..7ca86df1e8 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -335,6 +335,11 @@ object Main { } val relfData = IRLoading.loadReadELF(relfFile, loadingInputs) println(relfData.toScala) + println() + + val fIn = java.io.FileInputStream(loadingInputs.inputFile) + val ir = com.grammatech.gtirb.proto.IR.IR.parseFrom(fIn) + println(ir.modules.map(translating.GTIRBReadELF.getExternalFunctions)) return } diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala new file mode 100644 index 0000000000..f1135bd1f2 --- /dev/null +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ -0,0 +1,19 @@ +package translating + +import com.google.protobuf.ByteString +import com.grammatech.gtirb.proto.CFG.EdgeType.* +import com.grammatech.gtirb.proto.CFG.CFG +import com.grammatech.gtirb.proto.CFG.Edge +import com.grammatech.gtirb.proto.CFG.EdgeLabel +import com.grammatech.gtirb.proto.Module.Module +import com.grammatech.gtirb.proto.Symbol.Symbol + +object GTIRBReadELF { + + def getExternalFunctions(mod: Module) = { + + val proxyBlockUuids = mod.proxies.map(_.uuid).toSet + val externalFunctionSymbols = mod.symbols.filter(x => proxyBlockUuids.contains(x.getReferentUuid)) + + } +} From 167325f9fc59748daa9c4ebea124200eaa05e325 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 20 Jun 2025 17:13:02 +1000 Subject: [PATCH 03/51] new AuxDecoder for compositional decoding of AuxData the previous MapDecoder is hard-coded to decode only certain structures in the aux_data. this makes it more compositional, so we can decode more complex types across more auxdata fields. 
--- src/main/scala/gtirb/AuxDecoder.scala | 78 +++++++++++++++++++++++++++ src/test/scala/AuxDecoderTest.scala | 31 +++++++++++ 2 files changed, 109 insertions(+) create mode 100644 src/main/scala/gtirb/AuxDecoder.scala create mode 100644 src/test/scala/AuxDecoderTest.scala diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala new file mode 100644 index 0000000000..a91b9ae454 --- /dev/null +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -0,0 +1,78 @@ +package gtirb + +import java.io.ByteArrayInputStream +import com.google.protobuf.ByteString + +object AuxDecoder { + + type Input = ByteArrayInputStream + type Reader[T] = Input => T + + def readBytes(numBytes: Int)(bs: Input): Array[Byte] = + val bytes = bs.readNBytes(numBytes) + // assert(bytes.length == numBytes, s"insufficient bytes to read. got ${bytes.length} but wanted $numBytes") + bytes + + def readBool(bs: Input) = + readUint(8)(bs) != 0 + + def readUint(numBits: Int) = readInt(numBits, false) + + def readInt(numBits: Int, signed: Boolean = false)(bs: Input) = + val numBytes = numBits / 8 + require(numBytes * 8 == numBits, "requires multiple of 8") + + readBytes(numBytes)(bs).foldRight(BigInt(0)) { + case (x, acc) => + val n = x.toInt + acc * 256 + (if (!signed && n < 0) then n + 256 else n) + } + + def readMap[K,V](keyReader: Reader[K], valReader: Reader[V])(bs: Input) = + val len = readUint(64)(bs) + (BigInt(0) until len).map { + case _ => + val k = keyReader(bs) + val v = valReader(bs) + k -> v + }.toMap + + def readSet[K,V](valReader: Reader[V])(bs: Input) = + val len = readUint(64)(bs) + (BigInt(0) until len).map(_ => valReader(bs)).toSet + + def readList[K,V](valReader: Reader[V])(bs: Input) = + val len = readUint(64)(bs) + (BigInt(0) until len).map(_ => valReader(bs)).toList + + def readTuple[T1, T2](r1: Reader[T1], r2: Reader[T2])(bs: Input) = + val x1 = r1(bs) + val x2 = r2(bs) + (x1, x2) + + def readTuple[T1, T2, T3, T4](r1: Reader[T1], r2: Reader[T2], r3: 
Reader[T3], r4: Reader[T4])(bs: Input) = + val x1 = r1(bs) + val x2 = r2(bs) + val x3 = r3(bs) + val x4 = r4(bs) + (x1, x2, x3, x4) + + def readTuple[T1, T2, T3, T4, T5](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4], r5: Reader[T5])(bs: Input) = + val x1 = r1(bs) + val x2 = r2(bs) + val x3 = r3(bs) + val x4 = r4(bs) + val x5 = r5(bs) + (x1, x2, x3, x4, x5) + + def readUuid(bs: Input) = + ByteString.copyFrom(readBytes(16)(bs)) + + def readOffset(bs: Input) = + val uuid = readUuid(bs) + val len = readUint(64)(bs) + (uuid, len) + +} + + diff --git a/src/test/scala/AuxDecoderTest.scala b/src/test/scala/AuxDecoderTest.scala new file mode 100644 index 0000000000..b2e5b397c6 --- /dev/null +++ b/src/test/scala/AuxDecoderTest.scala @@ -0,0 +1,31 @@ +import org.scalatest.funsuite.AnyFunSuite +import test_util.{CaptureOutput} + +import java.util.Base64 +import com.google.protobuf.ByteString + +@test_util.tags.UnitTest +class AuxDecoderTest extends AnyFunSuite, CaptureOutput { + + def bytes(x: String) = + java.io.ByteArrayInputStream(x.getBytes) + def bytes(x: Array[Byte]) = + java.io.ByteArrayInputStream(x) + def bytes(x: Int*) = + java.io.ByteArrayInputStream(Array[Byte](x.map(_.toByte) : _*)) + + test("reading unsigned long") { + assertResult(255) { + gtirb.AuxDecoder.readUint(8)(bytes(-1)) + } + assertResult(1) { + gtirb.AuxDecoder.readUint(16)(bytes(1, 0)) + } + assertResult(256) { + gtirb.AuxDecoder.readUint(16)(bytes(0, 1)) + } + } + + lazy val functionEntriesData = 
"DgAAAAAAAAAKN8AQfJJCx6z/YC/0ajCuAQAAAAAAAAC63E936mNIwLttnN9qQwpSEC0VjLcjTrWfq3ProZkm4QEAAAAAAAAA8+/+FHF9QLyY8Cp3s20b7UK9rpkxpUlVn3nywB/Pa6MBAAAAAAAAABDsf7+EvUScja3NVtybLYB8D9RjUu1GUJuU7h50M6qCAQAAAAAAAADPt2wBnjVBh72u9sO3UmghhYEnBCBdRgqUP83Wk83TKgEAAAAAAAAABH/y67RjR/CceNXW4dxJX5v1FZgF+UVhpIPWkdaYGM8BAAAAAAAAABE9oSg0AkvBr4d3nZzAJBueV40Xb8lNGL/X4rr0M+yNAQAAAAAAAADwmKneRoNNo7x59rfe+fM2y2YqeTQETDizbRO9Gd5rTgEAAAAAAAAATtUSuPRtRpKUyIiDPj5JWdgBS4GeWUYpkXOWrjz19dsBAAAAAAAAAHtFQipdkEVBlIMEUcxGTzjjOXYE/IhEDYMFs5/1CguSAQAAAAAAAABEjFAwt29EyqDlVadlTiLU7efo2YpZRjKoGwMiNg5x7AEAAAAAAAAARcz/PMuZRmSsGVbCF4FnpfPpZJPuKUVCovWrMD5+jrUBAAAAAAAAAAHijBSRNEmvo+oayuSZKB31ZSjpkoFFbpQb5ZqbQCvBAQAAAAAAAABYmEPRwHtEIr+F+AjLrUdn9g3j8z3UR3uy4oljzAjVDgEAAAAAAAAAaXmP/QmsQ2uAJBCKxtQSsQ==" + lazy val functionEntriesBytes = Base64.getDecoder().decode(functionEntriesData) +} From 7397b8839ea1e67dbe512f5a2c8037cddbd82fb4 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 20 Jun 2025 17:55:32 +1000 Subject: [PATCH 04/51] use auxdecoder in gtirb loader --- src/main/scala/gtirb/AuxDecoder.scala | 31 ++++++------ src/main/scala/gtirb/MapDecoder.scala | 58 ---------------------- src/main/scala/translating/GTIRBToIR.scala | 26 ++++++++-- src/main/scala/util/functional/List.scala | 22 +++++++- src/test/scala/AuxDecoderTest.scala | 5 +- 5 files changed, 62 insertions(+), 80 deletions(-) delete mode 100644 src/main/scala/gtirb/MapDecoder.scala diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index a91b9ae454..60fe3fa7ad 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -8,6 +8,9 @@ object AuxDecoder { type Input = ByteArrayInputStream type Reader[T] = Input => T + def decode[T](reader: Reader[T])(bytes: ByteString): T = + reader(ByteArrayInputStream(bytes.toByteArray)) + def readBytes(numBytes: Int)(bs: Input): Array[Byte] = val bytes = bs.readNBytes(numBytes) // assert(bytes.length == numBytes, s"insufficient bytes to read. 
got ${bytes.length} but wanted $numBytes") @@ -22,26 +25,24 @@ object AuxDecoder { val numBytes = numBits / 8 require(numBytes * 8 == numBits, "requires multiple of 8") - readBytes(numBytes)(bs).foldRight(BigInt(0)) { - case (x, acc) => - val n = x.toInt - acc * 256 + (if (!signed && n < 0) then n + 256 else n) + readBytes(numBytes)(bs).foldRight(BigInt(0)) { case (x, acc) => + val n = x.toInt + acc * 256 + (if (!signed && n < 0) then n + 256 else n) } - def readMap[K,V](keyReader: Reader[K], valReader: Reader[V])(bs: Input) = + def readMap[K, V](keyReader: Reader[K], valReader: Reader[V])(bs: Input) = val len = readUint(64)(bs) - (BigInt(0) until len).map { - case _ => - val k = keyReader(bs) - val v = valReader(bs) - k -> v + (BigInt(0) until len).map { case _ => + val k = keyReader(bs) + val v = valReader(bs) + k -> v }.toMap - def readSet[K,V](valReader: Reader[V])(bs: Input) = + def readSet[K, V](valReader: Reader[V])(bs: Input) = val len = readUint(64)(bs) (BigInt(0) until len).map(_ => valReader(bs)).toSet - def readList[K,V](valReader: Reader[V])(bs: Input) = + def readList[K, V](valReader: Reader[V])(bs: Input) = val len = readUint(64)(bs) (BigInt(0) until len).map(_ => valReader(bs)).toList @@ -57,7 +58,9 @@ object AuxDecoder { val x4 = r4(bs) (x1, x2, x3, x4) - def readTuple[T1, T2, T3, T4, T5](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4], r5: Reader[T5])(bs: Input) = + def readTuple[T1, T2, T3, T4, T5](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4], r5: Reader[T5])( + bs: Input + ) = val x1 = r1(bs) val x2 = r2(bs) val x3 = r3(bs) @@ -74,5 +77,3 @@ object AuxDecoder { (uuid, len) } - - diff --git a/src/main/scala/gtirb/MapDecoder.scala b/src/main/scala/gtirb/MapDecoder.scala deleted file mode 100644 index fa58cd5b17..0000000000 --- a/src/main/scala/gtirb/MapDecoder.scala +++ /dev/null @@ -1,58 +0,0 @@ -package gtirb -import java.io.FileInputStream -import com.google.protobuf.ByteString -import 
java.io.ByteArrayInputStream -import java.nio.charset.StandardCharsets - -/* - * Provides some useful decoders for certian AuxData sections in gtirb. - * - * See https://grammatech.github.io/gtirb/python/_modules/gtirb/serialization.html#MappingCodec.decode, this was pulled - * from their python API, and converted into scala - */ -object MapDecoder { - def decode_set(totalBytes: Seq[ByteString]): Map[ByteString, Set[ByteString]] = { - val totalMap: Map[ByteString, Set[ByteString]] = (for { - bytes <- totalBytes - byteStream = ByteArrayInputStream(bytes.toByteArray) - len = bytesToLong(read_bytes(8, byteStream), true) - s <- 0L until len - } yield { - val key = ByteString.copyFrom(read_bytes(16, byteStream)) - val len2 = bytesToLong(read_bytes(8, byteStream), true) - val uuids = (for (k <- 0L until len2) yield { // should maybe check this - ByteString.copyFrom(read_bytes(16, byteStream)) - }).toSet - key -> uuids - }).toMap - totalMap - } - - def decode_uuid(totalBytes: Seq[ByteString]): Map[ByteString, ByteString] = { - val totalMap: Map[ByteString, ByteString] = (for { - bytes <- totalBytes - byteStream = ByteArrayInputStream(bytes.toByteArray) - len = bytesToLong(read_bytes(8, byteStream), true) - s <- 0L until len - } yield { - val key = ByteString.copyFrom(read_bytes(16, byteStream)) - val uuid = ByteString.copyFrom(read_bytes(16, byteStream)) - key -> uuid - }).toMap - totalMap - } - - def read_bytes(size: Int, byteStream: ByteArrayInputStream): Array[Byte] = { - byteStream.readNBytes(size) - } - - def bytesToLong(bytes: Array[Byte], littleEndian: Boolean): Long = { - val buffer = java.nio.ByteBuffer.wrap(bytes) - if (littleEndian) { - buffer.order(java.nio.ByteOrder.LITTLE_ENDIAN).getLong - } else { - buffer.getLong - } - } - -} diff --git a/src/main/scala/translating/GTIRBToIR.scala b/src/main/scala/translating/GTIRBToIR.scala index 9073cbb560..5e7de04fcd 100644 --- a/src/main/scala/translating/GTIRBToIR.scala +++ b/src/main/scala/translating/GTIRBToIR.scala 
@@ -24,7 +24,7 @@ import scala.util.boundary import boundary.break import java.nio.ByteBuffer import util.intrusive_list.* -import util.functional.{Snoc} +import util.functional.{Snoc, foldLeft1, foldLeft0} import util.Logger private def assigned(x: Statement): immutable.Set[Variable] = x match { @@ -75,9 +75,27 @@ class GTIRBToIR( mainAddress: Option[BigInt], mainName: Option[String] ) { - private val functionNames = MapDecoder.decode_uuid(mods.map(_.auxData("functionNames").data)) - private val functionEntries = MapDecoder.decode_set(mods.map(_.auxData("functionEntries").data)) - private val functionBlocks = MapDecoder.decode_set(mods.map(_.auxData("functionBlocks").data)) + + object decoders { + import gtirb.AuxDecoder.* + lazy val decodeFunctionNames = readMap(readUuid, readUuid) + lazy val decodeFunctionEntries = readMap(readUuid, readSet(readUuid)) + lazy val decodeFunctionBlocks = readMap(readUuid, readSet(readUuid)) + } + import decoders.* + + private val functionNames = mods + .map(_.auxData("functionNames").data) + .map(AuxDecoder.decode(decodeFunctionNames)(_)) + .foldLeft0(_ ++ _) + private val functionEntries = mods + .map(_.auxData("functionEntries").data) + .map(AuxDecoder.decode(decodeFunctionEntries)(_)) + .foldLeft0(_ ++ _) + private val functionBlocks = mods + .map(_.auxData("functionBlocks").data) + .map(AuxDecoder.decode(decodeFunctionBlocks)(_)) + .foldLeft0(_ ++ _) // maps block UUIDs to their address private val blockUUIDToAddress = createAddresses() diff --git a/src/main/scala/util/functional/List.scala b/src/main/scala/util/functional/List.scala index 74ea679fdf..d57c4ab084 100644 --- a/src/main/scala/util/functional/List.scala +++ b/src/main/scala/util/functional/List.scala @@ -1,7 +1,7 @@ package util.functional import collection.immutable.LinearSeq -import collection.{SeqOps, IterableOps, Factory} +import collection.{SeqOps, IterableOps, Factory, IterableOnceOps} /** * This unapplier enables pattern matching on the /last/ element of @@ 
-65,3 +65,23 @@ def sequence[T, L, CC[U] <: IterableOps[U, CC, CC[U]]](xs: CC[Either[L, T]]): Ei case (_, right) => Right(right) } } + +extension [A, CC[X] <: IterableOps[X, CC, CC[X]], C <: CC[A]](coll: IterableOps[A, CC, C]) + + /** + * Performs a left fold on the given *non-empty* iterable, using the first element of the iterable + * as the base case for the fold. + */ + def foldLeft1(f: (A, A) => A): A = + def error = throw IllegalArgumentException("foldLeft1 called with empty iterable") + + val (hd, tl) = coll.splitAt(1) + tl.foldLeft[A](hd.headOption.getOrElse(error))(f) + + + /** + * Performs a left fold on the given iterable, using an *empty collection* as the base case for the fold. + * The type of the empty collection is inferred from the type of the original iterable. + */ + def foldLeft0(f: (A, A) => A)(implicit factory: Factory[Nothing, A]): A = + coll.foldLeft[A](factory.newBuilder.result)(f) diff --git a/src/test/scala/AuxDecoderTest.scala b/src/test/scala/AuxDecoderTest.scala index b2e5b397c6..958625fc61 100644 --- a/src/test/scala/AuxDecoderTest.scala +++ b/src/test/scala/AuxDecoderTest.scala @@ -12,7 +12,7 @@ class AuxDecoderTest extends AnyFunSuite, CaptureOutput { def bytes(x: Array[Byte]) = java.io.ByteArrayInputStream(x) def bytes(x: Int*) = - java.io.ByteArrayInputStream(Array[Byte](x.map(_.toByte) : _*)) + java.io.ByteArrayInputStream(Array[Byte](x.map(_.toByte): _*)) test("reading unsigned long") { assertResult(255) { @@ -26,6 +26,7 @@ class AuxDecoderTest extends AnyFunSuite, CaptureOutput { } } - lazy val functionEntriesData = 
"DgAAAAAAAAAKN8AQfJJCx6z/YC/0ajCuAQAAAAAAAAC63E936mNIwLttnN9qQwpSEC0VjLcjTrWfq3ProZkm4QEAAAAAAAAA8+/+FHF9QLyY8Cp3s20b7UK9rpkxpUlVn3nywB/Pa6MBAAAAAAAAABDsf7+EvUScja3NVtybLYB8D9RjUu1GUJuU7h50M6qCAQAAAAAAAADPt2wBnjVBh72u9sO3UmghhYEnBCBdRgqUP83Wk83TKgEAAAAAAAAABH/y67RjR/CceNXW4dxJX5v1FZgF+UVhpIPWkdaYGM8BAAAAAAAAABE9oSg0AkvBr4d3nZzAJBueV40Xb8lNGL/X4rr0M+yNAQAAAAAAAADwmKneRoNNo7x59rfe+fM2y2YqeTQETDizbRO9Gd5rTgEAAAAAAAAATtUSuPRtRpKUyIiDPj5JWdgBS4GeWUYpkXOWrjz19dsBAAAAAAAAAHtFQipdkEVBlIMEUcxGTzjjOXYE/IhEDYMFs5/1CguSAQAAAAAAAABEjFAwt29EyqDlVadlTiLU7efo2YpZRjKoGwMiNg5x7AEAAAAAAAAARcz/PMuZRmSsGVbCF4FnpfPpZJPuKUVCovWrMD5+jrUBAAAAAAAAAAHijBSRNEmvo+oayuSZKB31ZSjpkoFFbpQb5ZqbQCvBAQAAAAAAAABYmEPRwHtEIr+F+AjLrUdn9g3j8z3UR3uy4oljzAjVDgEAAAAAAAAAaXmP/QmsQ2uAJBCKxtQSsQ==" + lazy val functionEntriesData = + "DgAAAAAAAAAKN8AQfJJCx6z/YC/0ajCuAQAAAAAAAAC63E936mNIwLttnN9qQwpSEC0VjLcjTrWfq3ProZkm4QEAAAAAAAAA8+/+FHF9QLyY8Cp3s20b7UK9rpkxpUlVn3nywB/Pa6MBAAAAAAAAABDsf7+EvUScja3NVtybLYB8D9RjUu1GUJuU7h50M6qCAQAAAAAAAADPt2wBnjVBh72u9sO3UmghhYEnBCBdRgqUP83Wk83TKgEAAAAAAAAABH/y67RjR/CceNXW4dxJX5v1FZgF+UVhpIPWkdaYGM8BAAAAAAAAABE9oSg0AkvBr4d3nZzAJBueV40Xb8lNGL/X4rr0M+yNAQAAAAAAAADwmKneRoNNo7x59rfe+fM2y2YqeTQETDizbRO9Gd5rTgEAAAAAAAAATtUSuPRtRpKUyIiDPj5JWdgBS4GeWUYpkXOWrjz19dsBAAAAAAAAAHtFQipdkEVBlIMEUcxGTzjjOXYE/IhEDYMFs5/1CguSAQAAAAAAAABEjFAwt29EyqDlVadlTiLU7efo2YpZRjKoGwMiNg5x7AEAAAAAAAAARcz/PMuZRmSsGVbCF4FnpfPpZJPuKUVCovWrMD5+jrUBAAAAAAAAAAHijBSRNEmvo+oayuSZKB31ZSjpkoFFbpQb5ZqbQCvBAQAAAAAAAABYmEPRwHtEIr+F+AjLrUdn9g3j8z3UR3uy4oljzAjVDgEAAAAAAAAAaXmP/QmsQ2uAJBCKxtQSsQ==" lazy val functionEntriesBytes = Base64.getDecoder().decode(functionEntriesData) } From efdf667ad1d46ac2771579a37f3665535a46f8df Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 20 Jun 2025 17:58:09 +1000 Subject: [PATCH 05/51] scalafmt --- src/main/scala/util/functional/List.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/scala/util/functional/List.scala b/src/main/scala/util/functional/List.scala index 
d57c4ab084..a7d7035e31 100644 --- a/src/main/scala/util/functional/List.scala +++ b/src/main/scala/util/functional/List.scala @@ -78,7 +78,6 @@ extension [A, CC[X] <: IterableOps[X, CC, CC[X]], C <: CC[A]](coll: IterableOps[ val (hd, tl) = coll.splitAt(1) tl.foldLeft[A](hd.headOption.getOrElse(error))(f) - /** * Performs a left fold on the given iterable, using an *empty collection* as the base case for the fold. * The type of the empty collection is inferred from the type of the original iterable. From 19df41e53dd1685277741ede62c1103362649579 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 20 Jun 2025 18:01:45 +1000 Subject: [PATCH 06/51] i cant tell if this is shorter --- src/main/scala/translating/GTIRBToIR.scala | 32 ++++++++++------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/main/scala/translating/GTIRBToIR.scala b/src/main/scala/translating/GTIRBToIR.scala index 5e7de04fcd..62d867e9d8 100644 --- a/src/main/scala/translating/GTIRBToIR.scala +++ b/src/main/scala/translating/GTIRBToIR.scala @@ -76,26 +76,22 @@ class GTIRBToIR( mainName: Option[String] ) { - object decoders { + object auxdata { import gtirb.AuxDecoder.* - lazy val decodeFunctionNames = readMap(readUuid, readUuid) - lazy val decodeFunctionEntries = readMap(readUuid, readSet(readUuid)) - lazy val decodeFunctionBlocks = readMap(readUuid, readSet(readUuid)) + val functionNames = mods + .map(_.auxData("functionNames").data) + .map(decode(readMap(readUuid, readUuid))(_)) + .foldLeft0(_ ++ _) + val functionEntries = mods + .map(_.auxData("functionEntries").data) + .map(decode(readMap(readUuid, readSet(readUuid)))(_)) + .foldLeft0(_ ++ _) + val functionBlocks = mods + .map(_.auxData("functionBlocks").data) + .map(decode(readMap(readUuid, readSet(readUuid)))(_)) + .foldLeft0(_ ++ _) } - import decoders.* - - private val functionNames = mods - .map(_.auxData("functionNames").data) - .map(AuxDecoder.decode(decodeFunctionNames)(_)) - .foldLeft0(_ ++ _) - private val 
functionEntries = mods - .map(_.auxData("functionEntries").data) - .map(AuxDecoder.decode(decodeFunctionEntries)(_)) - .foldLeft0(_ ++ _) - private val functionBlocks = mods - .map(_.auxData("functionBlocks").data) - .map(AuxDecoder.decode(decodeFunctionBlocks)(_)) - .foldLeft0(_ ++ _) + import auxdata.* // maps block UUIDs to their address private val blockUUIDToAddress = createAddresses() From 191730d07a0b4d98c98d0a09290fb7b7f1964268 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 20 Jun 2025 18:04:31 +1000 Subject: [PATCH 07/51] foldLeft1 is just reduce - allie --- src/main/scala/translating/GTIRBToIR.scala | 2 +- src/main/scala/util/functional/List.scala | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/main/scala/translating/GTIRBToIR.scala b/src/main/scala/translating/GTIRBToIR.scala index 62d867e9d8..c690f3a2db 100644 --- a/src/main/scala/translating/GTIRBToIR.scala +++ b/src/main/scala/translating/GTIRBToIR.scala @@ -24,7 +24,7 @@ import scala.util.boundary import boundary.break import java.nio.ByteBuffer import util.intrusive_list.* -import util.functional.{Snoc, foldLeft1, foldLeft0} +import util.functional.{Snoc, foldLeft0} import util.Logger private def assigned(x: Statement): immutable.Set[Variable] = x match { diff --git a/src/main/scala/util/functional/List.scala b/src/main/scala/util/functional/List.scala index a7d7035e31..f025984566 100644 --- a/src/main/scala/util/functional/List.scala +++ b/src/main/scala/util/functional/List.scala @@ -68,16 +68,6 @@ def sequence[T, L, CC[U] <: IterableOps[U, CC, CC[U]]](xs: CC[Either[L, T]]): Ei extension [A, CC[X] <: IterableOps[X, CC, CC[X]], C <: CC[A]](coll: IterableOps[A, CC, C]) - /** - * Performs a left fold on the given *non-empty* iterable, using the first element of the iterable - * as the base case for the fold. 
- */ - def foldLeft1(f: (A, A) => A): A = - def error = throw IllegalArgumentException("foldLeft1 called with empty iterable") - - val (hd, tl) = coll.splitAt(1) - tl.foldLeft[A](hd.headOption.getOrElse(error))(f) - /** * Performs a left fold on the given iterable, using an *empty collection* as the base case for the fold. * The type of the empty collection is inferred from the type of the original iterable. From b94a1a40e3db9b33b8a3a8346f6e1d42da2e8747 Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 20 Jun 2025 23:55:56 +1000 Subject: [PATCH 08/51] what the fkkkkkkkkkkkkkkkkkk --- src/main/scala/gtirb/AuxDecoder.scala | 34 ++++++++++- src/main/scala/translating/GTIRBReadELF.scala | 60 +++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index 60fe3fa7ad..6251bd5a15 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -1,7 +1,11 @@ package gtirb +import java.util.Base64 import java.io.ByteArrayInputStream +import java.nio.charset.StandardCharsets + import com.google.protobuf.ByteString +import com.grammatech.gtirb.proto.AuxData.AuxData object AuxDecoder { @@ -11,9 +15,12 @@ object AuxDecoder { def decode[T](reader: Reader[T])(bytes: ByteString): T = reader(ByteArrayInputStream(bytes.toByteArray)) + def decode[T](reader: Reader[T])(aux: AuxData): T = + reader(ByteArrayInputStream(aux.data.toByteArray)) + def readBytes(numBytes: Int)(bs: Input): Array[Byte] = val bytes = bs.readNBytes(numBytes) - // assert(bytes.length == numBytes, s"insufficient bytes to read. got ${bytes.length} but wanted $numBytes") + assert(bytes.length == numBytes, s"insufficient bytes to read. 
got ${bytes.length} but wanted $numBytes") bytes def readBool(bs: Input) = @@ -21,6 +28,11 @@ object AuxDecoder { def readUint(numBits: Int) = readInt(numBits, false) + def readString(bs: Input) = + val len = readUint(64)(bs) + assert(len <= Int.MaxValue, "string length out of int32 range") + new String(readBytes(len.toInt)(bs), StandardCharsets.UTF_8) + def readInt(numBits: Int, signed: Boolean = false)(bs: Input) = val numBytes = numBits / 8 require(numBytes * 8 == numBits, "requires multiple of 8") @@ -51,6 +63,13 @@ object AuxDecoder { val x2 = r2(bs) (x1, x2) + def readTuple[T1, T2, T3](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3])(bs: Input) = + val x1 = r1(bs) + val x2 = r2(bs) + val x3 = r3(bs) + (x1, x2, x3) + + def readTuple[T1, T2, T3, T4](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4])(bs: Input) = val x1 = r1(bs) val x2 = r2(bs) @@ -68,8 +87,21 @@ object AuxDecoder { val x5 = r5(bs) (x1, x2, x3, x4, x5) + def readTuple[T1, T2, T3, T4, T5, T6](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4], r5: Reader[T5], r6: Reader[T6])( + bs: Input + ) = + val x1 = r1(bs) + val x2 = r2(bs) + val x3 = r3(bs) + val x4 = r4(bs) + val x5 = r5(bs) + val x6 = r6(bs) + (x1, x2, x3, x4, x5, x6) + + def readUuid(bs: Input) = ByteString.copyFrom(readBytes(16)(bs)) + // Base64.getEncoder().encodeToString(readBytes(16)(bs)) def readOffset(bs: Input) = val uuid = readUuid(bs) diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index f1135bd1f2..7e7b75d7d5 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ -1,5 +1,9 @@ package translating +import gtirb.AuxDecoder + +import java.io.ByteArrayInputStream + import com.google.protobuf.ByteString import com.grammatech.gtirb.proto.CFG.EdgeType.* import com.grammatech.gtirb.proto.CFG.CFG @@ -8,12 +12,68 @@ import com.grammatech.gtirb.proto.CFG.EdgeLabel import 
com.grammatech.gtirb.proto.Module.Module import com.grammatech.gtirb.proto.Symbol.Symbol +import scala.collection.mutable + object GTIRBReadELF { + case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Int) + + // https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html + // https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types + def readRela(bs: AuxDecoder.Input) = + import AuxDecoder.* + val (r_offset, r_info, r_addend) = readTuple(readUint(64), readUint(64), readUint(64))(bs) + val r_sym = r_info >> 32 + val r_type = r_info & 0xffffffffL + Elf64Rela(r_offset, r_info, r_addend, r_sym.toLong, r_type.toInt) + + val readElfSymbolTableIdxInfo = + import AuxDecoder.* + readMap(readUuid, readList(readTuple(readString, readUint(64)))) + + def parseRelaTab(bstr: ByteString) = { + val bs = ByteArrayInputStream(bstr.toByteArray) + var relas = mutable.ArrayBuffer[Elf64Rela]() + while (bs.available() > 0) + relas += readRela(bs) + relas.toList + } + + // see also: + // https://www.javadoc.io/doc/net.fornwall/jelf/latest/net/fornwall/jelf/ElfSymbol.html + // + // https://gist.github.com/x0nu11byt3/bcb35c3de461e5fb66173071a2379779 + // + // https://www.man7.org/linux/man-pages/man5/elf.5.html + def getExternalFunctions(mod: Module) = { val proxyBlockUuids = mod.proxies.map(_.uuid).toSet val externalFunctionSymbols = mod.symbols.filter(x => proxyBlockUuids.contains(x.getReferentUuid)) + val externalFunctionsByUuid = externalFunctionSymbols.map(x => x.uuid -> x).toMap + + val sectionsByName = mod.sections.map(x => x.name -> x).toMap + val relaDyns = parseRelaTab(sectionsByName(".rela.dyn").byteIntervals.head.contents) + val relaPlts = parseRelaTab(sectionsByName(".rela.plt").byteIntervals.head.contents) + + val tabidx = AuxDecoder.decode(readElfSymbolTableIdxInfo)(mod.auxData("elfSymbolTabIdxInfo")).flatMap { + case (sym, idxs) => idxs.map(_ -> sym) + } + + println() + println(".rela.dyn") + relaDyns.foreach 
{ + case x => + val symuuid = tabidx((".dynsym", x.r_sym.toInt)) + println(s"$x " + externalFunctionsByUuid.get(symuuid).map(_.name)) + } + println(".rela.plt") + relaPlts.foreach { + case x => + val symuuid = tabidx((".dynsym", x.r_sym.toInt)) + println(s"$x " + externalFunctionsByUuid.get(symuuid).map(_.name)) + } + } } From cebe0235368397259bf6e33f9cc4af52bc9cd33e Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 00:44:11 +1000 Subject: [PATCH 09/51] kaitai https://formats.kaitai.io/elf/java.html maybe we just vendor the java file lol --- build.mill | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/build.mill b/build.mill index 0ded7486f6..9670abe0d8 100644 --- a/build.mill +++ b/build.mill @@ -302,5 +302,12 @@ object `package` extends RootModule with ScalaModule { os.move(newFile, tagsFile, replaceExisting = true, atomicMove = true) } + object kaitai extends JavaModule { + val ksc = ivy"io.kaitai:kaitai-struct-compiler_2.12:0.10" + + override def runIvyDeps = Agg(ksc) + override def mainClass = Some("io.kaitai.struct.JavaMain") + } + } From 31e0cfe3f9ded08ac7a86cb954a73ed0a74a26e4 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 01:17:38 +1000 Subject: [PATCH 10/51] kaitai integration. ASS BECAUSE SCALA [no ci] companion objects are awful. 
i can't access the java public static inner class ;-; --- build.mill | 24 ++++++++++++++++++- src/main/scala/translating/GTIRBReadELF.scala | 13 ++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/build.mill b/build.mill index 9670abe0d8..37cbfd0c67 100644 --- a/build.mill +++ b/build.mill @@ -2,6 +2,7 @@ package build import $packages._ import mill._ +import mill.util.Jvm import scalalib._ import $file.basilmill.mdbook.mdbookBinary @@ -38,7 +39,7 @@ object `package` extends RootModule with ScalaModule { def mainClass = Some("Main") def millSourcePath = super.millSourcePath / "src" - def ivyDeps = Agg(scalactic, sourceCode, mainArgs, upickle) + def ivyDeps = Agg(scalactic, sourceCode, mainArgs, upickle, kaitai.kaitaiRuntime) def sources = Task.Sources { Seq(PathRef(this.millSourcePath / "main" / "scala")) } @@ -303,11 +304,32 @@ object `package` extends RootModule with ScalaModule { } object kaitai extends JavaModule { + + def kaitaiSource = Task { + val out = Task.dest / "a.ksy" + os.write(out, requests.get("https://github.com/kaitai-io/kaitai_struct_formats/raw/refs/heads/master/executable/elf.ksy")) + out + } + val ksc = ivy"io.kaitai:kaitai-struct-compiler_2.12:0.10" + def kaitaiRuntime = ivy"io.kaitai:kaitai-struct-runtime:0.10" override def runIvyDeps = Agg(ksc) override def mainClass = Some("io.kaitai.struct.JavaMain") + + def kaitaiGenerated = Task { + Jvm.callProcess( + mainClass = mainClass().get, + mainArgs = Seq(kaitaiSource().toString, "--target", "java", "--outdir", Task.dest.toString, "--java-package", "elf", "--no-auto-read"), + stdout = os.Inherit, + classPath = runClasspath().map(_.path).toList + ) + PathRef(Task.dest) + } } + override def generatedSources = Task { + super.generatedSources() ++ Seq(kaitai.kaitaiGenerated()) + } } diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index 7e7b75d7d5..9e40f36a14 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ 
b/src/main/scala/translating/GTIRBReadELF.scala @@ -12,8 +12,15 @@ import com.grammatech.gtirb.proto.CFG.EdgeLabel import com.grammatech.gtirb.proto.Module.Module import com.grammatech.gtirb.proto.Symbol.Symbol +import io.kaitai.struct.{KaitaiStream, ByteBufferKaitaiStream} +import elf.Elf.RelocationSection + import scala.collection.mutable +private class DummyElf(stream: KaitaiStream) extends elf.Elf(stream) { + override def bits() = elf.Elf.Bits.B64 +} + object GTIRBReadELF { case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Int) @@ -32,6 +39,9 @@ object GTIRBReadELF { readMap(readUuid, readList(readTuple(readString, readUint(64)))) def parseRelaTab(bstr: ByteString) = { + val io = ByteBufferKaitaiStream(bstr.toByteArray) + val x = elf.Elf.RelocationSection(io, null, DummyElf(null), true, true) + val bs = ByteArrayInputStream(bstr.toByteArray) var relas = mutable.ArrayBuffer[Elf64Rela]() while (bs.available() > 0) @@ -39,6 +49,9 @@ object GTIRBReadELF { relas.toList } + + + // see also: // https://www.javadoc.io/doc/net.fornwall/jelf/latest/net/fornwall/jelf/ElfSymbol.html // From f67df7083a2399c06eb37ae02a2c5c9b890fc562 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 11:39:04 +1000 Subject: [PATCH 11/51] Revert "kaitai integration. ASS BECAUSE SCALA [no ci]" This reverts commit 31e0cfe3f9ded08ac7a86cb954a73ed0a74a26e4. 
--- build.mill | 24 +------------------ src/main/scala/translating/GTIRBReadELF.scala | 13 ---------- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/build.mill b/build.mill index 37cbfd0c67..9670abe0d8 100644 --- a/build.mill +++ b/build.mill @@ -2,7 +2,6 @@ package build import $packages._ import mill._ -import mill.util.Jvm import scalalib._ import $file.basilmill.mdbook.mdbookBinary @@ -39,7 +38,7 @@ object `package` extends RootModule with ScalaModule { def mainClass = Some("Main") def millSourcePath = super.millSourcePath / "src" - def ivyDeps = Agg(scalactic, sourceCode, mainArgs, upickle, kaitai.kaitaiRuntime) + def ivyDeps = Agg(scalactic, sourceCode, mainArgs, upickle) def sources = Task.Sources { Seq(PathRef(this.millSourcePath / "main" / "scala")) } @@ -304,32 +303,11 @@ object `package` extends RootModule with ScalaModule { } object kaitai extends JavaModule { - - def kaitaiSource = Task { - val out = Task.dest / "a.ksy" - os.write(out, requests.get("https://github.com/kaitai-io/kaitai_struct_formats/raw/refs/heads/master/executable/elf.ksy")) - out - } - val ksc = ivy"io.kaitai:kaitai-struct-compiler_2.12:0.10" - def kaitaiRuntime = ivy"io.kaitai:kaitai-struct-runtime:0.10" override def runIvyDeps = Agg(ksc) override def mainClass = Some("io.kaitai.struct.JavaMain") - - def kaitaiGenerated = Task { - Jvm.callProcess( - mainClass = mainClass().get, - mainArgs = Seq(kaitaiSource().toString, "--target", "java", "--outdir", Task.dest.toString, "--java-package", "elf", "--no-auto-read"), - stdout = os.Inherit, - classPath = runClasspath().map(_.path).toList - ) - PathRef(Task.dest) - } } - override def generatedSources = Task { - super.generatedSources() ++ Seq(kaitai.kaitaiGenerated()) - } } diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index 9e40f36a14..7e7b75d7d5 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ 
-12,15 +12,8 @@ import com.grammatech.gtirb.proto.CFG.EdgeLabel import com.grammatech.gtirb.proto.Module.Module import com.grammatech.gtirb.proto.Symbol.Symbol -import io.kaitai.struct.{KaitaiStream, ByteBufferKaitaiStream} -import elf.Elf.RelocationSection - import scala.collection.mutable -private class DummyElf(stream: KaitaiStream) extends elf.Elf(stream) { - override def bits() = elf.Elf.Bits.B64 -} - object GTIRBReadELF { case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Int) @@ -39,9 +32,6 @@ object GTIRBReadELF { readMap(readUuid, readList(readTuple(readString, readUint(64)))) def parseRelaTab(bstr: ByteString) = { - val io = ByteBufferKaitaiStream(bstr.toByteArray) - val x = elf.Elf.RelocationSection(io, null, DummyElf(null), true, true) - val bs = ByteArrayInputStream(bstr.toByteArray) var relas = mutable.ArrayBuffer[Elf64Rela]() while (bs.available() > 0) @@ -49,9 +39,6 @@ object GTIRBReadELF { relas.toList } - - - // see also: // https://www.javadoc.io/doc/net.fornwall/jelf/latest/net/fornwall/jelf/ElfSymbol.html // From 6adf92dd5603100d8aab105d9bb6f787dda71f01 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 11:39:06 +1000 Subject: [PATCH 12/51] Revert "kaitai https://formats.kaitai.io/elf/java.html" This reverts commit cebe0235368397259bf6e33f9cc4af52bc9cd33e. 
--- build.mill | 7 ------- 1 file changed, 7 deletions(-) diff --git a/build.mill b/build.mill index 9670abe0d8..0ded7486f6 100644 --- a/build.mill +++ b/build.mill @@ -302,12 +302,5 @@ object `package` extends RootModule with ScalaModule { os.move(newFile, tagsFile, replaceExisting = true, atomicMove = true) } - object kaitai extends JavaModule { - val ksc = ivy"io.kaitai:kaitai-struct-compiler_2.12:0.10" - - override def runIvyDeps = Agg(ksc) - override def mainClass = Some("io.kaitai.struct.JavaMain") - } - } From 677bea2d62c8cd93d32cd0020a3c3c6bdc0cbdc7 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 12:07:28 +1000 Subject: [PATCH 13/51] groupMapReduce --- src/main/scala/translating/GTIRBReadELF.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index 7e7b75d7d5..a31f27ab61 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ -13,6 +13,7 @@ import com.grammatech.gtirb.proto.Module.Module import com.grammatech.gtirb.proto.Symbol.Symbol import scala.collection.mutable +import scala.collection.immutable.SortedMap object GTIRBReadELF { @@ -59,19 +60,21 @@ object GTIRBReadELF { val tabidx = AuxDecoder.decode(readElfSymbolTableIdxInfo)(mod.auxData("elfSymbolTabIdxInfo")).flatMap { case (sym, idxs) => idxs.map(_ -> sym) - } + }.groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_++_) + + println(tabidx) println() println(".rela.dyn") relaDyns.foreach { case x => - val symuuid = tabidx((".dynsym", x.r_sym.toInt)) + val symuuid = tabidx(".dynsym")(x.r_sym.toInt) println(s"$x " + externalFunctionsByUuid.get(symuuid).map(_.name)) } println(".rela.plt") relaPlts.foreach { case x => - val symuuid = tabidx((".dynsym", x.r_sym.toInt)) + val symuuid = tabidx(".dynsym")(x.r_sym.toInt) println(s"$x " + externalFunctionsByUuid.get(symuuid).map(_.name)) } From 
1f8073618c0ebc9924f5382da0dc6f1c465c7b8c Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 12:58:38 +1000 Subject: [PATCH 14/51] change r_type to long, add docs. BTW, ... the static class problem in 31e0cfe3f9ded08ac7a86cb954a73ed0a74a26e4 was me being silly --- src/main/scala/translating/GTIRBReadELF.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index a31f27ab61..7611c02113 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ -17,7 +17,15 @@ import scala.collection.immutable.SortedMap object GTIRBReadELF { - case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Int) + /** + * An `Elf64_Rela` structure, as described by the [System V ABI](https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.reloc.html). + * The three fields `r_offset`, `r_info`, and `r_addend` are as described in the struct. + * The last two fields, `r_sym` and `r_type`, are extracted from the `r_info` value. + * + * The [ABI supplement for AArch64](https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types) + * provides information about the interpretation of the `r_type` values. 
+ */ + case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Long) // https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html // https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types From 856616c834594d8964f2f608883f3986e766eaec Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 13:09:40 +1000 Subject: [PATCH 15/51] unfold --- src/main/scala/translating/GTIRBReadELF.scala | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index 7611c02113..929649c9eb 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ -34,19 +34,18 @@ object GTIRBReadELF { val (r_offset, r_info, r_addend) = readTuple(readUint(64), readUint(64), readUint(64))(bs) val r_sym = r_info >> 32 val r_type = r_info & 0xffffffffL - Elf64Rela(r_offset, r_info, r_addend, r_sym.toLong, r_type.toInt) + Elf64Rela(r_offset, r_info, r_addend, r_sym.toLong, r_type.toLong) val readElfSymbolTableIdxInfo = import AuxDecoder.* readMap(readUuid, readList(readTuple(readString, readUint(64)))) - def parseRelaTab(bstr: ByteString) = { + def parseRelaTab(bstr: ByteString) = val bs = ByteArrayInputStream(bstr.toByteArray) - var relas = mutable.ArrayBuffer[Elf64Rela]() - while (bs.available() > 0) - relas += readRela(bs) - relas.toList - } + List.unfold(bs) { + case bs if bs.available() > 0 => Some(readRela(bs), bs) + case _ => None + } // see also: // https://www.javadoc.io/doc/net.fornwall/jelf/latest/net/fornwall/jelf/ElfSymbol.html From 1d280c88a00371410bf79a5745f734f5d3e63689 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 22:33:36 +1000 Subject: [PATCH 16/51] a lot more work. 
all parts from readelfloader work now --- src/main/scala/gtirb/AuxDecoder.scala | 14 +++ src/main/scala/translating/GTIRBReadELF.scala | 87 ++++++++++++++++--- 2 files changed, 91 insertions(+), 10 deletions(-) diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index 6251bd5a15..6dfdd1a223 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -6,12 +6,26 @@ import java.nio.charset.StandardCharsets import com.google.protobuf.ByteString import com.grammatech.gtirb.proto.AuxData.AuxData +import com.grammatech.gtirb.proto.Module.Module + + object AuxDecoder { + enum AuxKind[T](val name: String, val decoder: Reader[T]) { + case ElfSymbolTabIdxInfo extends AuxKind("elfSymbolTabIdxInfo", readMap(readUuid, readList(readTuple(readString, readUint(64))))) + case ElfSymbolInfo extends AuxKind("elfSymbolInfo", readMap(readUuid, readTuple(readUint(64), readString, readString, readString, readUint(64)))) + case FunctionEntries extends AuxKind("functionEntries", readMap(readUuid, readSet(readUuid))) + case FunctionBlocks extends AuxKind("functionBlocks", readMap(readUuid, readSet(readUuid))) + case FunctionNames extends AuxKind("functionNames", readMap(readUuid, readUuid)) + } + type Input = ByteArrayInputStream type Reader[T] = Input => T + def decodeAux[T](known: AuxKind[T])(mod: Module) = + decode(known.decoder)(mod.auxData(known.name)) + def decode[T](reader: Reader[T])(bytes: ByteString): T = reader(ByteArrayInputStream(bytes.toByteArray)) diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index 929649c9eb..1a5a69fde7 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ -1,6 +1,7 @@ package translating import gtirb.AuxDecoder +import gtirb.AuxDecoder.{AuxKind, decodeAux} import java.io.ByteArrayInputStream @@ -11,10 +12,14 @@ import com.grammatech.gtirb.proto.CFG.Edge import 
com.grammatech.gtirb.proto.CFG.EdgeLabel import com.grammatech.gtirb.proto.Module.Module import com.grammatech.gtirb.proto.Symbol.Symbol +import com.grammatech.gtirb.proto.ByteInterval.Block +import com.grammatech.gtirb.proto.ByteInterval.ByteInterval +import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload import scala.collection.mutable import scala.collection.immutable.SortedMap + object GTIRBReadELF { /** @@ -36,10 +41,6 @@ object GTIRBReadELF { val r_type = r_info & 0xffffffffL Elf64Rela(r_offset, r_info, r_addend, r_sym.toLong, r_type.toLong) - val readElfSymbolTableIdxInfo = - import AuxDecoder.* - readMap(readUuid, readList(readTuple(readString, readUint(64)))) - def parseRelaTab(bstr: ByteString) = val bs = ByteArrayInputStream(bstr.toByteArray) List.unfold(bs) { @@ -58,32 +59,98 @@ object GTIRBReadELF { val proxyBlockUuids = mod.proxies.map(_.uuid).toSet val externalFunctionSymbols = mod.symbols.filter(x => proxyBlockUuids.contains(x.getReferentUuid)) + val symbolsByUuid = mod.symbols.map(x => x.uuid -> x).toMap + + val dataBlocksByUuid = (for { + sec <- mod.sections.toList + interval <- sec.byteIntervals + (b, innerb) <- interval.blocks.collect { + case b @ Block(_, Block.Value.Data(dat), _) => (b, dat) + // case b @ Block(_, Block.Value.Code(cod), _) => (b, cod) + } + } yield innerb.uuid -> (innerb, b, interval, sec)).toMap + + val codeBlocksByUuid = (for { + sec <- mod.sections.toList + interval <- sec.byteIntervals + (b, innerb) <- interval.blocks.collect { + case b @ Block(_, Block.Value.Code(dat), _) => (b, dat) + } + } yield innerb.uuid -> (innerb, b, interval, sec)).toMap + - val externalFunctionsByUuid = externalFunctionSymbols.map(x => x.uuid -> x).toMap val sectionsByName = mod.sections.map(x => x.name -> x).toMap val relaDyns = parseRelaTab(sectionsByName(".rela.dyn").byteIntervals.head.contents) val relaPlts = parseRelaTab(sectionsByName(".rela.plt").byteIntervals.head.contents) - val tabidx = 
AuxDecoder.decode(readElfSymbolTableIdxInfo)(mod.auxData("elfSymbolTabIdxInfo")).flatMap { + val symbolTabIdx = AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod) + val tabidx = symbolTabIdx.flatMap { case (sym, idxs) => idxs.map(_ -> sym) }.groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_++_) - - println(tabidx) + // println(tabidx) + + val symbolKinds = decodeAux(AuxKind.ElfSymbolInfo)(mod) + + import scala.math.Ordering.Implicits.seqOrdering + val allSymbols = symbolKinds.map { + case (k, pos) => + val sym = symbolsByUuid(k) + val addr = for { + uuid <- sym.optionalPayload.referentUuid + (_, block : Block, ival : ByteInterval, _) <- dataBlocksByUuid.get(uuid).orElse(codeBlocksByUuid.get(uuid)) + } yield (block.offset + ival.address) + val value = sym.optionalPayload._value.fold("")("val=" + _.toString) + (symbolTabIdx(k), addr, pos) -> s"${sym.name} $value" + }.to(SortedMap) + println(allSymbols.mkString("\n")) println() println(".rela.dyn") relaDyns.foreach { case x => val symuuid = tabidx(".dynsym")(x.r_sym.toInt) - println(s"$x " + externalFunctionsByUuid.get(symuuid).map(_.name)) + println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) } println(".rela.plt") relaPlts.foreach { case x => val symuuid = tabidx(".dynsym")(x.r_sym.toInt) - println(s"$x " + externalFunctionsByUuid.get(symuuid).map(_.name)) + println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) + } + + val specGlobals = symbolKinds.toList.collect { + case (uuid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => + val sym = symbolsByUuid(uuid) + val (data, block, interval, sec) = dataBlocksByUuid(sym.optionalPayload.referentUuid.get) + // assert(size == data.size) + assert(mod.sections(idx.toInt - 1) == sec) + (sym.name, size * 8, None, interval.address + block.offset) } + println(specGlobals) + + val funcNames = decodeAux(AuxKind.FunctionNames)(mod) + val funcNamesInverse = funcNames.map(_.swap) + + val 
funcEntries = decodeAux(AuxKind.FunctionEntries)(mod) + val funentry = symbolKinds.toList.collect { + case (symuuid, (size, "FUNC", "GLOBAL", "DEFAULT", idx)) if idx != 0 => + + val nameSymbol = symbolsByUuid(symuuid) + val funcUuid = funcNamesInverse(symuuid) + val entries = funcEntries(funcUuid) + + assert(entries.size == 1, "function with non-singular entry") + val entry = entries.head + val (_, bl, ival, _) = codeBlocksByUuid(entry) + val addr = bl.offset + ival.address + + (nameSymbol.name, size * 8, addr) + } + + println(funentry) + + } } From 805342f48801508689297ba52c5e17a2cbea3045 Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 22:40:53 +1000 Subject: [PATCH 17/51] ToScala: deriving creates multi-line output sometimes --- src/main/scala/ir/dsl/ToScalaDeriving.scala | 28 ++++++++++++++------- src/main/scala/util/Twine.scala | 5 ++++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/main/scala/ir/dsl/ToScalaDeriving.scala b/src/main/scala/ir/dsl/ToScalaDeriving.scala index a34b2c4c84..97d0a84f63 100644 --- a/src/main/scala/ir/dsl/ToScalaDeriving.scala +++ b/src/main/scala/ir/dsl/ToScalaDeriving.scala @@ -302,16 +302,26 @@ object ToScalaDeriving { inline isSingleton: Boolean, x: T ): Twine = - val args: Twine = inline isSingleton match - case true => Twine.empty - case false => - val elems = x.asInstanceOf[Product].productIterator - val args = (instances.iterator zip elems) - .map((f, x) => f.asInstanceOf[ToScala[Any]].toScalaLines(x)) - .toList - Twine("(" :: args.intersperse(", ") ::: List(")")) - Twine(name, args) + val (open, close) = inline isSingleton match + case true => ("", "") + case false => ("(", ")") + + val args = + val elems = x.asInstanceOf[Product].productIterator + (instances.iterator zip elems) + .map((f, x) => f.asInstanceOf[ToScala[Any]].toScalaLines(x)) + .toList + + if (args.exists(Twine.shallowIsMultiline)) { + Twine.indentNested( + name + open, + args, + close + ) + } else { + Twine(name + open, 
Twine(args.intersperse(", ")), close) + } /** * Helper class for wrapping a lambda function into a ToScala instance, diff --git a/src/main/scala/util/Twine.scala b/src/main/scala/util/Twine.scala index 26a4ff463b..03d5d30f78 100644 --- a/src/main/scala/util/Twine.scala +++ b/src/main/scala/util/Twine.scala @@ -172,6 +172,11 @@ object Twine { def lines(parts: (String | Twine)*) = Lines(parts.map(Twine(_))) + def shallowIsMultiline(tw: Twine) = tw match { + case _: Lines | _: Indent => true + case _ => false + } + /** * Indents a nested structure, placing the indented `elems` between `head` and `tail`, * and separating them by `sep` and `newline`. From 8f9911fa6c310794838e022d48a8f333e378edda Mon Sep 17 00:00:00 2001 From: rina Date: Sat, 21 Jun 2025 22:46:07 +1000 Subject: [PATCH 18/51] scalafmt --- src/main/scala/gtirb/AuxDecoder.scala | 24 +++++--- src/main/scala/ir/dsl/ToScala.scala | 4 +- src/main/scala/ir/dsl/ToScalaDeriving.scala | 6 +- src/main/scala/translating/GTIRBReadELF.scala | 59 +++++++++---------- .../scala/translating/ReadELFLoader.scala | 5 +- src/main/scala/util/RunUtils.scala | 5 +- 6 files changed, 47 insertions(+), 56 deletions(-) diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index 6dfdd1a223..37ae54ee64 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -8,13 +8,16 @@ import com.google.protobuf.ByteString import com.grammatech.gtirb.proto.AuxData.AuxData import com.grammatech.gtirb.proto.Module.Module - - object AuxDecoder { enum AuxKind[T](val name: String, val decoder: Reader[T]) { - case ElfSymbolTabIdxInfo extends AuxKind("elfSymbolTabIdxInfo", readMap(readUuid, readList(readTuple(readString, readUint(64))))) - case ElfSymbolInfo extends AuxKind("elfSymbolInfo", readMap(readUuid, readTuple(readUint(64), readString, readString, readString, readUint(64)))) + case ElfSymbolTabIdxInfo + extends AuxKind("elfSymbolTabIdxInfo", readMap(readUuid, 
readList(readTuple(readString, readUint(64))))) + case ElfSymbolInfo + extends AuxKind( + "elfSymbolInfo", + readMap(readUuid, readTuple(readUint(64), readString, readString, readString, readUint(64))) + ) case FunctionEntries extends AuxKind("functionEntries", readMap(readUuid, readSet(readUuid))) case FunctionBlocks extends AuxKind("functionBlocks", readMap(readUuid, readSet(readUuid))) case FunctionNames extends AuxKind("functionNames", readMap(readUuid, readUuid)) @@ -83,7 +86,6 @@ object AuxDecoder { val x3 = r3(bs) (x1, x2, x3) - def readTuple[T1, T2, T3, T4](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4])(bs: Input) = val x1 = r1(bs) val x2 = r2(bs) @@ -101,9 +103,14 @@ object AuxDecoder { val x5 = r5(bs) (x1, x2, x3, x4, x5) - def readTuple[T1, T2, T3, T4, T5, T6](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4], r5: Reader[T5], r6: Reader[T6])( - bs: Input - ) = + def readTuple[T1, T2, T3, T4, T5, T6]( + r1: Reader[T1], + r2: Reader[T2], + r3: Reader[T3], + r4: Reader[T4], + r5: Reader[T5], + r6: Reader[T6] + )(bs: Input) = val x1 = r1(bs) val x2 = r2(bs) val x3 = r3(bs) @@ -112,7 +119,6 @@ object AuxDecoder { val x6 = r6(bs) (x1, x2, x3, x4, x5, x6) - def readUuid(bs: Input) = ByteString.copyFrom(readBytes(16)(bs)) // Base64.getEncoder().encodeToString(readBytes(16)(bs)) diff --git a/src/main/scala/ir/dsl/ToScala.scala b/src/main/scala/ir/dsl/ToScala.scala index e9a58e3d3f..dfe3fcf8c2 100644 --- a/src/main/scala/ir/dsl/ToScala.scala +++ b/src/main/scala/ir/dsl/ToScala.scala @@ -94,8 +94,8 @@ given [T](using ToScala[T]): ToScalaLines[Set[T]] with given [K, V](using ToScala[K], ToScala[V]): ToScalaLines[Map[K, V]] with extension (x: Map[K, V]) def toScalaLines = - val pairs = x.map { - case (k,v) => Twine(k.toScalaLines, " -> ", v.toScalaLines) + val pairs = x.map { case (k, v) => + Twine(k.toScalaLines, " -> ", v.toScalaLines) } Twine.indentNested("Map(", pairs, ")") diff --git a/src/main/scala/ir/dsl/ToScalaDeriving.scala 
b/src/main/scala/ir/dsl/ToScalaDeriving.scala index 97d0a84f63..3f5cfe62a0 100644 --- a/src/main/scala/ir/dsl/ToScalaDeriving.scala +++ b/src/main/scala/ir/dsl/ToScalaDeriving.scala @@ -314,11 +314,7 @@ object ToScalaDeriving { .toList if (args.exists(Twine.shallowIsMultiline)) { - Twine.indentNested( - name + open, - args, - close - ) + Twine.indentNested(name + open, args, close) } else { Twine(name + open, Twine(args.intersperse(", ")), close) } diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index 1a5a69fde7..0fd063695f 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ -19,7 +19,6 @@ import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload import scala.collection.mutable import scala.collection.immutable.SortedMap - object GTIRBReadELF { /** @@ -64,68 +63,66 @@ object GTIRBReadELF { val dataBlocksByUuid = (for { sec <- mod.sections.toList interval <- sec.byteIntervals - (b, innerb) <- interval.blocks.collect { - case b @ Block(_, Block.Value.Data(dat), _) => (b, dat) - // case b @ Block(_, Block.Value.Code(cod), _) => (b, cod) + (b, innerb) <- interval.blocks.collect { case b @ Block(_, Block.Value.Data(dat), _) => + (b, dat) + // case b @ Block(_, Block.Value.Code(cod), _) => (b, cod) } } yield innerb.uuid -> (innerb, b, interval, sec)).toMap val codeBlocksByUuid = (for { sec <- mod.sections.toList interval <- sec.byteIntervals - (b, innerb) <- interval.blocks.collect { - case b @ Block(_, Block.Value.Code(dat), _) => (b, dat) + (b, innerb) <- interval.blocks.collect { case b @ Block(_, Block.Value.Code(dat), _) => + (b, dat) } } yield innerb.uuid -> (innerb, b, interval, sec)).toMap - - val sectionsByName = mod.sections.map(x => x.name -> x).toMap val relaDyns = parseRelaTab(sectionsByName(".rela.dyn").byteIntervals.head.contents) val relaPlts = parseRelaTab(sectionsByName(".rela.plt").byteIntervals.head.contents) val symbolTabIdx 
= AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod) - val tabidx = symbolTabIdx.flatMap { - case (sym, idxs) => idxs.map(_ -> sym) - }.groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_++_) + val tabidx = symbolTabIdx + .flatMap { case (sym, idxs) => + idxs.map(_ -> sym) + } + .groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_ ++ _) // println(tabidx) val symbolKinds = decodeAux(AuxKind.ElfSymbolInfo)(mod) import scala.math.Ordering.Implicits.seqOrdering - val allSymbols = symbolKinds.map { - case (k, pos) => + val allSymbols = symbolKinds + .map { case (k, pos) => val sym = symbolsByUuid(k) val addr = for { uuid <- sym.optionalPayload.referentUuid - (_, block : Block, ival : ByteInterval, _) <- dataBlocksByUuid.get(uuid).orElse(codeBlocksByUuid.get(uuid)) + (_, block: Block, ival: ByteInterval, _) <- dataBlocksByUuid.get(uuid).orElse(codeBlocksByUuid.get(uuid)) } yield (block.offset + ival.address) val value = sym.optionalPayload._value.fold("")("val=" + _.toString) (symbolTabIdx(k), addr, pos) -> s"${sym.name} $value" - }.to(SortedMap) + } + .to(SortedMap) println(allSymbols.mkString("\n")) println() println(".rela.dyn") - relaDyns.foreach { - case x => - val symuuid = tabidx(".dynsym")(x.r_sym.toInt) - println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) + relaDyns.foreach { case x => + val symuuid = tabidx(".dynsym")(x.r_sym.toInt) + println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) } println(".rela.plt") - relaPlts.foreach { - case x => - val symuuid = tabidx(".dynsym")(x.r_sym.toInt) - println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) + relaPlts.foreach { case x => + val symuuid = tabidx(".dynsym")(x.r_sym.toInt) + println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) } - val specGlobals = symbolKinds.toList.collect { - case (uuid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => - val sym = 
symbolsByUuid(uuid) - val (data, block, interval, sec) = dataBlocksByUuid(sym.optionalPayload.referentUuid.get) - // assert(size == data.size) - assert(mod.sections(idx.toInt - 1) == sec) - (sym.name, size * 8, None, interval.address + block.offset) + val specGlobals = symbolKinds.toList.collect { case (uuid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => + val sym = symbolsByUuid(uuid) + val (data, block, interval, sec) = dataBlocksByUuid(sym.optionalPayload.referentUuid.get) + // assert(size == data.size) + assert(mod.sections(idx.toInt - 1) == sec) + (sym.name, size * 8, None, interval.address + block.offset) } println(specGlobals) @@ -150,7 +147,5 @@ object GTIRBReadELF { println(funentry) - - } } diff --git a/src/main/scala/translating/ReadELFLoader.scala b/src/main/scala/translating/ReadELFLoader.scala index 92786b3347..44a453d802 100644 --- a/src/main/scala/translating/ReadELFLoader.scala +++ b/src/main/scala/translating/ReadELFLoader.scala @@ -57,10 +57,7 @@ case class ReadELFData( ) derives ir.dsl.ToScala object ReadELFLoader { - def visitSyms( - ctx: SymsContext, - config: ILLoadingConfig - ): ReadELFData = { + def visitSyms(ctx: SymsContext, config: ILLoadingConfig): ReadELFData = { val externalFunctions = ctx.relocationTable.asScala .filter(_.relocationTableHeader != null) .flatMap(r => visitRelocationTableExtFunc(r)) diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index 5b1f927ef4..fd8210d311 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -199,10 +199,7 @@ object IRLoading { GTIRBConverter.createIR() } - def loadReadELF( - fileName: String, - config: ILLoadingConfig - ): ReadELFData = { + def loadReadELF(fileName: String, config: ILLoadingConfig): ReadELFData = { val lexer = ReadELFLexer(CharStreams.fromFileName(fileName)) val tokens = CommonTokenStream(lexer) val parser = ReadELFParser(tokens) From 3c45043c806e64e153a467dbe719a350df508169 Mon Sep 17 00:00:00 2001 From: 
rina Date: Sat, 21 Jun 2025 22:53:32 +1000 Subject: [PATCH 19/51] tehe revert me --- .scalafmt.conf | 2 + src/main/scala/analysis/GammaDomains.scala | 10 ++- src/main/scala/analysis/Lattice.scala | 61 ++++++++------- .../scala/analysis/LatticeCollections.scala | 50 +++++++----- .../ReachingDefinitionsAnalysis.scala | 15 ++-- src/main/scala/analysis/RegionInjector.scala | 5 +- .../analysis/VariableDependencyAnalysis.scala | 57 +++++++------- .../DataStructureAnalysis.scala | 32 ++++---- .../data_structure_analysis/Graph.scala | 25 +++--- .../data_structure_analysis/IntervalDSA.scala | 77 ++++++++++--------- .../SymbolicValueAnalysis.scala | 5 +- .../scala/analysis/solvers/IDESolver.scala | 34 ++++---- .../analysis/solvers/UnionFindSolver.scala | 7 +- src/main/scala/gtirb/AuxDecoder.scala | 16 ++-- src/main/scala/ir/IRCursor.scala | 29 +++---- src/main/scala/ir/Program.scala | 20 +++-- src/main/scala/ir/dsl/DSL.scala | 15 ++-- src/main/scala/ir/dsl/ToScala.scala | 5 +- src/main/scala/ir/eval/ExprEval.scala | 5 +- src/main/scala/ir/eval/InterpretBasilIR.scala | 5 +- src/main/scala/ir/eval/Interpreter.scala | 5 +- src/main/scala/ir/invariant/CFGCorrect.scala | 10 ++- .../ir/invariant/SingleCallBlockEnd.scala | 5 +- .../ir/parsing/BasilEarlyBNFCVisitor.scala | 23 +++--- .../ir/parsing/BasilMainBNFCVisitor.scala | 5 +- .../scala/ir/transforms/ExternalRemover.scala | 13 ++-- src/main/scala/ir/transforms/Inline.scala | 15 ++-- src/main/scala/ir/transforms/PCTracking.scala | 5 +- .../ir/transforms/ProcedureParameters.scala | 45 ++++++----- src/main/scala/ir/transforms/Simp.scala | 50 +++++++----- src/main/scala/translating/GTIRBReadELF.scala | 61 ++++++++------- src/main/scala/translating/GTIRBToIR.scala | 9 ++- src/main/scala/translating/IRToBoogie.scala | 15 ++-- .../scala/translating/IRToBoogieNoVC.scala | 10 ++- .../translating/SpecificationLoader.scala | 20 +++-- src/main/scala/util/RunUtils.scala | 10 ++- src/main/scala/util/Twine.scala | 23 +++--- 
src/main/scala/util/functional/List.scala | 5 +- src/test/scala/InterpretTestConstProp.scala | 5 +- src/test/scala/LiveVarsAnalysisTests.scala | 5 +- src/test/scala/ir/CILVisitorTest.scala | 5 +- src/test/scala/ir/IRTest.scala | 10 ++- src/test/scala/ir/IRToDSLTest.scala | 5 +- .../TestValueDomainWithInterpreter.scala | 10 ++- 44 files changed, 481 insertions(+), 363 deletions(-) diff --git a/.scalafmt.conf b/.scalafmt.conf index 196a3419f0..7914e1c341 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -6,3 +6,5 @@ indent.defnSite = 2 optIn.configStyleArguments = false align.preset = none docstrings.style = keep +newlines.beforeCurlyLambdaParams = multilineWithCaseOnly +newlines.afterCurlyLambdaParams = squash diff --git a/src/main/scala/analysis/GammaDomains.scala b/src/main/scala/analysis/GammaDomains.scala index 0cd82dfa33..30bf753c03 100644 --- a/src/main/scala/analysis/GammaDomains.scala +++ b/src/main/scala/analysis/GammaDomains.scala @@ -166,8 +166,9 @@ class PredicateDomain(summaries: Procedure => ProcedureSummary) extends Predicat case a: Assert => and(b, expectPredicate(a.body)).simplify case i: IndirectCall => top case c: DirectCall => - c.actualParams.foldLeft(Conj(summaries(c.target).requires.map(_.pred).toSet).simplify) { case (p, (v, e)) => - p.replace(BVTerm.Var(v), exprToBVTerm(e).get).replace(GammaTerm.Var(v), exprToGammaTerm(e).get).simplify + c.actualParams.foldLeft(Conj(summaries(c.target).requires.map(_.pred).toSet).simplify) { + case (p, (v, e)) => + p.replace(BVTerm.Var(v), exprToBVTerm(e).get).replace(GammaTerm.Var(v), exprToGammaTerm(e).get).simplify } case g: GoTo => b case r: Return => b @@ -232,8 +233,9 @@ class WpDualDomain(summaries: Procedure => ProcedureSummary) extends PredicateEn case a: Assert => or(b, not(expectPredicate(a.body))).simplify case i: IndirectCall => bot case c: DirectCall => - not(c.actualParams.foldLeft(Conj(summaries(c.target).requires.map(_.pred).toSet).simplify) { case (p, (v, e)) => - p.replace(BVTerm.Var(v), 
exprToBVTerm(e).get).replace(GammaTerm.Var(v), exprToGammaTerm(e).get).simplify + not(c.actualParams.foldLeft(Conj(summaries(c.target).requires.map(_.pred).toSet).simplify) { + case (p, (v, e)) => + p.replace(BVTerm.Var(v), exprToBVTerm(e).get).replace(GammaTerm.Var(v), exprToGammaTerm(e).get).simplify }) case g: GoTo => b case r: Return => b diff --git a/src/main/scala/analysis/Lattice.scala b/src/main/scala/analysis/Lattice.scala index a0b03f09b9..1dd3d33e62 100644 --- a/src/main/scala/analysis/Lattice.scala +++ b/src/main/scala/analysis/Lattice.scala @@ -200,8 +200,9 @@ class SASILattice extends Lattice[StridedWrappedInterval] { SIBottom } else { // create singleton intervals for each value and then join them - x.foldLeft(bottom) { case (acc, v) => - lub(acc, singletonSI(v, w)) + x.foldLeft(bottom) { + case (acc, v) => + lub(acc, singletonSI(v, w)) } } } @@ -286,10 +287,11 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case (VSTop, _) => VSTop case (_, VSTop) => VSTop case (VS(m1), VS(m2)) => - VS(m1.keys.foldLeft(m2) { case (acc, k) => - val v1 = m1(k) - val v2 = m2(k) - acc + (k -> lattice.lub(v1, v2)) + VS(m1.keys.foldLeft(m2) { + case (acc, k) => + val v1 = m1(k) + val v2 = m2(k) + acc + (k -> lattice.lub(v1, v2)) }) } } @@ -379,10 +381,11 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case (VSTop, _) => VSTop case (_, VSTop) => VSTop case (VS(m1), VS(m2)) => - VS(m1.keys.foldLeft(m2) { case (acc, k) => - val v1 = m1(k) - val v2 = m2(k) - acc + (k -> lattice.add(v1, v2)) + VS(m1.keys.foldLeft(m2) { + case (acc, k) => + val v1 = m1(k) + val v2 = m2(k) + acc + (k -> lattice.add(v1, v2)) }) } } @@ -392,8 +395,9 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case VSBottom => VSBottom case VSTop => VSTop case VS(m) => - VS(m.map { case (k, s) => - k -> lattice.add(s, y.value, y.size) // TODO: is the size correct here? + VS(m.map { + case (k, s) => + k -> lattice.add(s, y.value, y.size) // TODO: is the size correct here? 
}) } } @@ -405,10 +409,11 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case (VSBottom, t) => VSBottom case (t, VSBottom) => t case (VS(m1), VS(m2)) => - VS(m1.keys.foldLeft(m2) { case (acc, k) => - val v1 = m1(k) - val v2 = m2(k) - acc + (k -> lattice.sub(v1, v2)) + VS(m1.keys.foldLeft(m2) { + case (acc, k) => + val v1 = m1(k) + val v2 = m2(k) + acc + (k -> lattice.sub(v1, v2)) }) } } @@ -418,8 +423,9 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case VSTop => VSTop case VSBottom => VSBottom case VS(m) => - VS(m.map { case (k, s) => - k -> lattice.sub(s, y.value, y.size) // TODO: is the size correct here? + VS(m.map { + case (k, s) => + k -> lattice.sub(s, y.value, y.size) // TODO: is the size correct here? }) } } @@ -445,8 +451,9 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case VSBottom => VSBottom case VSTop => VSTop case VS(m) => - VS(m.map { case (k, SI(s, l, u, w)) => - k -> SI(s, lattice.lowestPossibleValue, u, w) + VS(m.map { + case (k, SI(s, l, u, w)) => + k -> SI(s, lattice.lowestPossibleValue, u, w) }) } } @@ -456,8 +463,9 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case VSBottom => VSBottom case VSTop => VSTop case VS(m) => - VS(m.map { case (k, SI(s, l, u, w)) => - k -> SI(s, l, lattice.highestPossibleValue, w) + VS(m.map { + case (k, SI(s, l, u, w)) => + k -> SI(s, l, lattice.highestPossibleValue, w) }) } } @@ -546,10 +554,11 @@ class FlagLattice extends Lattice[Flag] { case (BOTTOM_Flag, t) => t case (t, BOTTOM_Flag) => t case (FlagMap(m1), FlagMap(m2)) => - FlagMap(m1.keys.foldLeft(m2) { case (acc, k) => - val v1 = m1(k) - val v2 = m2(k) - acc + (k -> lattice.lub(v1, v2)) + FlagMap(m1.keys.foldLeft(m2) { + case (acc, k) => + val v1 = m1(k) + val v2 = m2(k) + acc + (k -> lattice.lub(v1, v2)) }) } } diff --git a/src/main/scala/analysis/LatticeCollections.scala b/src/main/scala/analysis/LatticeCollections.scala index 3fe7a37949..be50e90304 100644 --- 
a/src/main/scala/analysis/LatticeCollections.scala +++ b/src/main/scala/analysis/LatticeCollections.scala @@ -225,16 +225,19 @@ private def latticeMapJoin[D, L]( case (Top(), _) => Top() case (Bottom(), b) => b case (TopMap(a), TopMap(b)) => - TopMap(a.foldLeft(b) { case (m, (k, v)) => - m + (k -> join(m.getOrElse(k, top), v)) + TopMap(a.foldLeft(b) { + case (m, (k, v)) => + m + (k -> join(m.getOrElse(k, top), v)) }) case (TopMap(a), BottomMap(b)) => - TopMap(b.foldLeft(a) { case (m, (k, v)) => - m + (k -> join(m.getOrElse(k, top), v)) + TopMap(b.foldLeft(a) { + case (m, (k, v)) => + m + (k -> join(m.getOrElse(k, top), v)) }) case (BottomMap(a), BottomMap(b)) => - BottomMap(a.foldLeft(b) { case (m, (k, v)) => - m + (k -> join(m.getOrElse(k, bottom), v)) + BottomMap(a.foldLeft(b) { + case (m, (k, v)) => + m + (k -> join(m.getOrElse(k, bottom), v)) }) case (a, b) => latticeMapJoin(b, a, join, top, bottom) } @@ -253,16 +256,19 @@ private def latticeMapMeet[D, L]( case (Top(), b) => b case (Bottom(), _) => Bottom() case (TopMap(a), TopMap(b)) => - TopMap(a.foldLeft(b) { case (m, (k, v)) => - m + (k -> meet(m.getOrElse(k, top), v)) + TopMap(a.foldLeft(b) { + case (m, (k, v)) => + m + (k -> meet(m.getOrElse(k, top), v)) }) case (TopMap(a), BottomMap(b)) => - BottomMap(a.foldLeft(b) { case (m, (k, v)) => - m + (k -> meet(m.getOrElse(k, bottom), v)) + BottomMap(a.foldLeft(b) { + case (m, (k, v)) => + m + (k -> meet(m.getOrElse(k, bottom), v)) }) case (BottomMap(a), BottomMap(b)) => - BottomMap(a.foldLeft(b) { case (m, (k, v)) => - m + (k -> meet(m.getOrElse(k, bottom), v)) + BottomMap(a.foldLeft(b) { + case (m, (k, v)) => + m + (k -> meet(m.getOrElse(k, bottom), v)) }) case (a, b) => latticeMapMeet(b, a, meet, top, bottom) } @@ -319,20 +325,24 @@ trait MapDomain[D, L] extends AbstractDomain[LatticeMap[D, L]] { case (Top(), _) => Top() case (_, Top()) => Top() case (BottomMap(a), BottomMap(b)) => - BottomMap(a.foldLeft(b) { case (m, (b, v)) => - m + (b -> 
widenTerm(m.getOrElse(b, botTerm), v, pos)) + BottomMap(a.foldLeft(b) { + case (m, (b, v)) => + m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) }) case (BottomMap(a), TopMap(b)) => - TopMap(a.foldLeft(b) { case (m, (b, v)) => - m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) + TopMap(a.foldLeft(b) { + case (m, (b, v)) => + m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) }) case (TopMap(a), BottomMap(b)) => - TopMap(b.foldLeft(a) { case (m, (a, v)) => - m + (a -> widenTerm(v, m.getOrElse(a, botTerm), pos)) + TopMap(b.foldLeft(a) { + case (m, (a, v)) => + m + (a -> widenTerm(v, m.getOrElse(a, botTerm), pos)) }) case (TopMap(a), TopMap(b)) => - TopMap(a.foldLeft(b) { case (m, (b, v)) => - m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) + TopMap(a.foldLeft(b) { + case (m, (b, v)) => + m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) }) } diff --git a/src/main/scala/analysis/ReachingDefinitionsAnalysis.scala b/src/main/scala/analysis/ReachingDefinitionsAnalysis.scala index 9894c63c54..84b1faa18f 100644 --- a/src/main/scala/analysis/ReachingDefinitionsAnalysis.scala +++ b/src/main/scala/analysis/ReachingDefinitionsAnalysis.scala @@ -42,8 +42,9 @@ trait ReachingDefinitionsAnalysis(program: Program) { vars: Set[Variable], s: (Map[Variable, Set[Assign]], Map[Variable, Set[Assign]]) ): (Map[Variable, Set[Assign]], Map[Variable, Set[Assign]]) = { - vars.foldLeft((s(0), Map.empty[Variable, Set[Assign]])) { case ((state, acc), v) => - (state, acc + (v -> state(v))) + vars.foldLeft((s(0), Map.empty[Variable, Set[Assign]])) { + case ((state, acc), v) => + (state, acc + (v -> state(v))) } } @@ -57,8 +58,9 @@ trait ReachingDefinitionsAnalysis(program: Program) { // for lhs, addOrReplace the definition val rhs = assign.rhs.variables val lhs = assign.lhs - val rhsUseDefs: Map[Variable, Set[Assign]] = rhs.foldLeft(Map.empty[Variable, Set[Assign]]) { case (acc, v) => - acc + (v -> s(0)(v)) + val rhsUseDefs: Map[Variable, Set[Assign]] = 
rhs.foldLeft(Map.empty[Variable, Set[Assign]]) { + case (acc, v) => + acc + (v -> s(0)(v)) } (s(0) + (lhs -> Set(assign)), rhsUseDefs) case assert: Assert => @@ -68,8 +70,9 @@ trait ReachingDefinitionsAnalysis(program: Program) { case memoryLoad: MemoryLoad => val lhs = memoryLoad.lhs val rhs = memoryLoad.index.variables - val rhsUseDefs: Map[Variable, Set[Assign]] = rhs.foldLeft(Map.empty[Variable, Set[Assign]]) { case (acc, v) => - acc + (v -> s(0)(v)) + val rhsUseDefs: Map[Variable, Set[Assign]] = rhs.foldLeft(Map.empty[Variable, Set[Assign]]) { + case (acc, v) => + acc + (v -> s(0)(v)) } (s(0) + (lhs -> Set(memoryLoad)), rhsUseDefs) case assume: Assume => diff --git a/src/main/scala/analysis/RegionInjector.scala b/src/main/scala/analysis/RegionInjector.scala index 8e4932a095..1cfc9c016c 100644 --- a/src/main/scala/analysis/RegionInjector.scala +++ b/src/main/scala/analysis/RegionInjector.scala @@ -188,8 +188,9 @@ class RegionInjectorMRA(override val program: Program, mmm: MemoryModelMap) exte } override def sharedRegions(): Iterable[MergedRegion] = { - mergedRegions.collect { case (_: DataRegion | _: HeapRegion, region: MergedRegion) => - region + mergedRegions.collect { + case (_: DataRegion | _: HeapRegion, region: MergedRegion) => + region } } } diff --git a/src/main/scala/analysis/VariableDependencyAnalysis.scala b/src/main/scala/analysis/VariableDependencyAnalysis.scala index 462ce3f0b3..bc026b5fbd 100644 --- a/src/main/scala/analysis/VariableDependencyAnalysis.scala +++ b/src/main/scala/analysis/VariableDependencyAnalysis.scala @@ -53,8 +53,9 @@ trait ProcVariableDependencyAnalysisFunctions( else d match { case Left(v) => - call.actualParams.toList.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]]()) { case (m, (inVar, expr)) => - if expr.variables.contains(v) then m + (Left(inVar) -> IdEdge()) else m + call.actualParams.toList.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]]()) { + case (m, (inVar, expr)) => + if expr.variables.contains(v) then m 
+ (Left(inVar) -> IdEdge()) else m } case Right(_) => call.actualParams.toList.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]](d -> IdEdge())) { @@ -104,8 +105,9 @@ trait ProcVariableDependencyAnalysisFunctions( varDepsSummaries .get(call.target) .map(summary => { - summary.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]]()) { case (m, (outVar, deps)) => - if deps.contains(v) then m + (Left(outVar) -> IdEdge()) else m + summary.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]]()) { + case (m, (outVar, deps)) => + if deps.contains(v) then m + (Left(outVar) -> IdEdge()) else m } }) .getOrElse(Map()) @@ -127,36 +129,39 @@ trait ProcVariableDependencyAnalysisFunctions( val init: Map[DL, EdgeFunction[LatticeSet[Variable]]] = if call.outParams.exists(_._2 == v) then Map() else Map(d -> IdEdge()) - call.actualParams.foldLeft(init) { case (m, (inVar, expr)) => - if !expr.variables.contains(v) then m - else { - summary.foldLeft(m) { case (m, (endVar, deps)) => - endVar match { - case endVar: LocalVar if call.target.formalOutParam.contains(endVar) => { - if deps.contains(inVar) then m + (Left(call.outParams(endVar)) -> IdEdge()) - else m - } - case _ => m + call.actualParams.foldLeft(init) { + case (m, (inVar, expr)) => + if !expr.variables.contains(v) then m + else { + summary.foldLeft(m) { + case (m, (endVar, deps)) => + endVar match { + case endVar: LocalVar if call.target.formalOutParam.contains(endVar) => { + if deps.contains(inVar) then m + (Left(call.outParams(endVar)) -> IdEdge()) + else m + } + case _ => m + } } } - } } } case Right(_) => val initialise = call.outParams.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]](d -> IdEdge())) { case (m, (formalVar, resultVar)) => m + (Left(resultVar) -> ConstEdge(FiniteSet(Set()))) } - val ret = summary.foldLeft(initialise) { case (m, (endVar, deps)) => - endVar match { - case endVar: LocalVar if call.target.formalOutParam.contains(endVar) => - deps match { - case Top() | DiffSet(_) => m + 
(Left(call.outParams(endVar)) -> ConstEdge(Top())) - case FiniteSet(s) if s == Set() => - m + (Left(call.outParams(endVar)) -> ConstEdge(FiniteSet(Set()))) - case _ => m - } - case _ => m - } + val ret = summary.foldLeft(initialise) { + case (m, (endVar, deps)) => + endVar match { + case endVar: LocalVar if call.target.formalOutParam.contains(endVar) => + deps match { + case Top() | DiffSet(_) => m + (Left(call.outParams(endVar)) -> ConstEdge(Top())) + case FiniteSet(s) if s == Set() => + m + (Left(call.outParams(endVar)) -> ConstEdge(FiniteSet(Set()))) + case _ => m + } + case _ => m + } } ret } diff --git a/src/main/scala/analysis/data_structure_analysis/DataStructureAnalysis.scala b/src/main/scala/analysis/data_structure_analysis/DataStructureAnalysis.scala index 663d2b142d..c3958898fa 100644 --- a/src/main/scala/analysis/data_structure_analysis/DataStructureAnalysis.scala +++ b/src/main/scala/analysis/data_structure_analysis/DataStructureAnalysis.scala @@ -139,13 +139,14 @@ class DataStructureAnalysis( } // assert(calleeGraph.formals.isEmpty || buGraph.varToCell(begin(callee)).equals(calleeGraph.formals)) - calleeGraph.globalMapping.foreach { case (range: AddressRange, Field(node: Node, offset: BigInt)) => - val field = calleeGraph.find(node) - val res = buGraph.mergeCells( - buGraph.globalMapping(range).node.getCell(buGraph.globalMapping(range).offset), - field.node.getCell(field.offset + offset) - ) - buGraph.handleOverlapping(res) + calleeGraph.globalMapping.foreach { + case (range: AddressRange, Field(node: Node, offset: BigInt)) => + val field = calleeGraph.find(node) + val res = buGraph.mergeCells( + buGraph.globalMapping(range).node.getCell(buGraph.globalMapping(range).offset), + field.node.getCell(field.offset + offset) + ) + buGraph.handleOverlapping(res) } if (buGraph.varToCell.contains(callee)) { @@ -204,14 +205,15 @@ class DataStructureAnalysis( node.cloneNode(callersGraph, calleesGraph) } - callersGraph.globalMapping.foreach { case (range: 
AddressRange, Field(oldNode, internal)) => - // val node = callersGraph - val field = callersGraph.find(oldNode) - val res = calleesGraph.mergeCells( - calleesGraph.globalMapping(range).node.getCell(calleesGraph.globalMapping(range).offset), - field.node.getCell(field.offset + internal) - ) - calleesGraph.handleOverlapping(res) + callersGraph.globalMapping.foreach { + case (range: AddressRange, Field(oldNode, internal)) => + // val node = callersGraph + val field = callersGraph.find(oldNode) + val res = calleesGraph.mergeCells( + calleesGraph.globalMapping(range).node.getCell(calleesGraph.globalMapping(range).offset), + field.node.getCell(field.offset + internal) + ) + calleesGraph.handleOverlapping(res) } callSite.paramCells.keySet.foreach { variable => diff --git a/src/main/scala/analysis/data_structure_analysis/Graph.scala b/src/main/scala/analysis/data_structure_analysis/Graph.scala index 2ff14de7b6..a550756f56 100644 --- a/src/main/scala/analysis/data_structure_analysis/Graph.scala +++ b/src/main/scala/analysis/data_structure_analysis/Graph.scala @@ -943,18 +943,19 @@ class Graph(using Counter)( newGraph.stackMapping.update(offset, idToNode(node.id)) } - globalMapping.foreach { case (range: AddressRange, Field(node, offset)) => - assert(newGraph.globalMapping.contains(range)) - val cell: Cell = find(node.getCell(offset)) - val finalNode: Node = cell.node.get - nodes.add(finalNode) - if !idToNode.contains(finalNode.id) then - val newNode = finalNode.cloneSelf(newGraph) - idToNode.update(finalNode.id, newNode) - newGraph.globalMapping.update( - range, - Field(idToNode(finalNode.id), cell.offset + (offset - finalNode.getCell(offset).offset)) - ) + globalMapping.foreach { + case (range: AddressRange, Field(node, offset)) => + assert(newGraph.globalMapping.contains(range)) + val cell: Cell = find(node.getCell(offset)) + val finalNode: Node = cell.node.get + nodes.add(finalNode) + if !idToNode.contains(finalNode.id) then + val newNode = finalNode.cloneSelf(newGraph) 
+ idToNode.update(finalNode.id, newNode) + newGraph.globalMapping.update( + range, + Field(idToNode(finalNode.id), cell.offset + (offset - finalNode.getCell(offset).offset)) + ) } val queue = mutable.Queue[Node]() diff --git a/src/main/scala/analysis/data_structure_analysis/IntervalDSA.scala b/src/main/scala/analysis/data_structure_analysis/IntervalDSA.scala index b89424c4b5..72e036431f 100644 --- a/src/main/scala/analysis/data_structure_analysis/IntervalDSA.scala +++ b/src/main/scala/analysis/data_structure_analysis/IntervalDSA.scala @@ -43,18 +43,19 @@ class IntervalGraph( symVal: SymValSet[DSInterval], current: Map[SymBase, IntervalNode] ): Map[SymBase, IntervalNode] = { - symVal.state.filter((base, _) => base != NonPointer).foldLeft(current) { case (result, (base, symOffsets)) => - val node = find(result.getOrElse(base, init(base, None))) - base match - case Heap(call) => node.flags.heap = true - case Stack(proc) => node.flags.stack = true - case Global => node.flags.global = true - case NonPointer => - throw new Exception("Attempted to create a node from an Non-pointer symbolic base") - case unknown: (Ret | Par | Loaded) => - node.flags.unknown = true - node.flags.incomplete = true - result + (base -> node) + symVal.state.filter((base, _) => base != NonPointer).foldLeft(current) { + case (result, (base, symOffsets)) => + val node = find(result.getOrElse(base, init(base, None))) + base match + case Heap(call) => node.flags.heap = true + case Stack(proc) => node.flags.stack = true + case Global => node.flags.global = true + case NonPointer => + throw new Exception("Attempted to create a node from an Non-pointer symbolic base") + case unknown: (Ret | Par | Loaded) => + node.flags.unknown = true + node.flags.incomplete = true + result + (base -> node) } } @@ -73,9 +74,10 @@ class IntervalGraph( globalNode.add(DSInterval(address.toInt, address.toInt)) // ignore size, could be a composite type } - globalOffsets.foreach { case (address, relocated) => - 
globalNode.add(address.toInt) - globalNode.add(relocated.toInt) + globalOffsets.foreach { + case (address, relocated) => + globalNode.add(address.toInt) + globalNode.add(relocated.toInt) } externalFunctions.foreach(e => @@ -85,10 +87,11 @@ class IntervalGraph( ext.node.flags.foreign = true ) - globalOffsets.map(_.swap).foreach { case (address, relocated) => - val pointee = find(globalNode.get(address.toInt)) - val pointer = find(globalNode).add(DSInterval(relocated.toInt, relocated.toInt + 8)) - pointer.setPointee(pointee) + globalOffsets.map(_.swap).foreach { + case (address, relocated) => + val pointee = find(globalNode.get(address.toInt)) + val pointer = find(globalNode).add(DSInterval(relocated.toInt, relocated.toInt + 8)) + pointer.setPointee(pointee) } globalNode @@ -97,8 +100,9 @@ class IntervalGraph( def buildNodes(): Map[SymBase, IntervalNode] = { val global = globalNode(irContext.globals ++ irContext.funcEntries, irContext.globalOffsets, irContext.externalFunctions) - sva.state.foldLeft(Map[SymBase, IntervalNode](Global -> global)) { case (m, (variable, valueSet)) => - symValToNodes(valueSet, m) + sva.state.foldLeft(Map[SymBase, IntervalNode](Global -> global)) { + case (m, (variable, valueSet)) => + symValToNodes(valueSet, m) } } @@ -166,14 +170,15 @@ class IntervalGraph( // returns the cells corresponding to the def symValToCells(symVal: SymValSet[DSInterval]): Set[IntervalCell] = { val pairs = symVal.state.filter((base, _) => base != NonPointer) - pairs.foldLeft(Set[IntervalCell]()) { case (results, (base: SymBase, offsets: DSInterval)) => - val (node, adjustment) = findNode(nodes(base)) - if offsets == Top then results + node.collapse() - else - results ++ offsets.toIntervals - .filter(i => base != Global || isGlobal(i.start.get)) - .map(_.move(i => i + adjustment)) - .map(node.add) + pairs.foldLeft(Set[IntervalCell]()) { + case (results, (base: SymBase, offsets: DSInterval)) => + val (node, adjustment) = findNode(nodes(base)) + if offsets == Top then 
results + node.collapse() + else + results ++ offsets.toIntervals + .filter(i => base != Global || isGlobal(i.start.get)) + .map(_.move(i => i + adjustment)) + .map(node.add) } } @@ -923,17 +928,19 @@ object IntervalDSA { cons.inParams .filterNot(f => unchanged.exists(i => f._1.name.startsWith(i))) .filter(f => cons.target.formalInParam.contains(f._1)) - .foreach { case (formal, actual) => - val (sourceExpr, targetExpr) = if phase == TD then (actual, formal) else (formal, actual) - exprTransfer(sourceExpr, targetExpr, source, target, oldToNew) + .foreach { + case (formal, actual) => + val (sourceExpr, targetExpr) = if phase == TD then (actual, formal) else (formal, actual) + exprTransfer(sourceExpr, targetExpr, source, target, oldToNew) } cons.outParams .filterNot(f => unchanged.exists(i => f._1.name.startsWith(i))) .filter(f => cons.target.formalOutParam.contains(f._1)) - .foreach { case (out, actual) => - val (sourceExpr, targetExpr) = if phase == TD then (actual, out) else (out, actual) - exprTransfer(sourceExpr, targetExpr, source, target, oldToNew) + .foreach { + case (out, actual) => + val (sourceExpr, targetExpr) = if phase == TD then (actual, out) else (out, actual) + exprTransfer(sourceExpr, targetExpr, source, target, oldToNew) } // TODO add unification between unused indirect call out params and their corresponding input version } diff --git a/src/main/scala/analysis/data_structure_analysis/SymbolicValueAnalysis.scala b/src/main/scala/analysis/data_structure_analysis/SymbolicValueAnalysis.scala index 73ce45b927..c142898225 100644 --- a/src/main/scala/analysis/data_structure_analysis/SymbolicValueAnalysis.scala +++ b/src/main/scala/analysis/data_structure_analysis/SymbolicValueAnalysis.scala @@ -433,8 +433,9 @@ class SymValuesDomain[T <: Offsets](using symValSetDomain: SymValSetDomain[T]) e join(a, retInitSymValSet, block) case ind: IndirectCall => a // TODO possibly map every live variable to top case ret: Return => - val update = 
SymValues(ret.outParams.map { case (outVar: LocalVar, value: Expr) => - outVar -> SymValues.exprToSymValSet(a)(value) + val update = SymValues(ret.outParams.map { + case (outVar: LocalVar, value: Expr) => + outVar -> SymValues.exprToSymValSet(a)(value) }) join(a, update, block) diff --git a/src/main/scala/analysis/solvers/IDESolver.scala b/src/main/scala/analysis/solvers/IDESolver.scala index efcad7b1f3..3e72d2c69f 100644 --- a/src/main/scala/analysis/solvers/IDESolver.scala +++ b/src/main/scala/analysis/solvers/IDESolver.scala @@ -153,17 +153,18 @@ abstract class IDESolver[ this.analyze() val res = mutable.Map[Procedure, mutable.Map[DL, mutable.Map[DL, EdgeFunction[T]]]]() - x.foreach { case ((n, d1, d2), e) => - if (isExit(n)) { - val exit: EE = n.asInstanceOf[EE] - val proc = IRWalk.procedure(exit) - val m1 = res.getOrElseUpdate( - proc, - mutable.Map[DL, mutable.Map[DL, EdgeFunction[T]]]().withDefaultValue(mutable.Map[DL, EdgeFunction[T]]()) - ) - val m2 = m1.getOrElseUpdate(d1, mutable.Map[DL, EdgeFunction[T]]()) - m2 += d2 -> e - } + x.foreach { + case ((n, d1, d2), e) => + if (isExit(n)) { + val exit: EE = n.asInstanceOf[EE] + val proc = IRWalk.procedure(exit) + val m1 = res.getOrElseUpdate( + proc, + mutable.Map[DL, mutable.Map[DL, EdgeFunction[T]]]().withDefaultValue(mutable.Map[DL, EdgeFunction[T]]()) + ) + val m2 = m1.getOrElseUpdate(d1, mutable.Map[DL, EdgeFunction[T]]()) + m2 += d2 -> e + } } Logger.debug(s"Function summaries:\n${res .map { (f, s) => @@ -238,11 +239,12 @@ abstract class IDESolver[ /** Restructures the analysis output to match `restructuredlattice`. 
*/ def restructure(y: lattice.Element): restructuredlattice.Element = { - y.foldLeft(Map[CFGPosition, Map[D, valuelattice.Element]]()) { case (acc, ((n, dl), e)) => - dl match { - case Left(d) => acc + (n -> (acc.getOrElse(n, Map[D, valuelattice.Element]()) + (d -> e))) - case _ => acc - } + y.foldLeft(Map[CFGPosition, Map[D, valuelattice.Element]]()) { + case (acc, ((n, dl), e)) => + dl match { + case Left(d) => acc + (n -> (acc.getOrElse(n, Map[D, valuelattice.Element]()) + (d -> e))) + case _ => acc + } } } } diff --git a/src/main/scala/analysis/solvers/UnionFindSolver.scala b/src/main/scala/analysis/solvers/UnionFindSolver.scala index 4012a0d724..44906cec5c 100644 --- a/src/main/scala/analysis/solvers/UnionFindSolver.scala +++ b/src/main/scala/analysis/solvers/UnionFindSolver.scala @@ -37,9 +37,10 @@ class UnionFindSolver[A] { mkUnion(v2, t1) case (f1: Cons[A], f2: Cons[A]) if f1.doMatch(f2) => mkUnion(f1, f2) - f1.args.zip(f2.args).foreach { case (a1, a2) => - Logger.debug(s"Unifying subterms $a1 and $a2") - unify(a1, a2) + f1.args.zip(f2.args).foreach { + case (a1, a2) => + Logger.debug(s"Unifying subterms $a1 and $a2") + unify(a1, a2) } case (x, y) => throw new UnificationFailure(s"Cannot unify $t1 and $t2 (with representatives $x and $y)") diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index 37ae54ee64..2d1a52bbab 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -54,17 +54,19 @@ object AuxDecoder { val numBytes = numBits / 8 require(numBytes * 8 == numBits, "requires multiple of 8") - readBytes(numBytes)(bs).foldRight(BigInt(0)) { case (x, acc) => - val n = x.toInt - acc * 256 + (if (!signed && n < 0) then n + 256 else n) + readBytes(numBytes)(bs).foldRight(BigInt(0)) { + case (x, acc) => + val n = x.toInt + acc * 256 + (if (!signed && n < 0) then n + 256 else n) } def readMap[K, V](keyReader: Reader[K], valReader: Reader[V])(bs: Input) = val len = readUint(64)(bs) 
- (BigInt(0) until len).map { case _ => - val k = keyReader(bs) - val v = valReader(bs) - k -> v + (BigInt(0) until len).map { + case _ => + val k = keyReader(bs) + val v = valReader(bs) + k -> v }.toMap def readSet[K, V](valReader: Reader[V])(bs: Input) = diff --git a/src/main/scala/ir/IRCursor.scala b/src/main/scala/ir/IRCursor.scala index 0e8b22379c..7f8a59882a 100644 --- a/src/main/scala/ir/IRCursor.scala +++ b/src/main/scala/ir/IRCursor.scala @@ -318,8 +318,9 @@ def getDetachedBlocks(p: Procedure) = { def dotBlockGraph(proc: Procedure): String = { val o = getDetachedBlocks(proc) dotBlockGraph( - proc.collect { case b: Block => - b + proc.collect { + case b: Block => + b }, o.reachableFromBlockEmptyPred ) @@ -329,8 +330,9 @@ def dotBlockGraph(prog: Program): String = { val e = prog.procedures.toSet.flatMap(getDetachedBlocks(_).reachableFromBlockEmptyPred) dotBlockGraph( - prog.collect { case b: Block => - b + prog.collect { + case b: Block => + b }, e ) @@ -338,15 +340,16 @@ def dotBlockGraph(prog: Program): String = { def dotBlockGraph(blocks: Iterable[Block], orphaned: Set[Block]): String = { val printer = translating.BasilIRPrettyPrinter() - val labels: Map[CFGPosition, String] = (blocks.collect { case b: Block => - b -> { - (b.statements.toList.map(printer.apply(_) + ";") ++ { - b.jump match { - case g: GoTo => List() - case o => List(printer(o) + ";") - } - }).map(" " + _).mkString("\n") - } + val labels: Map[CFGPosition, String] = (blocks.collect { + case b: Block => + b -> { + (b.statements.toList.map(printer.apply(_) + ";") ++ { + b.jump match { + case g: GoTo => List() + case o => List(printer(o) + ";") + } + }).map(" " + _).mkString("\n") + } }).toMap toDot[Block](blocks.toSet, IntraProcBlockIRCursor, labels, orphaned) diff --git a/src/main/scala/ir/Program.scala b/src/main/scala/ir/Program.scala index b40986bbdc..afdfdd0991 100644 --- a/src/main/scala/ir/Program.scala +++ b/src/main/scala/ir/Program.scala @@ -78,8 +78,9 @@ class Program( } val t = 
toMap(this) val o = toMap(p) - (mainProcedure.name == p.mainProcedure.name) && (t.keys == o.keys) && t.keys.forall { case k => - t(k).deepEquals(o(k)) + (mainProcedure.name == p.mainProcedure.name) && (t.keys == o.keys) && t.keys.forall { + case k => + t(k).deepEquals(o(k)) } } @@ -326,8 +327,9 @@ class Procedure private ( } private def deepEqualsProc(p: Procedure) = { name == p.name && (p.blocks.size == blocks.size) && { - p.blocksBookended.zip(blocksBookended).forall { case ((l: Block), (r: Block)) => - l.deepEqualsDbg(r) + p.blocksBookended.zip(blocksBookended).forall { + case ((l: Block), (r: Block)) => + l.deepEqualsDbg(r) } } } @@ -565,8 +567,9 @@ class Block private ( case o => false } private def deepEqualsBlock(b: Block): Boolean = { - (label == b.label) && statements.zip(b.statements).forall { case (l, r) => - l.deepEqualsDbg(r) + (label == b.label) && statements.zip(b.statements).forall { + case (l, r) => + l.deepEqualsDbg(r) } } @@ -609,8 +612,9 @@ class Block private ( assert(!incomingJumps.contains(g)) } - def calls: Set[Procedure] = statements.toSet.collect { case d: DirectCall => - d.target + def calls: Set[Procedure] = statements.toSet.collect { + case d: DirectCall => + d.target } def modifies: Set[Global] = statements.flatMap(_.modifies).toSet diff --git a/src/main/scala/ir/dsl/DSL.scala b/src/main/scala/ir/dsl/DSL.scala index d8bf2eab48..ccdc92d425 100644 --- a/src/main/scala/ir/dsl/DSL.scala +++ b/src/main/scala/ir/dsl/DSL.scala @@ -244,8 +244,9 @@ case class EventuallyBlock( override def deepEquals(o: Object) = o match { case EventuallyBlock(`label`, osl, oj, `address`) => - j.deepEquals(oj) && sl.size == osl.size && osl.toList.zip(sl).forall { case (l, r) => - l.deepEquals(r) + j.deepEquals(oj) && sl.size == osl.size && osl.toList.zip(sl).forall { + case (l, r) => + l.deepEquals(r) } case _ => false @@ -315,8 +316,9 @@ case class EventuallyProcedure( override def deepEquals(o: Object) = o match { case EventuallyProcedure(`label`, `in`, `out`, 
b, `entryBlockLabel`, `returnBlockLabel`, `address`) => { b.size == blocks.size && { - b.zip(blocks).forall { case (l, r) => - l.deepEquals(r) + b.zip(blocks).forall { + case (l, r) => + l.deepEquals(r) } } } @@ -406,8 +408,9 @@ case class EventuallyProgram( override def deepEquals(o: Object) = o match { case EventuallyProgram(mp, op, im) => { - mp.deepEquals(mainProcedure) && op.size == otherProcedures.size && op.zip(otherProcedures).forall { case (l, r) => - l.deepEquals(r) + mp.deepEquals(mainProcedure) && op.size == otherProcedures.size && op.zip(otherProcedures).forall { + case (l, r) => + l.deepEquals(r) } } } diff --git a/src/main/scala/ir/dsl/ToScala.scala b/src/main/scala/ir/dsl/ToScala.scala index dfe3fcf8c2..bb454b7138 100644 --- a/src/main/scala/ir/dsl/ToScala.scala +++ b/src/main/scala/ir/dsl/ToScala.scala @@ -94,8 +94,9 @@ given [T](using ToScala[T]): ToScalaLines[Set[T]] with given [K, V](using ToScala[K], ToScala[V]): ToScalaLines[Map[K, V]] with extension (x: Map[K, V]) def toScalaLines = - val pairs = x.map { case (k, v) => - Twine(k.toScalaLines, " -> ", v.toScalaLines) + val pairs = x.map { + case (k, v) => + Twine(k.toScalaLines, " -> ", v.toScalaLines) } Twine.indentNested("Map(", pairs, ")") diff --git a/src/main/scala/ir/eval/ExprEval.scala b/src/main/scala/ir/eval/ExprEval.scala index e60e025260..b0871cbe9e 100644 --- a/src/main/scala/ir/eval/ExprEval.scala +++ b/src/main/scala/ir/eval/ExprEval.scala @@ -293,8 +293,9 @@ def statePartialEvalExpr[S](l: Loader[S, InterpreterError])(exp: Expr): State[S, } State.protect( () => ns, - { case e => - Errored(e.toString) + { + case e => + Errored(e.toString) }: PartialFunction[Exception, InterpreterError] ) diff --git a/src/main/scala/ir/eval/InterpretBasilIR.scala b/src/main/scala/ir/eval/InterpretBasilIR.scala index 5c71247bd2..fa2acf8d66 100644 --- a/src/main/scala/ir/eval/InterpretBasilIR.scala +++ b/src/main/scala/ir/eval/InterpretBasilIR.scala @@ -329,8 +329,9 @@ object InterpFuns { 
f.setNext(Run(IRWalk.firstInBlock(gt.targets.head))) } case gt: GoTo => - val assumes = gt.targets.flatMap(_.statements.headOption).collect { case a: Assume => - a + val assumes = gt.targets.flatMap(_.statements.headOption).collect { + case a: Assume => + a } for { _ <- diff --git a/src/main/scala/ir/eval/Interpreter.scala b/src/main/scala/ir/eval/Interpreter.scala index dfae63a5c7..4c2a5e072a 100644 --- a/src/main/scala/ir/eval/Interpreter.scala +++ b/src/main/scala/ir/eval/Interpreter.scala @@ -192,8 +192,9 @@ case class MemoryState( /** Debug return useful values * */ def getGlobalVals: Map[String, BitVecLiteral] = { - stackFrames(globalFrame).collect { case (k, Scalar(b: BitVecLiteral)) => - k -> b + stackFrames(globalFrame).collect { + case (k, Scalar(b: BitVecLiteral)) => + k -> b } } diff --git a/src/main/scala/ir/invariant/CFGCorrect.scala b/src/main/scala/ir/invariant/CFGCorrect.scala index 8187ad6e6b..b4ec94a286 100644 --- a/src/main/scala/ir/invariant/CFGCorrect.scala +++ b/src/main/scala/ir/invariant/CFGCorrect.scala @@ -6,14 +6,16 @@ import scala.collection.mutable def cfgCorrect(p: Program | Procedure) = { - val forwardsInter = p.collect { case d @ DirectCall(tgt, _, _, _) => - (d.parent.parent, tgt) + val forwardsInter = p.collect { + case d @ DirectCall(tgt, _, _, _) => + (d.parent.parent, tgt) } val revForwardsInter = forwardsInter.groupBy(_._2).map((dest, origs) => (dest, origs.map(_._1).toSet)).toMap val forwardsInterMap = forwardsInter.groupBy(_._1).map((orig, dests) => (orig, dests.map(_._2).toSet)).toMap - val forwardsIntra = p.collect { case g @ GoTo(targets, _) => - targets.map((t: Block) => (g.parent, t)) + val forwardsIntra = p.collect { + case g @ GoTo(targets, _) => + targets.map((t: Block) => (g.parent, t)) }.flatten val revForwardsIntra = forwardsIntra.groupBy(_._2).map((dest, origs) => (dest, origs.map(_._1).toSet)).toMap diff --git a/src/main/scala/ir/invariant/SingleCallBlockEnd.scala 
b/src/main/scala/ir/invariant/SingleCallBlockEnd.scala index 1215fde191..f99a1f3b3f 100644 --- a/src/main/scala/ir/invariant/SingleCallBlockEnd.scala +++ b/src/main/scala/ir/invariant/SingleCallBlockEnd.scala @@ -4,8 +4,9 @@ import ir.* def singleCallBlockEnd(p: Program): Boolean = { p.forall { case b: Block => { - val calls = (b.statements.collect { case c: Call => - b.statements.lastOption.contains(c) + val calls = (b.statements.collect { + case c: Call => + b.statements.lastOption.contains(c) }) (calls.size <= 1) && calls.headOption.getOrElse(true) } diff --git a/src/main/scala/ir/parsing/BasilEarlyBNFCVisitor.scala b/src/main/scala/ir/parsing/BasilEarlyBNFCVisitor.scala index 296ab8f616..370816437d 100644 --- a/src/main/scala/ir/parsing/BasilEarlyBNFCVisitor.scala +++ b/src/main/scala/ir/parsing/BasilEarlyBNFCVisitor.scala @@ -64,17 +64,18 @@ case class BasilEarlyBNFCVisitor[A]() // Members declared in Program.Visitor override def visit(x: syntax.Prog, arg: A) = - x.listdeclaration_.asScala.foldLeft(Declarations.empty) { case (decls, x) => - try { - decls.merge(x.accept(this, arg)) - } catch { - case e: IllegalArgumentException => - throw ParseException( - "encountered duplicate declarations with the same name", - x.asInstanceOf[HasParsePosition], - e - ) - } + x.listdeclaration_.asScala.foldLeft(Declarations.empty) { + case (decls, x) => + try { + decls.merge(x.accept(this, arg)) + } catch { + case e: IllegalArgumentException => + throw ParseException( + "encountered duplicate declarations with the same name", + x.asInstanceOf[HasParsePosition], + e + ) + } } // Members declared in MExpr.Visitor diff --git a/src/main/scala/ir/parsing/BasilMainBNFCVisitor.scala b/src/main/scala/ir/parsing/BasilMainBNFCVisitor.scala index ab149f837a..00bb5a8c90 100644 --- a/src/main/scala/ir/parsing/BasilMainBNFCVisitor.scala +++ b/src/main/scala/ir/parsing/BasilMainBNFCVisitor.scala @@ -139,8 +139,9 @@ case class InnerBasilBNFCVisitor[A]( // Members declared in 
CallLVars.Visitor override def visit(x: syntax.NoOutParams, arg: A): BasilParseValue = Nil override def visit(x: syntax.LocalVars, arg: A): BasilParseValue = - val innerlocals = x.listlvar_.asScala.collect { case x: syntax.LVarDef => - x + val innerlocals = x.listlvar_.asScala.collect { + case x: syntax.LVarDef => + x }.toList if (innerlocals.nonEmpty) { throw ParseException( diff --git a/src/main/scala/ir/transforms/ExternalRemover.scala b/src/main/scala/ir/transforms/ExternalRemover.scala index 73c63751a0..73740b6f45 100644 --- a/src/main/scala/ir/transforms/ExternalRemover.scala +++ b/src/main/scala/ir/transforms/ExternalRemover.scala @@ -3,11 +3,12 @@ import ir.* import cilvisitor.* def removeBodyOfExternal(external: Set[String])(prog: Program) = { - prog.procedures.foreach { case p => - if (external.contains(p.procName)) { - // update the modifies set before removing the body - p.modifies.addAll(p.blocks.flatMap(_.modifies)) - p.replaceBlocks(Seq()) - } + prog.procedures.foreach { + case p => + if (external.contains(p.procName)) { + // update the modifies set before removing the body + p.modifies.addAll(p.blocks.flatMap(_.modifies)) + p.replaceBlocks(Seq()) + } } } diff --git a/src/main/scala/ir/transforms/Inline.scala b/src/main/scala/ir/transforms/Inline.scala index 93a47c5390..4ea29f889e 100644 --- a/src/main/scala/ir/transforms/Inline.scala +++ b/src/main/scala/ir/transforms/Inline.scala @@ -60,8 +60,9 @@ def convertJumpRenaming(blockName: String => String, varName: CILVisitor, x: Jum case Return(label, out) => EventuallyReturn( out.toList - .map { case (v: Variable, e: Expr) => - ((v.name, visit_expr(varName, e))) + .map { + case (v: Variable, e: Expr) => + ((v.name, visit_expr(varName, e))) } .to(ArraySeq), label @@ -150,11 +151,13 @@ def inlineCall(prog: Program, c: DirectCall): Unit = { case r: EventuallyReturn => r.params.toMap case _ => throw Exception("returnblock should have a return statement") } - val outAssignments = c.outParams.map { case 
(formal: LocalVar, lvar: Variable) => - LocalAssign(lvar, targetReturnValues(formal.name)) + val outAssignments = c.outParams.map { + case (formal: LocalVar, lvar: Variable) => + LocalAssign(lvar, targetReturnValues(formal.name)) } - val inAssignments = c.actualParams.map { case (formal: LocalVar, actual: Expr) => - LocalAssign(visit_rvar(varRenamer, formal), actual) + val inAssignments = c.actualParams.map { + case (formal: LocalVar, actual: Expr) => + LocalAssign(visit_rvar(varRenamer, formal), actual) } afterCallBlock.statements.prependAll(outAssignments) entryTempBlock.statements.prependAll(inAssignments) diff --git a/src/main/scala/ir/transforms/PCTracking.scala b/src/main/scala/ir/transforms/PCTracking.scala index f63d9720df..ca3dabac42 100644 --- a/src/main/scala/ir/transforms/PCTracking.scala +++ b/src/main/scala/ir/transforms/PCTracking.scala @@ -26,8 +26,9 @@ object PCTracking { case PCTrackingOption.Keep => Logger.info(s"[!] Removing PC-tracking assertion statements, keeping PC assignments") - program.collect { case x @ Assert(_, _, Some("pc-tracking")) => - x.parent.statements.remove(x) + program.collect { + case x @ Assert(_, _, Some("pc-tracking")) => + x.parent.statements.remove(x) } case PCTrackingOption.Assert => Logger.info(s"[!] 
Inserting PC-tracking requires/ensures") diff --git a/src/main/scala/ir/transforms/ProcedureParameters.scala b/src/main/scala/ir/transforms/ProcedureParameters.scala index 971c19252e..4b4b3fcf26 100644 --- a/src/main/scala/ir/transforms/ProcedureParameters.scala +++ b/src/main/scala/ir/transforms/ProcedureParameters.scala @@ -127,18 +127,21 @@ def liftProcedureCallAbstraction(ctx: util.IRContext): util.IRContext = { transforms.applyRPO(ctx.program) val liveLab = () => - liveVars.collect { case (b: Block, r) => - b -> { - val live = r.toList.collect { case (v, TwoElementTop) => - v - } - val dead = r.toList.collect { case (v, TwoElementBottom) => - v + liveVars.collect { + case (b: Block, r) => + b -> { + val live = r.toList.collect { + case (v, TwoElementTop) => + v + } + val dead = r.toList.collect { + case (v, TwoElementBottom) => + v + } + val livel = live.map(_.name).toList.sorted.mkString(", ") + // val deadl = dead.map(_.name).toList.sorted.mkString(", ") + s"Live: $livel" } - val livel = live.map(_.name).toList.sorted.mkString(", ") - // val deadl = dead.map(_.name).toList.sorted.mkString(", ") - s"Live: $livel" - } }.toMap DebugDumpIRLogger.writeToFile( @@ -200,8 +203,9 @@ def collectVariables(p: Procedure): (Set[Variable], Set[Variable]) = { } })) ++ p.blocks .map(_.jump) - .collect { case r: Return => - r.outParams.toSet.map(_._1) + .collect { + case r: Return => + r.outParams.toSet.map(_._1) } .flatten val rvars = p.blocks.toSet.flatMap(_.statements.flatMap(s => { @@ -373,8 +377,9 @@ def inOutParams( case (p, Some(x)) => (p, ReadWriteAnalysis.onlyGlobal(x)) } - val procEnd = p.procedures.map { case p => - p -> p.returnBlock.getOrElse(p) + val procEnd = p.procedures.map { + case p => + p -> p.returnBlock.getOrElse(p) }.toMap val lives: Map[Procedure, (Set[Variable], Set[Variable])] = p.procedures @@ -383,8 +388,9 @@ def inOutParams( def toLiveSet(p: Option[Map[Variable, TwoElement]]): Set[Variable] = { p.map(p => { - p.collect { case (v, TwoElementTop) => 
- v + p.collect { + case (v, TwoElementTop) => + v }.toSet }).getOrElse(overapprox) } @@ -445,8 +451,9 @@ def inOutParams( val origIn = oldParams(proc)._1 val origOut = oldParams(proc)._2 - val calls = proc.collect { case c: DirectCall => - c + val calls = proc.collect { + case c: DirectCall => + c } val modifiedFromCall = diff --git a/src/main/scala/ir/transforms/Simp.scala b/src/main/scala/ir/transforms/Simp.scala index 8d294f8644..a8ef443927 100644 --- a/src/main/scala/ir/transforms/Simp.scala +++ b/src/main/scala/ir/transforms/Simp.scala @@ -199,8 +199,9 @@ def removeSlices(p: Procedure): Unit = { .flatten .groupBy(_._1) .map((k, v) => (k, v.map(_._2).toSet)) - .collect { case (k: LocalVar, v) => - (k, v) + .collect { + case (k: LocalVar, v) => + (k, v) } enum HighZeroBits: case Bits(n: Int) // (i) and (ii) hold; the n highest bits are redundant @@ -261,8 +262,9 @@ def removeSlices(p: Procedure): Unit = { } lhs -> varHighZeroBits.get(rep) }) - .collect { case (l, Some(x)) /* remove anything we have no information on */ => - (l, x) + .collect { + case (l, Some(x)) /* remove anything we have no information on */ => + (l, x) } class CheckUsesHaveExtend() extends CILVisitor { val result: mutable.HashMap[LocalVar, HighZeroBits] = @@ -286,8 +288,9 @@ def removeSlices(p: Procedure): Unit = { result.toMap } } - val toSmallen = CheckUsesHaveExtend()(varsWithExtend)(p).collect { case (v, HighZeroBits.Bits(x)) => - v -> x + val toSmallen = CheckUsesHaveExtend()(varsWithExtend)(p).collect { + case (v, HighZeroBits.Bits(x)) => + v -> x }.toMap class ReplaceAlwaysSlicedVars(varHighZeroBits: Map[LocalVar, Int]) extends CILVisitor { override def vexpr(v: Expr) = { @@ -406,8 +409,9 @@ def getRedundantAssignments(procedure: Procedure): Set[Assign] = { var removeOld = toRemove val r = toRemove - .collect { case (v, VS.Assigned(d)) => - d + .collect { + case (v, VS.Assigned(d)) => + d } .toSet .flatten @@ -598,12 +602,14 @@ class GuardVisitor(validate: Boolean = false) extends 
CILVisitor { var defs = Map[Variable, Set[Assign]]() def allDefinitions(p: Procedure): Map[Variable, Set[Assign]] = { - p.collect { case a: Assign => - a.assignees.map(l => l -> a) + p.collect { + case a: Assign => + a.assignees.map(l => l -> a) }.flatten .groupBy(_._1) - .map { case (v, ass) => - v -> ass.map(_._2).toSet + .map { + case (v, ass) => + v -> ass.map(_._2).toSet } } @@ -1438,8 +1444,9 @@ object CopyProp { def replaceVar(lhs: Variable, rhs: Option[Expr] = None) = { st = st - .filterNot { case (l, r) => - r.variables.contains(lhs) || l == lhs + .filterNot { + case (l, r) => + r.variables.contains(lhs) || l == lhs } rhs.foreach(nrhs => st = st.updated(lhs, nrhs)) } @@ -1858,11 +1865,13 @@ def findDefinitelyExits(p: Program) = { val solve = interprocSummaryFixpointSolver(ldom, dom) val res = solve.solveProgInterProc(p, true) ProcReturnInfo( - res.collect { case (p, PathExit.Return) => - p + res.collect { + case (p, PathExit.Return) => + p }.toSet, - res.collect { case (p, PathExit.NoReturn) => - p + res.collect { + case (p, PathExit.NoReturn) => + p }.toSet ) } @@ -1982,8 +1991,9 @@ def fixupGuards(p: Procedure): Unit = { def removeDuplicateGuard(b: Iterable[Block]): Unit = { b.foreach { case block: Block if IRWalk.firstInBlock(block).isInstanceOf[Assume] => { - val assumes = block.statements.collect { case a: Assume => - a + val assumes = block.statements.collect { + case a: Assume => + a }.toList val chosen = assumes.head.body diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/translating/GTIRBReadELF.scala index 0fd063695f..67c13c0838 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/translating/GTIRBReadELF.scala @@ -63,17 +63,19 @@ object GTIRBReadELF { val dataBlocksByUuid = (for { sec <- mod.sections.toList interval <- sec.byteIntervals - (b, innerb) <- interval.blocks.collect { case b @ Block(_, Block.Value.Data(dat), _) => - (b, dat) - // case b @ Block(_, Block.Value.Code(cod), _) => (b, 
cod) + (b, innerb) <- interval.blocks.collect { + case b @ Block(_, Block.Value.Data(dat), _) => + (b, dat) + // case b @ Block(_, Block.Value.Code(cod), _) => (b, cod) } } yield innerb.uuid -> (innerb, b, interval, sec)).toMap val codeBlocksByUuid = (for { sec <- mod.sections.toList interval <- sec.byteIntervals - (b, innerb) <- interval.blocks.collect { case b @ Block(_, Block.Value.Code(dat), _) => - (b, dat) + (b, innerb) <- interval.blocks.collect { + case b @ Block(_, Block.Value.Code(dat), _) => + (b, dat) } } yield innerb.uuid -> (innerb, b, interval, sec)).toMap @@ -83,8 +85,9 @@ object GTIRBReadELF { val symbolTabIdx = AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod) val tabidx = symbolTabIdx - .flatMap { case (sym, idxs) => - idxs.map(_ -> sym) + .flatMap { + case (sym, idxs) => + idxs.map(_ -> sym) } .groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_ ++ _) // println(tabidx) @@ -93,36 +96,40 @@ object GTIRBReadELF { import scala.math.Ordering.Implicits.seqOrdering val allSymbols = symbolKinds - .map { case (k, pos) => - val sym = symbolsByUuid(k) - val addr = for { - uuid <- sym.optionalPayload.referentUuid - (_, block: Block, ival: ByteInterval, _) <- dataBlocksByUuid.get(uuid).orElse(codeBlocksByUuid.get(uuid)) - } yield (block.offset + ival.address) - val value = sym.optionalPayload._value.fold("")("val=" + _.toString) - (symbolTabIdx(k), addr, pos) -> s"${sym.name} $value" + .map { + case (k, pos) => + val sym = symbolsByUuid(k) + val addr = for { + uuid <- sym.optionalPayload.referentUuid + (_, block: Block, ival: ByteInterval, _) <- dataBlocksByUuid.get(uuid).orElse(codeBlocksByUuid.get(uuid)) + } yield (block.offset + ival.address) + val value = sym.optionalPayload._value.fold("")("val=" + _.toString) + (symbolTabIdx(k), addr, pos) -> s"${sym.name} $value" } .to(SortedMap) println(allSymbols.mkString("\n")) println() println(".rela.dyn") - relaDyns.foreach { case x => - val symuuid = 
tabidx(".dynsym")(x.r_sym.toInt) - println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) + relaDyns.foreach { + case x => + val symuuid = tabidx(".dynsym")(x.r_sym.toInt) + println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) } println(".rela.plt") - relaPlts.foreach { case x => - val symuuid = tabidx(".dynsym")(x.r_sym.toInt) - println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) + relaPlts.foreach { + case x => + val symuuid = tabidx(".dynsym")(x.r_sym.toInt) + println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) } - val specGlobals = symbolKinds.toList.collect { case (uuid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => - val sym = symbolsByUuid(uuid) - val (data, block, interval, sec) = dataBlocksByUuid(sym.optionalPayload.referentUuid.get) - // assert(size == data.size) - assert(mod.sections(idx.toInt - 1) == sec) - (sym.name, size * 8, None, interval.address + block.offset) + val specGlobals = symbolKinds.toList.collect { + case (uuid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => + val sym = symbolsByUuid(uuid) + val (data, block, interval, sec) = dataBlocksByUuid(sym.optionalPayload.referentUuid.get) + // assert(size == data.size) + assert(mod.sections(idx.toInt - 1) == sec) + (sym.name, size * 8, None, interval.address + block.offset) } println(specGlobals) diff --git a/src/main/scala/translating/GTIRBToIR.scala b/src/main/scala/translating/GTIRBToIR.scala index c690f3a2db..a16ad0c0c9 100644 --- a/src/main/scala/translating/GTIRBToIR.scala +++ b/src/main/scala/translating/GTIRBToIR.scala @@ -382,10 +382,11 @@ class GTIRBToIR( procedure.entryBlock = block } - block.address.foreach { case addr => - val pcCorrectExpr = BinaryExpr(EQ, Register("_PC", 64), BitVecLiteral(addr, 64)) - val assertPC = Assert(pcCorrectExpr, Some("pc-tracking"), Some("pc-tracking")) - block.statements.append(assertPC) + block.address.foreach { + case addr => + val pcCorrectExpr = 
BinaryExpr(EQ, Register("_PC", 64), BitVecLiteral(addr, 64)) + val assertPC = Assert(pcCorrectExpr, Some("pc-tracking"), Some("pc-tracking")) + block.statements.append(assertPC) } block } diff --git a/src/main/scala/translating/IRToBoogie.scala b/src/main/scala/translating/IRToBoogie.scala index 0fd734808f..127a926787 100644 --- a/src/main/scala/translating/IRToBoogie.scala +++ b/src/main/scala/translating/IRToBoogie.scala @@ -640,14 +640,16 @@ class IRToBoogie( private def translateAtomicStart(a: AtomicSection): List[BCmd] = { val sharedLoads = a.getBlocks.flatMap { b => - b.statements.collect { case load @ MemoryLoad(_, _: SharedMemory, _, _, _, _) => - load + b.statements.collect { + case load @ MemoryLoad(_, _: SharedMemory, _, _, _, _) => + load } } val sharedStores = a.getBlocks.flatMap { b => - b.statements.collect { case store @ MemoryStore(_: SharedMemory, _, _, _, _, _) => - store + b.statements.collect { + case store @ MemoryStore(_: SharedMemory, _, _, _, _, _) => + store } } @@ -669,8 +671,9 @@ class IRToBoogie( private def translateAtomicEnd(a: AtomicSection): List[BCmd] = { val sharedStores = a.getBlocks.flatMap { b => - b.statements.collect { case store @ MemoryStore(_: SharedMemory, _, _, _, _, _) => - store + b.statements.collect { + case store @ MemoryStore(_: SharedMemory, _, _, _, _, _) => + store } } diff --git a/src/main/scala/translating/IRToBoogieNoVC.scala b/src/main/scala/translating/IRToBoogieNoVC.scala index 6212f8910a..aa33bb9bcc 100644 --- a/src/main/scala/translating/IRToBoogieNoVC.scala +++ b/src/main/scala/translating/IRToBoogieNoVC.scala @@ -188,11 +188,13 @@ class FindVars extends CILVisitor { SkipChildren() } - def globals = (vars ++ mems).collect { case g: Global => - g + def globals = (vars ++ mems).collect { + case g: Global => + g } - def locals = vars.collect { case v: LocalVar => - v + def locals = vars.collect { + case v: LocalVar => + v } } diff --git a/src/main/scala/translating/SpecificationLoader.scala 
b/src/main/scala/translating/SpecificationLoader.scala index 63fbb8f96b..28c1253789 100644 --- a/src/main/scala/translating/SpecificationLoader.scala +++ b/src/main/scala/translating/SpecificationLoader.scala @@ -389,8 +389,9 @@ case class SpecificationLoader(symbols: Set[SpecGlobal], program: Program) { r } - val requires = ctx.requires.asScala.collect { case r: ParsedRequiresContext => - visitExpr(r.expr, nameToGlobals, params) + val requires = ctx.requires.asScala.collect { + case r: ParsedRequiresContext => + visitExpr(r.expr, nameToGlobals, params) }.toList val modifies = Option(ctx.modifies) match { @@ -398,16 +399,19 @@ case class SpecificationLoader(symbols: Set[SpecGlobal], program: Program) { case None => List() } - val ensures = ctx.ensures.asScala.collect { case e: ParsedEnsuresContext => - visitExpr(e.expr, nameToGlobals, params) + val ensures = ctx.ensures.asScala.collect { + case e: ParsedEnsuresContext => + visitExpr(e.expr, nameToGlobals, params) }.toList - val requiresDirect = ctx.requires.asScala.collect { case r: DirectRequiresContext => - r.QUOTESTRING.getText.stripPrefix("\"").stripSuffix("\"") + val requiresDirect = ctx.requires.asScala.collect { + case r: DirectRequiresContext => + r.QUOTESTRING.getText.stripPrefix("\"").stripSuffix("\"") }.toList - val ensuresDirect = ctx.ensures.asScala.collect { case r: DirectEnsuresContext => - r.QUOTESTRING.getText.stripPrefix("\"").stripSuffix("\"") + val ensuresDirect = ctx.ensures.asScala.collect { + case r: DirectEnsuresContext => + r.QUOTESTRING.getText.stripPrefix("\"").stripSuffix("\"") }.toList val rely = Option(ctx.relies) match { diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index fd8210d311..e6221d3d22 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -654,8 +654,9 @@ object StaticAnalysis { toVisit.pushAll( IntraProcBlockIRCursor .succ(next) - .diff(visited.collect[Block] { case b: Block => - b + 
.diff(visited.collect[Block] { + case b: Block => + b }) ) @@ -760,8 +761,9 @@ object RunUtils { File(s"${s}_blockgraph-after-dsa.dot"), dotBlockGraph( program, - (program.collect { case b: Block => - b -> pp_block(b) + (program.collect { + case b: Block => + b -> pp_block(b) }).toMap ) ) diff --git a/src/main/scala/util/Twine.scala b/src/main/scala/util/Twine.scala index 03d5d30f78..f1ea0f1b8f 100644 --- a/src/main/scala/util/Twine.scala +++ b/src/main/scala/util/Twine.scala @@ -64,17 +64,18 @@ sealed trait Twine { // furthermore, newline and indent should only be inserted upon // reaching a non-empty literal string. multiple Lines nodes placed within // each other should not insert additional newlines or indentation. - lines.foreach { case l => - val first = firstInLine.once() - if (!first) - doNewline = Once() - helper(l, ind) - - // if no newline was introduced by this list element, we should manually - // add one if needed. this allows blank lines to be produced by placing - // Twine.empty within Lines. - if (!first && doNewline.once()) - sb ++= newline + lines.foreach { + case l => + val first = firstInLine.once() + if (!first) + doNewline = Once() + helper(l, ind) + + // if no newline was introduced by this list element, we should manually + // add one if needed. this allows blank lines to be produced by placing + // Twine.empty within Lines. 
+ if (!first && doNewline.once()) + sb ++= newline } case Concat(tws) => tws.foreach(helper(_, ind)) } diff --git a/src/main/scala/util/functional/List.scala b/src/main/scala/util/functional/List.scala index f025984566..a25b6a4089 100644 --- a/src/main/scala/util/functional/List.scala +++ b/src/main/scala/util/functional/List.scala @@ -51,8 +51,9 @@ def sequence[DD[V] <: IterableOps[V, DD, DD[V]], CC[U] <: IterableOps[U, CC, CC[ def cc(x: T): CC[T] = xs.iterableFactory.newBuilder.addOne(x).result val base: DD[CC[T]] = dd(cc0()) - xs.foldRight(base) { case (ys, rest) => - ys.flatMap((y: T) => rest.map((r: CC[T]) => cc(y) ++ r)) + xs.foldRight(base) { + case (ys, rest) => + ys.flatMap((y: T) => rest.map((r: CC[T]) => cc(y) ++ r)) } } diff --git a/src/test/scala/InterpretTestConstProp.scala b/src/test/scala/InterpretTestConstProp.scala index 0080f75dc1..b05fb628d0 100644 --- a/src/test/scala/InterpretTestConstProp.scala +++ b/src/test/scala/InterpretTestConstProp.scala @@ -59,8 +59,9 @@ class InterpretTestConstProp ir.transforms.clearParams(ictx.program) val analyses = RunUtils.staticAnalysis(StaticAnalysisConfig(None, None, None), ictx) - val analysisres = analyses.intraProcConstProp.collect { case (block: Block, v) => - block -> v + val analysisres = analyses.intraProcConstProp.collect { + case (block: Block, v) => + block -> v } val result = runTestInterpreter(ictx, analysisres) diff --git a/src/test/scala/LiveVarsAnalysisTests.scala b/src/test/scala/LiveVarsAnalysisTests.scala index d5e120af75..ac1cb3e759 100644 --- a/src/test/scala/LiveVarsAnalysisTests.scala +++ b/src/test/scala/LiveVarsAnalysisTests.scala @@ -269,8 +269,9 @@ class LiveVarsAnalysisTests extends AnyFunSuite, CaptureOutput, BASILTest { info("bean1") info( analysisResults.keySet - .collect { case b: Block => - b.label + .collect { + case b: Block => + b.label } .mkString("; ") ) diff --git a/src/test/scala/ir/CILVisitorTest.scala b/src/test/scala/ir/CILVisitorTest.scala index cfc2c35acd..1051b57b86 
100644 --- a/src/test/scala/ir/CILVisitorTest.scala +++ b/src/test/scala/ir/CILVisitorTest.scala @@ -19,8 +19,9 @@ class FindVars extends CILVisitor { SkipChildren() } - def globals = vars.collect { case g: Global => - g + def globals = vars.collect { + case g: Global => + g } } diff --git a/src/test/scala/ir/IRTest.scala b/src/test/scala/ir/IRTest.scala index cf7941a7e0..80b4082d32 100644 --- a/src/test/scala/ir/IRTest.scala +++ b/src/test/scala/ir/IRTest.scala @@ -84,8 +84,9 @@ class IRTest extends AnyFunSuite with CaptureOutput { val blocks = p.labelToBlock - val directcalls = p.collect { case c: DirectCall => - c + val directcalls = p.collect { + case c: DirectCall => + c } assert(p.toSet.contains(blocks("l_main_1").jump)) @@ -313,8 +314,9 @@ class IRTest extends AnyFunSuite with CaptureOutput { ) ) - val blockOrder = p.mainProcedure.preOrderIterator.collect { case b: Block => - b.label + val blockOrder = p.mainProcedure.preOrderIterator.collect { + case b: Block => + b.label }.toList // assert(blockOrder == List("lmain", "lmain1", "lmainret", "lmain3")) diff --git a/src/test/scala/ir/IRToDSLTest.scala b/src/test/scala/ir/IRToDSLTest.scala index d9da3142fa..dfb6e89f90 100644 --- a/src/test/scala/ir/IRToDSLTest.scala +++ b/src/test/scala/ir/IRToDSLTest.scala @@ -130,8 +130,9 @@ class IRToDSLTest extends AnyFunSuite with CaptureOutput { // for each procedure, check that the conversion is correct, // i.e., is structurally equal to the original dsl procedure - (dslprog.allProcedures zip irprog.procedures).foreach { case (dslproc, proc) => - assertDeepEquality(dslproc) { IRToDSL.convertProcedure(proc) } + (dslprog.allProcedures zip irprog.procedures).foreach { + case (dslproc, proc) => + assertDeepEquality(dslproc) { IRToDSL.convertProcedure(proc) } } } diff --git a/src/test/scala/test_util/TestValueDomainWithInterpreter.scala b/src/test/scala/test_util/TestValueDomainWithInterpreter.scala index fd8cd60857..7095565d88 100644 --- 
a/src/test/scala/test_util/TestValueDomainWithInterpreter.scala +++ b/src/test/scala/test_util/TestValueDomainWithInterpreter.scala @@ -130,10 +130,12 @@ trait TestValueDomainWithInterpreter[T] { val interpretResult = State.execute(initState, InterpFuns.callProcedure(interp)(startProc, startParams)) val breakres: List[(BreakPoint, _, List[(String, Expr, Option[Expr])])] = interpretResult(1) - val checkResults = breakres.flatMap { case (bp, _, evaledExprs) => - evaledExprs.grouped(2).map(_.toList).map { case List((_, variable, varValue), (name, test, evaled)) => - CheckResult(name, bp, test, variable, varValue, evaled) - } + val checkResults = breakres.flatMap { + case (bp, _, evaledExprs) => + evaledExprs.grouped(2).map(_.toList).map { + case List((_, variable, varValue), (name, test, evaled)) => + CheckResult(name, bp, test, variable, varValue, evaled) + } }.toList InterpreterTestResult(interpretResult(0).nextCmd, checkResults) From 91d84c0b33ff0c0c95dba17c76498deee101974c Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 22 Jun 2025 01:34:43 +1000 Subject: [PATCH 20/51] macros lol --- build.mill | 2 +- src/main/scala/gtirb/AuxDecoder.scala | 56 ++++++--------------------- 2 files changed, 12 insertions(+), 46 deletions(-) diff --git a/build.mill b/build.mill index 0ded7486f6..b9c099a7dc 100644 --- a/build.mill +++ b/build.mill @@ -23,7 +23,7 @@ object `package` extends RootModule with ScalaModule { def moduleDeps = Seq(basilAntlr, basilProto, bnfc) - def scalacOptions: T[Seq[String]] = Seq("-deprecation") + def scalacOptions: T[Seq[String]] = Seq("-deprecation", "-Xprint:typer") val javaTests = ivy"com.novocode:junit-interface:0.11" val scalaTests = ivy"org.scalatest::scalatest:3.2.19" diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index 2d1a52bbab..dcb882cba8 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -12,11 +12,11 @@ object AuxDecoder { enum AuxKind[T](val name: 
String, val decoder: Reader[T]) { case ElfSymbolTabIdxInfo - extends AuxKind("elfSymbolTabIdxInfo", readMap(readUuid, readList(readTuple(readString, readUint(64))))) + extends AuxKind("elfSymbolTabIdxInfo", readMap(readUuid, readList(readTuple((readString, readUint(64)))))) case ElfSymbolInfo extends AuxKind( "elfSymbolInfo", - readMap(readUuid, readTuple(readUint(64), readString, readString, readString, readUint(64))) + readMap(readUuid, readTuple((readUint(64), readString, readString, readString, readUint(64)))) ) case FunctionEntries extends AuxKind("functionEntries", readMap(readUuid, readSet(readUuid))) case FunctionBlocks extends AuxKind("functionBlocks", readMap(readUuid, readSet(readUuid))) @@ -77,49 +77,15 @@ object AuxDecoder { val len = readUint(64)(bs) (BigInt(0) until len).map(_ => valReader(bs)).toList - def readTuple[T1, T2](r1: Reader[T1], r2: Reader[T2])(bs: Input) = - val x1 = r1(bs) - val x2 = r2(bs) - (x1, x2) - - def readTuple[T1, T2, T3](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3])(bs: Input) = - val x1 = r1(bs) - val x2 = r2(bs) - val x3 = r3(bs) - (x1, x2, x3) - - def readTuple[T1, T2, T3, T4](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4])(bs: Input) = - val x1 = r1(bs) - val x2 = r2(bs) - val x3 = r3(bs) - val x4 = r4(bs) - (x1, x2, x3, x4) - - def readTuple[T1, T2, T3, T4, T5](r1: Reader[T1], r2: Reader[T2], r3: Reader[T3], r4: Reader[T4], r5: Reader[T5])( - bs: Input - ) = - val x1 = r1(bs) - val x2 = r2(bs) - val x3 = r3(bs) - val x4 = r4(bs) - val x5 = r5(bs) - (x1, x2, x3, x4, x5) - - def readTuple[T1, T2, T3, T4, T5, T6]( - r1: Reader[T1], - r2: Reader[T2], - r3: Reader[T3], - r4: Reader[T4], - r5: Reader[T5], - r6: Reader[T6] - )(bs: Input) = - val x1 = r1(bs) - val x2 = r2(bs) - val x3 = r3(bs) - val x4 = r4(bs) - val x5 = r5(bs) - val x6 = r6(bs) - (x1, x2, x3, x4, x5, x6) + type Return[x] = x match { case Reader[t] => t } + + inline def readTuple[T <: Tuple](xs: T)(bs: Input): Tuple.Map[T, Return] = + 
readTupleInner(xs)(bs).asInstanceOf[Tuple.Map[T, Return]] + + private inline def readTupleInner[T <: Tuple](xs: T)(bs: Input): Tuple = + inline xs match + case xs: (Reader[_] *: _) => xs.head(bs) *: readTupleInner(xs.tail)(bs) + case EmptyTuple => EmptyTuple def readUuid(bs: Input) = ByteString.copyFrom(readBytes(16)(bs)) From 5f86644102386cdb85aea0bfbf6a27a677d085e3 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 22 Jun 2025 01:42:32 +1000 Subject: [PATCH 21/51] auto tupling?? --- build.mill | 2 +- src/main/scala/gtirb/AuxDecoder.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build.mill b/build.mill index b9c099a7dc..0ded7486f6 100644 --- a/build.mill +++ b/build.mill @@ -23,7 +23,7 @@ object `package` extends RootModule with ScalaModule { def moduleDeps = Seq(basilAntlr, basilProto, bnfc) - def scalacOptions: T[Seq[String]] = Seq("-deprecation", "-Xprint:typer") + def scalacOptions: T[Seq[String]] = Seq("-deprecation") val javaTests = ivy"com.novocode:junit-interface:0.11" val scalaTests = ivy"org.scalatest::scalatest:3.2.19" diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index dcb882cba8..b5f934cee3 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -12,11 +12,11 @@ object AuxDecoder { enum AuxKind[T](val name: String, val decoder: Reader[T]) { case ElfSymbolTabIdxInfo - extends AuxKind("elfSymbolTabIdxInfo", readMap(readUuid, readList(readTuple((readString, readUint(64)))))) + extends AuxKind("elfSymbolTabIdxInfo", readMap(readUuid, readList(readTuple(readString, readUint(64))))) case ElfSymbolInfo extends AuxKind( "elfSymbolInfo", - readMap(readUuid, readTuple((readUint(64), readString, readString, readString, readUint(64)))) + readMap(readUuid, readTuple(readUint(64), readString, readString, readString, readUint(64))) ) case FunctionEntries extends AuxKind("functionEntries", readMap(readUuid, readSet(readUuid))) case FunctionBlocks extends 
AuxKind("functionBlocks", readMap(readUuid, readSet(readUuid))) From e6e38a50d17e1c5520642ce0007b7213074694f2 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 22 Jun 2025 11:50:52 +1000 Subject: [PATCH 22/51] ReadTuple --- src/main/scala/gtirb/AuxDecoder.scala | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index b5f934cee3..6113dd5f02 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -77,15 +77,16 @@ object AuxDecoder { val len = readUint(64)(bs) (BigInt(0) until len).map(_ => valReader(bs)).toList - type Return[x] = x match { case Reader[t] => t } + type ReadTuple[T <: Tuple] <: Tuple = T match + case Reader[out] *: rest => out *: ReadTuple[rest] + case EmptyTuple => EmptyTuple - inline def readTuple[T <: Tuple](xs: T)(bs: Input): Tuple.Map[T, Return] = - readTupleInner(xs)(bs).asInstanceOf[Tuple.Map[T, Return]] - - private inline def readTupleInner[T <: Tuple](xs: T)(bs: Input): Tuple = + inline def readTuple[T <: Tuple](xs: T)(bs: Input): ReadTuple[T] = inline xs match - case xs: (Reader[_] *: _) => xs.head(bs) *: readTupleInner(xs.tail)(bs) - case EmptyTuple => EmptyTuple + case xs: (Reader[o] *: rest) => + xs match + case h *: t => h(bs) *: readTuple[rest](t)(bs) + case _: EmptyTuple => EmptyTuple def readUuid(bs: Input) = ByteString.copyFrom(readBytes(16)(bs)) From 14468635ae4eba1c07bf4a91382c4da455b0f4ae Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 22 Jun 2025 11:54:42 +1000 Subject: [PATCH 23/51] move --- src/main/scala/Main.scala | 2 +- src/main/scala/{translating => gtirb}/GTIRBReadELF.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename src/main/scala/{translating => gtirb}/GTIRBReadELF.scala (99%) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 7ca86df1e8..e323e5e596 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -339,7 +339,7 
@@ object Main { val fIn = java.io.FileInputStream(loadingInputs.inputFile) val ir = com.grammatech.gtirb.proto.IR.IR.parseFrom(fIn) - println(ir.modules.map(translating.GTIRBReadELF.getExternalFunctions)) + println(ir.modules.map(gtirb.GTIRBReadELF.getExternalFunctions)) return } diff --git a/src/main/scala/translating/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala similarity index 99% rename from src/main/scala/translating/GTIRBReadELF.scala rename to src/main/scala/gtirb/GTIRBReadELF.scala index 67c13c0838..1640a6ca9d 100644 --- a/src/main/scala/translating/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -1,4 +1,4 @@ -package translating +package gtirb import gtirb.AuxDecoder import gtirb.AuxDecoder.{AuxKind, decodeAux} From 018da5fe34ceac31e017d1978ec619b15a1bd4f9 Mon Sep 17 00:00:00 2001 From: rina Date: Sun, 22 Jun 2025 12:58:39 +1000 Subject: [PATCH 24/51] resolver --- src/main/scala/Main.scala | 3 +- src/main/scala/gtirb/GTIRBReadELF.scala | 91 +++++------------- src/main/scala/gtirb/GTIRBResolver.scala | 112 +++++++++++++++++++++++ 3 files changed, 139 insertions(+), 67 deletions(-) create mode 100644 src/main/scala/gtirb/GTIRBResolver.scala diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index e323e5e596..669d2d9429 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -339,7 +339,8 @@ object Main { val fIn = java.io.FileInputStream(loadingInputs.inputFile) val ir = com.grammatech.gtirb.proto.IR.IR.parseFrom(fIn) - println(ir.modules.map(gtirb.GTIRBReadELF.getExternalFunctions)) + println(ir.modules.map(x => + gtirb.GTIRBReadELF.getExternalFunctions(gtirb.GTIRBResolver(x)))) return } diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 1640a6ca9d..e9a29fade6 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -54,57 +54,22 @@ object GTIRBReadELF { // // 
https://www.man7.org/linux/man-pages/man5/elf.5.html - def getExternalFunctions(mod: Module) = { - - val proxyBlockUuids = mod.proxies.map(_.uuid).toSet - val externalFunctionSymbols = mod.symbols.filter(x => proxyBlockUuids.contains(x.getReferentUuid)) - val symbolsByUuid = mod.symbols.map(x => x.uuid -> x).toMap - - val dataBlocksByUuid = (for { - sec <- mod.sections.toList - interval <- sec.byteIntervals - (b, innerb) <- interval.blocks.collect { - case b @ Block(_, Block.Value.Data(dat), _) => - (b, dat) - // case b @ Block(_, Block.Value.Code(cod), _) => (b, cod) - } - } yield innerb.uuid -> (innerb, b, interval, sec)).toMap - - val codeBlocksByUuid = (for { - sec <- mod.sections.toList - interval <- sec.byteIntervals - (b, innerb) <- interval.blocks.collect { - case b @ Block(_, Block.Value.Code(dat), _) => - (b, dat) - } - } yield innerb.uuid -> (innerb, b, interval, sec)).toMap + def getExternalFunctions(gtirb: GTIRBResolver) = { - val sectionsByName = mod.sections.map(x => x.name -> x).toMap - val relaDyns = parseRelaTab(sectionsByName(".rela.dyn").byteIntervals.head.contents) - val relaPlts = parseRelaTab(sectionsByName(".rela.plt").byteIntervals.head.contents) + val mod = gtirb.mod - val symbolTabIdx = AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod) - val tabidx = symbolTabIdx - .flatMap { - case (sym, idxs) => - idxs.map(_ -> sym) - } - .groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_ ++ _) - // println(tabidx) - - val symbolKinds = decodeAux(AuxKind.ElfSymbolInfo)(mod) + val relaDyns = parseRelaTab(gtirb.sectionsByName(".rela.dyn").byteIntervals.head.contents) + val relaPlts = parseRelaTab(gtirb.sectionsByName(".rela.plt").byteIntervals.head.contents) import scala.math.Ordering.Implicits.seqOrdering - val allSymbols = symbolKinds + val allSymbols = gtirb.symbolKindsByUuid .map { case (k, pos) => - val sym = symbolsByUuid(k) - val addr = for { - uuid <- sym.optionalPayload.referentUuid - (_, block: Block, 
ival: ByteInterval, _) <- dataBlocksByUuid.get(uuid).orElse(codeBlocksByUuid.get(uuid)) - } yield (block.offset + ival.address) - val value = sym.optionalPayload._value.fold("")("val=" + _.toString) - (symbolTabIdx(k), addr, pos) -> s"${sym.name} $value" + val sym = k.get + println(k) + val addr = k.getReferentBlock.map(_.address) + val value = k.getScalarValue.fold("")("val=" + _.toString) + (k.symTabIdx, addr, pos) -> s"${sym.name} $value" } .to(SortedMap) println(allSymbols.mkString("\n")) @@ -113,41 +78,35 @@ object GTIRBReadELF { println(".rela.dyn") relaDyns.foreach { case x => - val symuuid = tabidx(".dynsym")(x.r_sym.toInt) - println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) + val symid = gtirb.symbolTables(".dynsym")(x.r_sym.toInt) + println(s"$x " + symid.get.name) } println(".rela.plt") relaPlts.foreach { case x => - val symuuid = tabidx(".dynsym")(x.r_sym.toInt) - println(s"$x " + symbolsByUuid.get(symuuid).map(_.name).filter(_.nonEmpty)) + val symid = gtirb.symbolTables(".dynsym")(x.r_sym.toInt) + println(s"$x " + symid.get.name) } - val specGlobals = symbolKinds.toList.collect { - case (uuid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => - val sym = symbolsByUuid(uuid) - val (data, block, interval, sec) = dataBlocksByUuid(sym.optionalPayload.referentUuid.get) - // assert(size == data.size) + val specGlobals = gtirb.symbolKindsByUuid.toList.collect { + case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => + val blk = symid.getReferentBlock.get + val sec = blk.section assert(mod.sections(idx.toInt - 1) == sec) - (sym.name, size * 8, None, interval.address + block.offset) + (symid.get.name, blk.size * 8, None, blk.address) } println(specGlobals) - val funcNames = decodeAux(AuxKind.FunctionNames)(mod) - val funcNamesInverse = funcNames.map(_.swap) - - val funcEntries = decodeAux(AuxKind.FunctionEntries)(mod) - val funentry = symbolKinds.toList.collect { - case (symuuid, (size, "FUNC", "GLOBAL", "DEFAULT", idx)) if idx != 0 
=> + val funentry = gtirb.symbolKindsByUuid.toList.collect { + case (symid, (size, "FUNC", "GLOBAL", "DEFAULT", idx)) if idx != 0 => - val nameSymbol = symbolsByUuid(symuuid) - val funcUuid = funcNamesInverse(symuuid) - val entries = funcEntries(funcUuid) + val nameSymbol = symid.get + val funcUuid = symid.getFunction.get + val entries = funcUuid.getEntries assert(entries.size == 1, "function with non-singular entry") val entry = entries.head - val (_, bl, ival, _) = codeBlocksByUuid(entry) - val addr = bl.offset + ival.address + val addr = entry.get.address (nameSymbol.name, size * 8, addr) } diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala new file mode 100644 index 0000000000..bd101befe1 --- /dev/null +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -0,0 +1,112 @@ +package gtirb + +import gtirb.AuxDecoder +import gtirb.AuxDecoder.{AuxKind, decodeAux} + +import java.io.ByteArrayInputStream + +import com.google.protobuf.ByteString +import com.grammatech.gtirb.proto.CFG.EdgeType.* +import com.grammatech.gtirb.proto.CFG.CFG +import com.grammatech.gtirb.proto.CFG.Edge +import com.grammatech.gtirb.proto.CFG.EdgeLabel +import com.grammatech.gtirb.proto.Module.Module +import com.grammatech.gtirb.proto.Symbol.Symbol +import com.grammatech.gtirb.proto.Section.Section +import com.grammatech.gtirb.proto.ByteInterval.Block +import com.grammatech.gtirb.proto.CodeBlock.CodeBlock +import com.grammatech.gtirb.proto.DataBlock.DataBlock +import com.grammatech.gtirb.proto.ByteInterval.ByteInterval +import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload + +import scala.collection.mutable +import scala.collection.immutable.SortedMap + +case class GTIRBResolver(mod: Module) { + + private def b64(bs: ByteString) = + java.util.Base64.getEncoder().encodeToString(bs.toByteArray) + + sealed trait Uuid(val kind: String, val uuid: String) { + override def toString = s"$kind:$uuid" + override def equals(o: Any) = o match { + case x: 
Uuid => x.kind == kind && x.uuid == uuid + case _ => false + } + override def hashCode = (kind, uuid).hashCode + } + object Uuid { + class Block(xs: ByteString) extends Uuid("blok", b64(xs)) + class Function(xs: ByteString) extends Uuid("func", b64(xs)) + class Symbol(xs: ByteString) extends Uuid("symb", b64(xs)) + } + + case class BlockData(inner: DataBlock | CodeBlock, block: Block, interval: ByteInterval, section: Section) { + def uuid = inner match { + case x: DataBlock => x.uuid + case x: CodeBlock => x.uuid + } + def size = inner match { + case x: DataBlock => x.size + case x: CodeBlock => x.size + } + def address = block.offset + interval.address + } + + extension (x: Uuid.Block) + def get = blocksByUuid(x) + def isProxyBlock = proxyBlockUuids.contains(x) + + extension (x: Uuid.Symbol) + def get = symbolsByUuid(x) + def symTabIdx = symbolTabIdxByUuid(x) + def symKind = symbolKindsByUuid(x) + def getReferentBlock = for { + uuid <- x.get.optionalPayload.referentUuid + blok <- blocksByUuid.get(Uuid.Block(uuid)) + } yield blok + def getScalarValue = x.get.optionalPayload._value + def getFunction = funcNamesInverse.get(x) + + extension (x: Uuid.Function) + def getEntries = funcEntries(x) + def getName = funcNames(x) + + private def mapFirst[T,T2,U](f: T => T2)(x: (T, U)) = (f(x._1), x._2) + + val proxyBlockUuids = mod.proxies.map(x => Uuid.Block(x.uuid)).toSet + val symbolsByUuid = mod.symbols.map(x => Uuid.Symbol(x.uuid) -> x).toMap + + val blocksByUuid = (for { + sec <- mod.sections.toList + interval <- sec.byteIntervals + (uuid, innerb, outerb) <- interval.blocks.collect { + case b @ Block(_, Block.Value.Data(dat), _) => (dat.uuid, (dat : DataBlock | CodeBlock), b) + case b @ Block(_, Block.Value.Code(cod), _) => (cod.uuid, (cod : DataBlock | CodeBlock), b) + } + id: Uuid.Block = Uuid.Block(uuid) + } yield id -> BlockData(innerb, outerb, interval, sec)).toMap + + val sectionsByName = mod.sections.map(x => x.name -> x).toMap + + val symbolTabIdxByUuid = 
AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(Uuid.Symbol(_))) + val symbolTables = symbolTabIdxByUuid + .flatMap { + case (sym, idxs) => + idxs.map(_ -> sym) + } + .groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_ ++ _) + + val symbolKindsByUuid = decodeAux(AuxKind.ElfSymbolInfo)(mod) + .map(mapFirst(Uuid.Symbol(_))) + + val funcNames = decodeAux(AuxKind.FunctionNames)(mod).map { + case (fun, sym) => Uuid.Function(fun) -> Uuid.Symbol(sym) + } + val funcNamesInverse = funcNames.map(_.swap) + val funcEntries = decodeAux(AuxKind.FunctionEntries)(mod).map { + case (a, b) => Uuid.Function(a) -> b.map(Uuid.Block(_)) + } + + +} From 0e38d1bb760a6302b905d8b3eb33c0814349c8d2 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 15:35:34 +1000 Subject: [PATCH 25/51] Revert "tehe revert me" This reverts commit 3c45043c806e64e153a467dbe719a350df508169. Conflicts: src/main/scala/translating/GTIRBReadELF.scala --- .scalafmt.conf | 2 - src/main/scala/analysis/GammaDomains.scala | 10 +-- src/main/scala/analysis/Lattice.scala | 61 +++++++-------- .../scala/analysis/LatticeCollections.scala | 50 +++++------- .../ReachingDefinitionsAnalysis.scala | 15 ++-- src/main/scala/analysis/RegionInjector.scala | 5 +- .../analysis/VariableDependencyAnalysis.scala | 57 +++++++------- .../DataStructureAnalysis.scala | 32 ++++---- .../data_structure_analysis/Graph.scala | 25 +++--- .../data_structure_analysis/IntervalDSA.scala | 77 +++++++++---------- .../SymbolicValueAnalysis.scala | 5 +- .../scala/analysis/solvers/IDESolver.scala | 34 ++++---- .../analysis/solvers/UnionFindSolver.scala | 7 +- src/main/scala/gtirb/AuxDecoder.scala | 16 ++-- src/main/scala/ir/IRCursor.scala | 29 ++++--- src/main/scala/ir/Program.scala | 20 ++--- src/main/scala/ir/dsl/DSL.scala | 15 ++-- src/main/scala/ir/dsl/ToScala.scala | 5 +- src/main/scala/ir/eval/ExprEval.scala | 5 +- src/main/scala/ir/eval/InterpretBasilIR.scala | 5 +- 
src/main/scala/ir/eval/Interpreter.scala | 5 +- src/main/scala/ir/invariant/CFGCorrect.scala | 10 +-- .../ir/invariant/SingleCallBlockEnd.scala | 5 +- .../ir/parsing/BasilEarlyBNFCVisitor.scala | 23 +++--- .../ir/parsing/BasilMainBNFCVisitor.scala | 5 +- .../scala/ir/transforms/ExternalRemover.scala | 13 ++-- src/main/scala/ir/transforms/Inline.scala | 15 ++-- src/main/scala/ir/transforms/PCTracking.scala | 5 +- .../ir/transforms/ProcedureParameters.scala | 45 +++++------ src/main/scala/ir/transforms/Simp.scala | 50 +++++------- src/main/scala/translating/GTIRBToIR.scala | 9 +-- src/main/scala/translating/IRToBoogie.scala | 15 ++-- .../scala/translating/IRToBoogieNoVC.scala | 10 +-- .../translating/SpecificationLoader.scala | 20 ++--- src/main/scala/util/RunUtils.scala | 10 +-- src/main/scala/util/Twine.scala | 23 +++--- src/main/scala/util/functional/List.scala | 5 +- src/test/scala/InterpretTestConstProp.scala | 5 +- src/test/scala/LiveVarsAnalysisTests.scala | 5 +- src/test/scala/ir/CILVisitorTest.scala | 5 +- src/test/scala/ir/IRTest.scala | 10 +-- src/test/scala/ir/IRToDSLTest.scala | 5 +- .../TestValueDomainWithInterpreter.scala | 10 +-- 43 files changed, 336 insertions(+), 447 deletions(-) diff --git a/.scalafmt.conf b/.scalafmt.conf index 7914e1c341..196a3419f0 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -6,5 +6,3 @@ indent.defnSite = 2 optIn.configStyleArguments = false align.preset = none docstrings.style = keep -newlines.beforeCurlyLambdaParams = multilineWithCaseOnly -newlines.afterCurlyLambdaParams = squash diff --git a/src/main/scala/analysis/GammaDomains.scala b/src/main/scala/analysis/GammaDomains.scala index 30bf753c03..0cd82dfa33 100644 --- a/src/main/scala/analysis/GammaDomains.scala +++ b/src/main/scala/analysis/GammaDomains.scala @@ -166,9 +166,8 @@ class PredicateDomain(summaries: Procedure => ProcedureSummary) extends Predicat case a: Assert => and(b, expectPredicate(a.body)).simplify case i: IndirectCall => top case c: DirectCall => 
- c.actualParams.foldLeft(Conj(summaries(c.target).requires.map(_.pred).toSet).simplify) { - case (p, (v, e)) => - p.replace(BVTerm.Var(v), exprToBVTerm(e).get).replace(GammaTerm.Var(v), exprToGammaTerm(e).get).simplify + c.actualParams.foldLeft(Conj(summaries(c.target).requires.map(_.pred).toSet).simplify) { case (p, (v, e)) => + p.replace(BVTerm.Var(v), exprToBVTerm(e).get).replace(GammaTerm.Var(v), exprToGammaTerm(e).get).simplify } case g: GoTo => b case r: Return => b @@ -233,9 +232,8 @@ class WpDualDomain(summaries: Procedure => ProcedureSummary) extends PredicateEn case a: Assert => or(b, not(expectPredicate(a.body))).simplify case i: IndirectCall => bot case c: DirectCall => - not(c.actualParams.foldLeft(Conj(summaries(c.target).requires.map(_.pred).toSet).simplify) { - case (p, (v, e)) => - p.replace(BVTerm.Var(v), exprToBVTerm(e).get).replace(GammaTerm.Var(v), exprToGammaTerm(e).get).simplify + not(c.actualParams.foldLeft(Conj(summaries(c.target).requires.map(_.pred).toSet).simplify) { case (p, (v, e)) => + p.replace(BVTerm.Var(v), exprToBVTerm(e).get).replace(GammaTerm.Var(v), exprToGammaTerm(e).get).simplify }) case g: GoTo => b case r: Return => b diff --git a/src/main/scala/analysis/Lattice.scala b/src/main/scala/analysis/Lattice.scala index 1dd3d33e62..a0b03f09b9 100644 --- a/src/main/scala/analysis/Lattice.scala +++ b/src/main/scala/analysis/Lattice.scala @@ -200,9 +200,8 @@ class SASILattice extends Lattice[StridedWrappedInterval] { SIBottom } else { // create singleton intervals for each value and then join them - x.foldLeft(bottom) { - case (acc, v) => - lub(acc, singletonSI(v, w)) + x.foldLeft(bottom) { case (acc, v) => + lub(acc, singletonSI(v, w)) } } } @@ -287,11 +286,10 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case (VSTop, _) => VSTop case (_, VSTop) => VSTop case (VS(m1), VS(m2)) => - VS(m1.keys.foldLeft(m2) { - case (acc, k) => - val v1 = m1(k) - val v2 = m2(k) - acc + (k -> lattice.lub(v1, v2)) + VS(m1.keys.foldLeft(m2) 
{ case (acc, k) => + val v1 = m1(k) + val v2 = m2(k) + acc + (k -> lattice.lub(v1, v2)) }) } } @@ -381,11 +379,10 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case (VSTop, _) => VSTop case (_, VSTop) => VSTop case (VS(m1), VS(m2)) => - VS(m1.keys.foldLeft(m2) { - case (acc, k) => - val v1 = m1(k) - val v2 = m2(k) - acc + (k -> lattice.add(v1, v2)) + VS(m1.keys.foldLeft(m2) { case (acc, k) => + val v1 = m1(k) + val v2 = m2(k) + acc + (k -> lattice.add(v1, v2)) }) } } @@ -395,9 +392,8 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case VSBottom => VSBottom case VSTop => VSTop case VS(m) => - VS(m.map { - case (k, s) => - k -> lattice.add(s, y.value, y.size) // TODO: is the size correct here? + VS(m.map { case (k, s) => + k -> lattice.add(s, y.value, y.size) // TODO: is the size correct here? }) } } @@ -409,11 +405,10 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case (VSBottom, t) => VSBottom case (t, VSBottom) => t case (VS(m1), VS(m2)) => - VS(m1.keys.foldLeft(m2) { - case (acc, k) => - val v1 = m1(k) - val v2 = m2(k) - acc + (k -> lattice.sub(v1, v2)) + VS(m1.keys.foldLeft(m2) { case (acc, k) => + val v1 = m1(k) + val v2 = m2(k) + acc + (k -> lattice.sub(v1, v2)) }) } } @@ -423,9 +418,8 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case VSTop => VSTop case VSBottom => VSBottom case VS(m) => - VS(m.map { - case (k, s) => - k -> lattice.sub(s, y.value, y.size) // TODO: is the size correct here? + VS(m.map { case (k, s) => + k -> lattice.sub(s, y.value, y.size) // TODO: is the size correct here? 
}) } } @@ -451,9 +445,8 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case VSBottom => VSBottom case VSTop => VSTop case VS(m) => - VS(m.map { - case (k, SI(s, l, u, w)) => - k -> SI(s, lattice.lowestPossibleValue, u, w) + VS(m.map { case (k, SI(s, l, u, w)) => + k -> SI(s, lattice.lowestPossibleValue, u, w) }) } } @@ -463,9 +456,8 @@ class ValueSetLattice[T] extends Lattice[ValueSet[T]] { case VSBottom => VSBottom case VSTop => VSTop case VS(m) => - VS(m.map { - case (k, SI(s, l, u, w)) => - k -> SI(s, l, lattice.highestPossibleValue, w) + VS(m.map { case (k, SI(s, l, u, w)) => + k -> SI(s, l, lattice.highestPossibleValue, w) }) } } @@ -554,11 +546,10 @@ class FlagLattice extends Lattice[Flag] { case (BOTTOM_Flag, t) => t case (t, BOTTOM_Flag) => t case (FlagMap(m1), FlagMap(m2)) => - FlagMap(m1.keys.foldLeft(m2) { - case (acc, k) => - val v1 = m1(k) - val v2 = m2(k) - acc + (k -> lattice.lub(v1, v2)) + FlagMap(m1.keys.foldLeft(m2) { case (acc, k) => + val v1 = m1(k) + val v2 = m2(k) + acc + (k -> lattice.lub(v1, v2)) }) } } diff --git a/src/main/scala/analysis/LatticeCollections.scala b/src/main/scala/analysis/LatticeCollections.scala index be50e90304..3fe7a37949 100644 --- a/src/main/scala/analysis/LatticeCollections.scala +++ b/src/main/scala/analysis/LatticeCollections.scala @@ -225,19 +225,16 @@ private def latticeMapJoin[D, L]( case (Top(), _) => Top() case (Bottom(), b) => b case (TopMap(a), TopMap(b)) => - TopMap(a.foldLeft(b) { - case (m, (k, v)) => - m + (k -> join(m.getOrElse(k, top), v)) + TopMap(a.foldLeft(b) { case (m, (k, v)) => + m + (k -> join(m.getOrElse(k, top), v)) }) case (TopMap(a), BottomMap(b)) => - TopMap(b.foldLeft(a) { - case (m, (k, v)) => - m + (k -> join(m.getOrElse(k, top), v)) + TopMap(b.foldLeft(a) { case (m, (k, v)) => + m + (k -> join(m.getOrElse(k, top), v)) }) case (BottomMap(a), BottomMap(b)) => - BottomMap(a.foldLeft(b) { - case (m, (k, v)) => - m + (k -> join(m.getOrElse(k, bottom), v)) + BottomMap(a.foldLeft(b) 
{ case (m, (k, v)) => + m + (k -> join(m.getOrElse(k, bottom), v)) }) case (a, b) => latticeMapJoin(b, a, join, top, bottom) } @@ -256,19 +253,16 @@ private def latticeMapMeet[D, L]( case (Top(), b) => b case (Bottom(), _) => Bottom() case (TopMap(a), TopMap(b)) => - TopMap(a.foldLeft(b) { - case (m, (k, v)) => - m + (k -> meet(m.getOrElse(k, top), v)) + TopMap(a.foldLeft(b) { case (m, (k, v)) => + m + (k -> meet(m.getOrElse(k, top), v)) }) case (TopMap(a), BottomMap(b)) => - BottomMap(a.foldLeft(b) { - case (m, (k, v)) => - m + (k -> meet(m.getOrElse(k, bottom), v)) + BottomMap(a.foldLeft(b) { case (m, (k, v)) => + m + (k -> meet(m.getOrElse(k, bottom), v)) }) case (BottomMap(a), BottomMap(b)) => - BottomMap(a.foldLeft(b) { - case (m, (k, v)) => - m + (k -> meet(m.getOrElse(k, bottom), v)) + BottomMap(a.foldLeft(b) { case (m, (k, v)) => + m + (k -> meet(m.getOrElse(k, bottom), v)) }) case (a, b) => latticeMapMeet(b, a, meet, top, bottom) } @@ -325,24 +319,20 @@ trait MapDomain[D, L] extends AbstractDomain[LatticeMap[D, L]] { case (Top(), _) => Top() case (_, Top()) => Top() case (BottomMap(a), BottomMap(b)) => - BottomMap(a.foldLeft(b) { - case (m, (b, v)) => - m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) + BottomMap(a.foldLeft(b) { case (m, (b, v)) => + m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) }) case (BottomMap(a), TopMap(b)) => - TopMap(a.foldLeft(b) { - case (m, (b, v)) => - m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) + TopMap(a.foldLeft(b) { case (m, (b, v)) => + m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) }) case (TopMap(a), BottomMap(b)) => - TopMap(b.foldLeft(a) { - case (m, (a, v)) => - m + (a -> widenTerm(v, m.getOrElse(a, botTerm), pos)) + TopMap(b.foldLeft(a) { case (m, (a, v)) => + m + (a -> widenTerm(v, m.getOrElse(a, botTerm), pos)) }) case (TopMap(a), TopMap(b)) => - TopMap(a.foldLeft(b) { - case (m, (b, v)) => - m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) + TopMap(a.foldLeft(b) { case (m, (b, v)) 
=> + m + (b -> widenTerm(m.getOrElse(b, botTerm), v, pos)) }) } diff --git a/src/main/scala/analysis/ReachingDefinitionsAnalysis.scala b/src/main/scala/analysis/ReachingDefinitionsAnalysis.scala index 84b1faa18f..9894c63c54 100644 --- a/src/main/scala/analysis/ReachingDefinitionsAnalysis.scala +++ b/src/main/scala/analysis/ReachingDefinitionsAnalysis.scala @@ -42,9 +42,8 @@ trait ReachingDefinitionsAnalysis(program: Program) { vars: Set[Variable], s: (Map[Variable, Set[Assign]], Map[Variable, Set[Assign]]) ): (Map[Variable, Set[Assign]], Map[Variable, Set[Assign]]) = { - vars.foldLeft((s(0), Map.empty[Variable, Set[Assign]])) { - case ((state, acc), v) => - (state, acc + (v -> state(v))) + vars.foldLeft((s(0), Map.empty[Variable, Set[Assign]])) { case ((state, acc), v) => + (state, acc + (v -> state(v))) } } @@ -58,9 +57,8 @@ trait ReachingDefinitionsAnalysis(program: Program) { // for lhs, addOrReplace the definition val rhs = assign.rhs.variables val lhs = assign.lhs - val rhsUseDefs: Map[Variable, Set[Assign]] = rhs.foldLeft(Map.empty[Variable, Set[Assign]]) { - case (acc, v) => - acc + (v -> s(0)(v)) + val rhsUseDefs: Map[Variable, Set[Assign]] = rhs.foldLeft(Map.empty[Variable, Set[Assign]]) { case (acc, v) => + acc + (v -> s(0)(v)) } (s(0) + (lhs -> Set(assign)), rhsUseDefs) case assert: Assert => @@ -70,9 +68,8 @@ trait ReachingDefinitionsAnalysis(program: Program) { case memoryLoad: MemoryLoad => val lhs = memoryLoad.lhs val rhs = memoryLoad.index.variables - val rhsUseDefs: Map[Variable, Set[Assign]] = rhs.foldLeft(Map.empty[Variable, Set[Assign]]) { - case (acc, v) => - acc + (v -> s(0)(v)) + val rhsUseDefs: Map[Variable, Set[Assign]] = rhs.foldLeft(Map.empty[Variable, Set[Assign]]) { case (acc, v) => + acc + (v -> s(0)(v)) } (s(0) + (lhs -> Set(memoryLoad)), rhsUseDefs) case assume: Assume => diff --git a/src/main/scala/analysis/RegionInjector.scala b/src/main/scala/analysis/RegionInjector.scala index 1cfc9c016c..8e4932a095 100644 --- 
a/src/main/scala/analysis/RegionInjector.scala +++ b/src/main/scala/analysis/RegionInjector.scala @@ -188,9 +188,8 @@ class RegionInjectorMRA(override val program: Program, mmm: MemoryModelMap) exte } override def sharedRegions(): Iterable[MergedRegion] = { - mergedRegions.collect { - case (_: DataRegion | _: HeapRegion, region: MergedRegion) => - region + mergedRegions.collect { case (_: DataRegion | _: HeapRegion, region: MergedRegion) => + region } } } diff --git a/src/main/scala/analysis/VariableDependencyAnalysis.scala b/src/main/scala/analysis/VariableDependencyAnalysis.scala index bc026b5fbd..462ce3f0b3 100644 --- a/src/main/scala/analysis/VariableDependencyAnalysis.scala +++ b/src/main/scala/analysis/VariableDependencyAnalysis.scala @@ -53,9 +53,8 @@ trait ProcVariableDependencyAnalysisFunctions( else d match { case Left(v) => - call.actualParams.toList.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]]()) { - case (m, (inVar, expr)) => - if expr.variables.contains(v) then m + (Left(inVar) -> IdEdge()) else m + call.actualParams.toList.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]]()) { case (m, (inVar, expr)) => + if expr.variables.contains(v) then m + (Left(inVar) -> IdEdge()) else m } case Right(_) => call.actualParams.toList.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]](d -> IdEdge())) { @@ -105,9 +104,8 @@ trait ProcVariableDependencyAnalysisFunctions( varDepsSummaries .get(call.target) .map(summary => { - summary.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]]()) { - case (m, (outVar, deps)) => - if deps.contains(v) then m + (Left(outVar) -> IdEdge()) else m + summary.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]]()) { case (m, (outVar, deps)) => + if deps.contains(v) then m + (Left(outVar) -> IdEdge()) else m } }) .getOrElse(Map()) @@ -129,39 +127,36 @@ trait ProcVariableDependencyAnalysisFunctions( val init: Map[DL, EdgeFunction[LatticeSet[Variable]]] = if call.outParams.exists(_._2 == v) then Map() else Map(d -> IdEdge()) 
- call.actualParams.foldLeft(init) { - case (m, (inVar, expr)) => - if !expr.variables.contains(v) then m - else { - summary.foldLeft(m) { - case (m, (endVar, deps)) => - endVar match { - case endVar: LocalVar if call.target.formalOutParam.contains(endVar) => { - if deps.contains(inVar) then m + (Left(call.outParams(endVar)) -> IdEdge()) - else m - } - case _ => m - } + call.actualParams.foldLeft(init) { case (m, (inVar, expr)) => + if !expr.variables.contains(v) then m + else { + summary.foldLeft(m) { case (m, (endVar, deps)) => + endVar match { + case endVar: LocalVar if call.target.formalOutParam.contains(endVar) => { + if deps.contains(inVar) then m + (Left(call.outParams(endVar)) -> IdEdge()) + else m + } + case _ => m } } + } } } case Right(_) => val initialise = call.outParams.foldLeft(Map[DL, EdgeFunction[LatticeSet[Variable]]](d -> IdEdge())) { case (m, (formalVar, resultVar)) => m + (Left(resultVar) -> ConstEdge(FiniteSet(Set()))) } - val ret = summary.foldLeft(initialise) { - case (m, (endVar, deps)) => - endVar match { - case endVar: LocalVar if call.target.formalOutParam.contains(endVar) => - deps match { - case Top() | DiffSet(_) => m + (Left(call.outParams(endVar)) -> ConstEdge(Top())) - case FiniteSet(s) if s == Set() => - m + (Left(call.outParams(endVar)) -> ConstEdge(FiniteSet(Set()))) - case _ => m - } - case _ => m - } + val ret = summary.foldLeft(initialise) { case (m, (endVar, deps)) => + endVar match { + case endVar: LocalVar if call.target.formalOutParam.contains(endVar) => + deps match { + case Top() | DiffSet(_) => m + (Left(call.outParams(endVar)) -> ConstEdge(Top())) + case FiniteSet(s) if s == Set() => + m + (Left(call.outParams(endVar)) -> ConstEdge(FiniteSet(Set()))) + case _ => m + } + case _ => m + } } ret } diff --git a/src/main/scala/analysis/data_structure_analysis/DataStructureAnalysis.scala b/src/main/scala/analysis/data_structure_analysis/DataStructureAnalysis.scala index c3958898fa..663d2b142d 100644 --- 
a/src/main/scala/analysis/data_structure_analysis/DataStructureAnalysis.scala +++ b/src/main/scala/analysis/data_structure_analysis/DataStructureAnalysis.scala @@ -139,14 +139,13 @@ class DataStructureAnalysis( } // assert(calleeGraph.formals.isEmpty || buGraph.varToCell(begin(callee)).equals(calleeGraph.formals)) - calleeGraph.globalMapping.foreach { - case (range: AddressRange, Field(node: Node, offset: BigInt)) => - val field = calleeGraph.find(node) - val res = buGraph.mergeCells( - buGraph.globalMapping(range).node.getCell(buGraph.globalMapping(range).offset), - field.node.getCell(field.offset + offset) - ) - buGraph.handleOverlapping(res) + calleeGraph.globalMapping.foreach { case (range: AddressRange, Field(node: Node, offset: BigInt)) => + val field = calleeGraph.find(node) + val res = buGraph.mergeCells( + buGraph.globalMapping(range).node.getCell(buGraph.globalMapping(range).offset), + field.node.getCell(field.offset + offset) + ) + buGraph.handleOverlapping(res) } if (buGraph.varToCell.contains(callee)) { @@ -205,15 +204,14 @@ class DataStructureAnalysis( node.cloneNode(callersGraph, calleesGraph) } - callersGraph.globalMapping.foreach { - case (range: AddressRange, Field(oldNode, internal)) => - // val node = callersGraph - val field = callersGraph.find(oldNode) - val res = calleesGraph.mergeCells( - calleesGraph.globalMapping(range).node.getCell(calleesGraph.globalMapping(range).offset), - field.node.getCell(field.offset + internal) - ) - calleesGraph.handleOverlapping(res) + callersGraph.globalMapping.foreach { case (range: AddressRange, Field(oldNode, internal)) => + // val node = callersGraph + val field = callersGraph.find(oldNode) + val res = calleesGraph.mergeCells( + calleesGraph.globalMapping(range).node.getCell(calleesGraph.globalMapping(range).offset), + field.node.getCell(field.offset + internal) + ) + calleesGraph.handleOverlapping(res) } callSite.paramCells.keySet.foreach { variable => diff --git 
a/src/main/scala/analysis/data_structure_analysis/Graph.scala b/src/main/scala/analysis/data_structure_analysis/Graph.scala index a550756f56..2ff14de7b6 100644 --- a/src/main/scala/analysis/data_structure_analysis/Graph.scala +++ b/src/main/scala/analysis/data_structure_analysis/Graph.scala @@ -943,19 +943,18 @@ class Graph(using Counter)( newGraph.stackMapping.update(offset, idToNode(node.id)) } - globalMapping.foreach { - case (range: AddressRange, Field(node, offset)) => - assert(newGraph.globalMapping.contains(range)) - val cell: Cell = find(node.getCell(offset)) - val finalNode: Node = cell.node.get - nodes.add(finalNode) - if !idToNode.contains(finalNode.id) then - val newNode = finalNode.cloneSelf(newGraph) - idToNode.update(finalNode.id, newNode) - newGraph.globalMapping.update( - range, - Field(idToNode(finalNode.id), cell.offset + (offset - finalNode.getCell(offset).offset)) - ) + globalMapping.foreach { case (range: AddressRange, Field(node, offset)) => + assert(newGraph.globalMapping.contains(range)) + val cell: Cell = find(node.getCell(offset)) + val finalNode: Node = cell.node.get + nodes.add(finalNode) + if !idToNode.contains(finalNode.id) then + val newNode = finalNode.cloneSelf(newGraph) + idToNode.update(finalNode.id, newNode) + newGraph.globalMapping.update( + range, + Field(idToNode(finalNode.id), cell.offset + (offset - finalNode.getCell(offset).offset)) + ) } val queue = mutable.Queue[Node]() diff --git a/src/main/scala/analysis/data_structure_analysis/IntervalDSA.scala b/src/main/scala/analysis/data_structure_analysis/IntervalDSA.scala index 72e036431f..b89424c4b5 100644 --- a/src/main/scala/analysis/data_structure_analysis/IntervalDSA.scala +++ b/src/main/scala/analysis/data_structure_analysis/IntervalDSA.scala @@ -43,19 +43,18 @@ class IntervalGraph( symVal: SymValSet[DSInterval], current: Map[SymBase, IntervalNode] ): Map[SymBase, IntervalNode] = { - symVal.state.filter((base, _) => base != NonPointer).foldLeft(current) { - case (result, 
(base, symOffsets)) => - val node = find(result.getOrElse(base, init(base, None))) - base match - case Heap(call) => node.flags.heap = true - case Stack(proc) => node.flags.stack = true - case Global => node.flags.global = true - case NonPointer => - throw new Exception("Attempted to create a node from an Non-pointer symbolic base") - case unknown: (Ret | Par | Loaded) => - node.flags.unknown = true - node.flags.incomplete = true - result + (base -> node) + symVal.state.filter((base, _) => base != NonPointer).foldLeft(current) { case (result, (base, symOffsets)) => + val node = find(result.getOrElse(base, init(base, None))) + base match + case Heap(call) => node.flags.heap = true + case Stack(proc) => node.flags.stack = true + case Global => node.flags.global = true + case NonPointer => + throw new Exception("Attempted to create a node from an Non-pointer symbolic base") + case unknown: (Ret | Par | Loaded) => + node.flags.unknown = true + node.flags.incomplete = true + result + (base -> node) } } @@ -74,10 +73,9 @@ class IntervalGraph( globalNode.add(DSInterval(address.toInt, address.toInt)) // ignore size, could be a composite type } - globalOffsets.foreach { - case (address, relocated) => - globalNode.add(address.toInt) - globalNode.add(relocated.toInt) + globalOffsets.foreach { case (address, relocated) => + globalNode.add(address.toInt) + globalNode.add(relocated.toInt) } externalFunctions.foreach(e => @@ -87,11 +85,10 @@ class IntervalGraph( ext.node.flags.foreign = true ) - globalOffsets.map(_.swap).foreach { - case (address, relocated) => - val pointee = find(globalNode.get(address.toInt)) - val pointer = find(globalNode).add(DSInterval(relocated.toInt, relocated.toInt + 8)) - pointer.setPointee(pointee) + globalOffsets.map(_.swap).foreach { case (address, relocated) => + val pointee = find(globalNode.get(address.toInt)) + val pointer = find(globalNode).add(DSInterval(relocated.toInt, relocated.toInt + 8)) + pointer.setPointee(pointee) } globalNode @@ 
-100,9 +97,8 @@ class IntervalGraph( def buildNodes(): Map[SymBase, IntervalNode] = { val global = globalNode(irContext.globals ++ irContext.funcEntries, irContext.globalOffsets, irContext.externalFunctions) - sva.state.foldLeft(Map[SymBase, IntervalNode](Global -> global)) { - case (m, (variable, valueSet)) => - symValToNodes(valueSet, m) + sva.state.foldLeft(Map[SymBase, IntervalNode](Global -> global)) { case (m, (variable, valueSet)) => + symValToNodes(valueSet, m) } } @@ -170,15 +166,14 @@ class IntervalGraph( // returns the cells corresponding to the def symValToCells(symVal: SymValSet[DSInterval]): Set[IntervalCell] = { val pairs = symVal.state.filter((base, _) => base != NonPointer) - pairs.foldLeft(Set[IntervalCell]()) { - case (results, (base: SymBase, offsets: DSInterval)) => - val (node, adjustment) = findNode(nodes(base)) - if offsets == Top then results + node.collapse() - else - results ++ offsets.toIntervals - .filter(i => base != Global || isGlobal(i.start.get)) - .map(_.move(i => i + adjustment)) - .map(node.add) + pairs.foldLeft(Set[IntervalCell]()) { case (results, (base: SymBase, offsets: DSInterval)) => + val (node, adjustment) = findNode(nodes(base)) + if offsets == Top then results + node.collapse() + else + results ++ offsets.toIntervals + .filter(i => base != Global || isGlobal(i.start.get)) + .map(_.move(i => i + adjustment)) + .map(node.add) } } @@ -928,19 +923,17 @@ object IntervalDSA { cons.inParams .filterNot(f => unchanged.exists(i => f._1.name.startsWith(i))) .filter(f => cons.target.formalInParam.contains(f._1)) - .foreach { - case (formal, actual) => - val (sourceExpr, targetExpr) = if phase == TD then (actual, formal) else (formal, actual) - exprTransfer(sourceExpr, targetExpr, source, target, oldToNew) + .foreach { case (formal, actual) => + val (sourceExpr, targetExpr) = if phase == TD then (actual, formal) else (formal, actual) + exprTransfer(sourceExpr, targetExpr, source, target, oldToNew) } cons.outParams .filterNot(f => 
unchanged.exists(i => f._1.name.startsWith(i))) .filter(f => cons.target.formalOutParam.contains(f._1)) - .foreach { - case (out, actual) => - val (sourceExpr, targetExpr) = if phase == TD then (actual, out) else (out, actual) - exprTransfer(sourceExpr, targetExpr, source, target, oldToNew) + .foreach { case (out, actual) => + val (sourceExpr, targetExpr) = if phase == TD then (actual, out) else (out, actual) + exprTransfer(sourceExpr, targetExpr, source, target, oldToNew) } // TODO add unification between unused indirect call out params and their corresponding input version } diff --git a/src/main/scala/analysis/data_structure_analysis/SymbolicValueAnalysis.scala b/src/main/scala/analysis/data_structure_analysis/SymbolicValueAnalysis.scala index c142898225..73ce45b927 100644 --- a/src/main/scala/analysis/data_structure_analysis/SymbolicValueAnalysis.scala +++ b/src/main/scala/analysis/data_structure_analysis/SymbolicValueAnalysis.scala @@ -433,9 +433,8 @@ class SymValuesDomain[T <: Offsets](using symValSetDomain: SymValSetDomain[T]) e join(a, retInitSymValSet, block) case ind: IndirectCall => a // TODO possibly map every live variable to top case ret: Return => - val update = SymValues(ret.outParams.map { - case (outVar: LocalVar, value: Expr) => - outVar -> SymValues.exprToSymValSet(a)(value) + val update = SymValues(ret.outParams.map { case (outVar: LocalVar, value: Expr) => + outVar -> SymValues.exprToSymValSet(a)(value) }) join(a, update, block) diff --git a/src/main/scala/analysis/solvers/IDESolver.scala b/src/main/scala/analysis/solvers/IDESolver.scala index 3e72d2c69f..efcad7b1f3 100644 --- a/src/main/scala/analysis/solvers/IDESolver.scala +++ b/src/main/scala/analysis/solvers/IDESolver.scala @@ -153,18 +153,17 @@ abstract class IDESolver[ this.analyze() val res = mutable.Map[Procedure, mutable.Map[DL, mutable.Map[DL, EdgeFunction[T]]]]() - x.foreach { - case ((n, d1, d2), e) => - if (isExit(n)) { - val exit: EE = n.asInstanceOf[EE] - val proc = 
IRWalk.procedure(exit) - val m1 = res.getOrElseUpdate( - proc, - mutable.Map[DL, mutable.Map[DL, EdgeFunction[T]]]().withDefaultValue(mutable.Map[DL, EdgeFunction[T]]()) - ) - val m2 = m1.getOrElseUpdate(d1, mutable.Map[DL, EdgeFunction[T]]()) - m2 += d2 -> e - } + x.foreach { case ((n, d1, d2), e) => + if (isExit(n)) { + val exit: EE = n.asInstanceOf[EE] + val proc = IRWalk.procedure(exit) + val m1 = res.getOrElseUpdate( + proc, + mutable.Map[DL, mutable.Map[DL, EdgeFunction[T]]]().withDefaultValue(mutable.Map[DL, EdgeFunction[T]]()) + ) + val m2 = m1.getOrElseUpdate(d1, mutable.Map[DL, EdgeFunction[T]]()) + m2 += d2 -> e + } } Logger.debug(s"Function summaries:\n${res .map { (f, s) => @@ -239,12 +238,11 @@ abstract class IDESolver[ /** Restructures the analysis output to match `restructuredlattice`. */ def restructure(y: lattice.Element): restructuredlattice.Element = { - y.foldLeft(Map[CFGPosition, Map[D, valuelattice.Element]]()) { - case (acc, ((n, dl), e)) => - dl match { - case Left(d) => acc + (n -> (acc.getOrElse(n, Map[D, valuelattice.Element]()) + (d -> e))) - case _ => acc - } + y.foldLeft(Map[CFGPosition, Map[D, valuelattice.Element]]()) { case (acc, ((n, dl), e)) => + dl match { + case Left(d) => acc + (n -> (acc.getOrElse(n, Map[D, valuelattice.Element]()) + (d -> e))) + case _ => acc + } } } } diff --git a/src/main/scala/analysis/solvers/UnionFindSolver.scala b/src/main/scala/analysis/solvers/UnionFindSolver.scala index 44906cec5c..4012a0d724 100644 --- a/src/main/scala/analysis/solvers/UnionFindSolver.scala +++ b/src/main/scala/analysis/solvers/UnionFindSolver.scala @@ -37,10 +37,9 @@ class UnionFindSolver[A] { mkUnion(v2, t1) case (f1: Cons[A], f2: Cons[A]) if f1.doMatch(f2) => mkUnion(f1, f2) - f1.args.zip(f2.args).foreach { - case (a1, a2) => - Logger.debug(s"Unifying subterms $a1 and $a2") - unify(a1, a2) + f1.args.zip(f2.args).foreach { case (a1, a2) => + Logger.debug(s"Unifying subterms $a1 and $a2") + unify(a1, a2) } case (x, y) => throw new 
UnificationFailure(s"Cannot unify $t1 and $t2 (with representatives $x and $y)") diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index 6113dd5f02..16e582ac38 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -54,19 +54,17 @@ object AuxDecoder { val numBytes = numBits / 8 require(numBytes * 8 == numBits, "requires multiple of 8") - readBytes(numBytes)(bs).foldRight(BigInt(0)) { - case (x, acc) => - val n = x.toInt - acc * 256 + (if (!signed && n < 0) then n + 256 else n) + readBytes(numBytes)(bs).foldRight(BigInt(0)) { case (x, acc) => + val n = x.toInt + acc * 256 + (if (!signed && n < 0) then n + 256 else n) } def readMap[K, V](keyReader: Reader[K], valReader: Reader[V])(bs: Input) = val len = readUint(64)(bs) - (BigInt(0) until len).map { - case _ => - val k = keyReader(bs) - val v = valReader(bs) - k -> v + (BigInt(0) until len).map { case _ => + val k = keyReader(bs) + val v = valReader(bs) + k -> v }.toMap def readSet[K, V](valReader: Reader[V])(bs: Input) = diff --git a/src/main/scala/ir/IRCursor.scala b/src/main/scala/ir/IRCursor.scala index 7f8a59882a..0e8b22379c 100644 --- a/src/main/scala/ir/IRCursor.scala +++ b/src/main/scala/ir/IRCursor.scala @@ -318,9 +318,8 @@ def getDetachedBlocks(p: Procedure) = { def dotBlockGraph(proc: Procedure): String = { val o = getDetachedBlocks(proc) dotBlockGraph( - proc.collect { - case b: Block => - b + proc.collect { case b: Block => + b }, o.reachableFromBlockEmptyPred ) @@ -330,9 +329,8 @@ def dotBlockGraph(prog: Program): String = { val e = prog.procedures.toSet.flatMap(getDetachedBlocks(_).reachableFromBlockEmptyPred) dotBlockGraph( - prog.collect { - case b: Block => - b + prog.collect { case b: Block => + b }, e ) @@ -340,16 +338,15 @@ def dotBlockGraph(prog: Program): String = { def dotBlockGraph(blocks: Iterable[Block], orphaned: Set[Block]): String = { val printer = translating.BasilIRPrettyPrinter() - val labels: 
Map[CFGPosition, String] = (blocks.collect { - case b: Block => - b -> { - (b.statements.toList.map(printer.apply(_) + ";") ++ { - b.jump match { - case g: GoTo => List() - case o => List(printer(o) + ";") - } - }).map(" " + _).mkString("\n") - } + val labels: Map[CFGPosition, String] = (blocks.collect { case b: Block => + b -> { + (b.statements.toList.map(printer.apply(_) + ";") ++ { + b.jump match { + case g: GoTo => List() + case o => List(printer(o) + ";") + } + }).map(" " + _).mkString("\n") + } }).toMap toDot[Block](blocks.toSet, IntraProcBlockIRCursor, labels, orphaned) diff --git a/src/main/scala/ir/Program.scala b/src/main/scala/ir/Program.scala index afdfdd0991..b40986bbdc 100644 --- a/src/main/scala/ir/Program.scala +++ b/src/main/scala/ir/Program.scala @@ -78,9 +78,8 @@ class Program( } val t = toMap(this) val o = toMap(p) - (mainProcedure.name == p.mainProcedure.name) && (t.keys == o.keys) && t.keys.forall { - case k => - t(k).deepEquals(o(k)) + (mainProcedure.name == p.mainProcedure.name) && (t.keys == o.keys) && t.keys.forall { case k => + t(k).deepEquals(o(k)) } } @@ -327,9 +326,8 @@ class Procedure private ( } private def deepEqualsProc(p: Procedure) = { name == p.name && (p.blocks.size == blocks.size) && { - p.blocksBookended.zip(blocksBookended).forall { - case ((l: Block), (r: Block)) => - l.deepEqualsDbg(r) + p.blocksBookended.zip(blocksBookended).forall { case ((l: Block), (r: Block)) => + l.deepEqualsDbg(r) } } } @@ -567,9 +565,8 @@ class Block private ( case o => false } private def deepEqualsBlock(b: Block): Boolean = { - (label == b.label) && statements.zip(b.statements).forall { - case (l, r) => - l.deepEqualsDbg(r) + (label == b.label) && statements.zip(b.statements).forall { case (l, r) => + l.deepEqualsDbg(r) } } @@ -612,9 +609,8 @@ class Block private ( assert(!incomingJumps.contains(g)) } - def calls: Set[Procedure] = statements.toSet.collect { - case d: DirectCall => - d.target + def calls: Set[Procedure] = statements.toSet.collect 
{ case d: DirectCall => + d.target } def modifies: Set[Global] = statements.flatMap(_.modifies).toSet diff --git a/src/main/scala/ir/dsl/DSL.scala b/src/main/scala/ir/dsl/DSL.scala index ccdc92d425..d8bf2eab48 100644 --- a/src/main/scala/ir/dsl/DSL.scala +++ b/src/main/scala/ir/dsl/DSL.scala @@ -244,9 +244,8 @@ case class EventuallyBlock( override def deepEquals(o: Object) = o match { case EventuallyBlock(`label`, osl, oj, `address`) => - j.deepEquals(oj) && sl.size == osl.size && osl.toList.zip(sl).forall { - case (l, r) => - l.deepEquals(r) + j.deepEquals(oj) && sl.size == osl.size && osl.toList.zip(sl).forall { case (l, r) => + l.deepEquals(r) } case _ => false @@ -316,9 +315,8 @@ case class EventuallyProcedure( override def deepEquals(o: Object) = o match { case EventuallyProcedure(`label`, `in`, `out`, b, `entryBlockLabel`, `returnBlockLabel`, `address`) => { b.size == blocks.size && { - b.zip(blocks).forall { - case (l, r) => - l.deepEquals(r) + b.zip(blocks).forall { case (l, r) => + l.deepEquals(r) } } } @@ -408,9 +406,8 @@ case class EventuallyProgram( override def deepEquals(o: Object) = o match { case EventuallyProgram(mp, op, im) => { - mp.deepEquals(mainProcedure) && op.size == otherProcedures.size && op.zip(otherProcedures).forall { - case (l, r) => - l.deepEquals(r) + mp.deepEquals(mainProcedure) && op.size == otherProcedures.size && op.zip(otherProcedures).forall { case (l, r) => + l.deepEquals(r) } } } diff --git a/src/main/scala/ir/dsl/ToScala.scala b/src/main/scala/ir/dsl/ToScala.scala index bb454b7138..dfe3fcf8c2 100644 --- a/src/main/scala/ir/dsl/ToScala.scala +++ b/src/main/scala/ir/dsl/ToScala.scala @@ -94,9 +94,8 @@ given [T](using ToScala[T]): ToScalaLines[Set[T]] with given [K, V](using ToScala[K], ToScala[V]): ToScalaLines[Map[K, V]] with extension (x: Map[K, V]) def toScalaLines = - val pairs = x.map { - case (k, v) => - Twine(k.toScalaLines, " -> ", v.toScalaLines) + val pairs = x.map { case (k, v) => + Twine(k.toScalaLines, " -> ", 
v.toScalaLines) } Twine.indentNested("Map(", pairs, ")") diff --git a/src/main/scala/ir/eval/ExprEval.scala b/src/main/scala/ir/eval/ExprEval.scala index b0871cbe9e..e60e025260 100644 --- a/src/main/scala/ir/eval/ExprEval.scala +++ b/src/main/scala/ir/eval/ExprEval.scala @@ -293,9 +293,8 @@ def statePartialEvalExpr[S](l: Loader[S, InterpreterError])(exp: Expr): State[S, } State.protect( () => ns, - { - case e => - Errored(e.toString) + { case e => + Errored(e.toString) }: PartialFunction[Exception, InterpreterError] ) diff --git a/src/main/scala/ir/eval/InterpretBasilIR.scala b/src/main/scala/ir/eval/InterpretBasilIR.scala index fa2acf8d66..5c71247bd2 100644 --- a/src/main/scala/ir/eval/InterpretBasilIR.scala +++ b/src/main/scala/ir/eval/InterpretBasilIR.scala @@ -329,9 +329,8 @@ object InterpFuns { f.setNext(Run(IRWalk.firstInBlock(gt.targets.head))) } case gt: GoTo => - val assumes = gt.targets.flatMap(_.statements.headOption).collect { - case a: Assume => - a + val assumes = gt.targets.flatMap(_.statements.headOption).collect { case a: Assume => + a } for { _ <- diff --git a/src/main/scala/ir/eval/Interpreter.scala b/src/main/scala/ir/eval/Interpreter.scala index 4c2a5e072a..dfae63a5c7 100644 --- a/src/main/scala/ir/eval/Interpreter.scala +++ b/src/main/scala/ir/eval/Interpreter.scala @@ -192,9 +192,8 @@ case class MemoryState( /** Debug return useful values * */ def getGlobalVals: Map[String, BitVecLiteral] = { - stackFrames(globalFrame).collect { - case (k, Scalar(b: BitVecLiteral)) => - k -> b + stackFrames(globalFrame).collect { case (k, Scalar(b: BitVecLiteral)) => + k -> b } } diff --git a/src/main/scala/ir/invariant/CFGCorrect.scala b/src/main/scala/ir/invariant/CFGCorrect.scala index b4ec94a286..8187ad6e6b 100644 --- a/src/main/scala/ir/invariant/CFGCorrect.scala +++ b/src/main/scala/ir/invariant/CFGCorrect.scala @@ -6,16 +6,14 @@ import scala.collection.mutable def cfgCorrect(p: Program | Procedure) = { - val forwardsInter = p.collect { - case d @ 
DirectCall(tgt, _, _, _) => - (d.parent.parent, tgt) + val forwardsInter = p.collect { case d @ DirectCall(tgt, _, _, _) => + (d.parent.parent, tgt) } val revForwardsInter = forwardsInter.groupBy(_._2).map((dest, origs) => (dest, origs.map(_._1).toSet)).toMap val forwardsInterMap = forwardsInter.groupBy(_._1).map((orig, dests) => (orig, dests.map(_._2).toSet)).toMap - val forwardsIntra = p.collect { - case g @ GoTo(targets, _) => - targets.map((t: Block) => (g.parent, t)) + val forwardsIntra = p.collect { case g @ GoTo(targets, _) => + targets.map((t: Block) => (g.parent, t)) }.flatten val revForwardsIntra = forwardsIntra.groupBy(_._2).map((dest, origs) => (dest, origs.map(_._1).toSet)).toMap diff --git a/src/main/scala/ir/invariant/SingleCallBlockEnd.scala b/src/main/scala/ir/invariant/SingleCallBlockEnd.scala index f99a1f3b3f..1215fde191 100644 --- a/src/main/scala/ir/invariant/SingleCallBlockEnd.scala +++ b/src/main/scala/ir/invariant/SingleCallBlockEnd.scala @@ -4,9 +4,8 @@ import ir.* def singleCallBlockEnd(p: Program): Boolean = { p.forall { case b: Block => { - val calls = (b.statements.collect { - case c: Call => - b.statements.lastOption.contains(c) + val calls = (b.statements.collect { case c: Call => + b.statements.lastOption.contains(c) }) (calls.size <= 1) && calls.headOption.getOrElse(true) } diff --git a/src/main/scala/ir/parsing/BasilEarlyBNFCVisitor.scala b/src/main/scala/ir/parsing/BasilEarlyBNFCVisitor.scala index 370816437d..296ab8f616 100644 --- a/src/main/scala/ir/parsing/BasilEarlyBNFCVisitor.scala +++ b/src/main/scala/ir/parsing/BasilEarlyBNFCVisitor.scala @@ -64,18 +64,17 @@ case class BasilEarlyBNFCVisitor[A]() // Members declared in Program.Visitor override def visit(x: syntax.Prog, arg: A) = - x.listdeclaration_.asScala.foldLeft(Declarations.empty) { - case (decls, x) => - try { - decls.merge(x.accept(this, arg)) - } catch { - case e: IllegalArgumentException => - throw ParseException( - "encountered duplicate declarations with the same 
name", - x.asInstanceOf[HasParsePosition], - e - ) - } + x.listdeclaration_.asScala.foldLeft(Declarations.empty) { case (decls, x) => + try { + decls.merge(x.accept(this, arg)) + } catch { + case e: IllegalArgumentException => + throw ParseException( + "encountered duplicate declarations with the same name", + x.asInstanceOf[HasParsePosition], + e + ) + } } // Members declared in MExpr.Visitor diff --git a/src/main/scala/ir/parsing/BasilMainBNFCVisitor.scala b/src/main/scala/ir/parsing/BasilMainBNFCVisitor.scala index 00bb5a8c90..ab149f837a 100644 --- a/src/main/scala/ir/parsing/BasilMainBNFCVisitor.scala +++ b/src/main/scala/ir/parsing/BasilMainBNFCVisitor.scala @@ -139,9 +139,8 @@ case class InnerBasilBNFCVisitor[A]( // Members declared in CallLVars.Visitor override def visit(x: syntax.NoOutParams, arg: A): BasilParseValue = Nil override def visit(x: syntax.LocalVars, arg: A): BasilParseValue = - val innerlocals = x.listlvar_.asScala.collect { - case x: syntax.LVarDef => - x + val innerlocals = x.listlvar_.asScala.collect { case x: syntax.LVarDef => + x }.toList if (innerlocals.nonEmpty) { throw ParseException( diff --git a/src/main/scala/ir/transforms/ExternalRemover.scala b/src/main/scala/ir/transforms/ExternalRemover.scala index 73740b6f45..73c63751a0 100644 --- a/src/main/scala/ir/transforms/ExternalRemover.scala +++ b/src/main/scala/ir/transforms/ExternalRemover.scala @@ -3,12 +3,11 @@ import ir.* import cilvisitor.* def removeBodyOfExternal(external: Set[String])(prog: Program) = { - prog.procedures.foreach { - case p => - if (external.contains(p.procName)) { - // update the modifies set before removing the body - p.modifies.addAll(p.blocks.flatMap(_.modifies)) - p.replaceBlocks(Seq()) - } + prog.procedures.foreach { case p => + if (external.contains(p.procName)) { + // update the modifies set before removing the body + p.modifies.addAll(p.blocks.flatMap(_.modifies)) + p.replaceBlocks(Seq()) + } } } diff --git a/src/main/scala/ir/transforms/Inline.scala 
b/src/main/scala/ir/transforms/Inline.scala index 4ea29f889e..93a47c5390 100644 --- a/src/main/scala/ir/transforms/Inline.scala +++ b/src/main/scala/ir/transforms/Inline.scala @@ -60,9 +60,8 @@ def convertJumpRenaming(blockName: String => String, varName: CILVisitor, x: Jum case Return(label, out) => EventuallyReturn( out.toList - .map { - case (v: Variable, e: Expr) => - ((v.name, visit_expr(varName, e))) + .map { case (v: Variable, e: Expr) => + ((v.name, visit_expr(varName, e))) } .to(ArraySeq), label @@ -151,13 +150,11 @@ def inlineCall(prog: Program, c: DirectCall): Unit = { case r: EventuallyReturn => r.params.toMap case _ => throw Exception("returnblock should have a return statement") } - val outAssignments = c.outParams.map { - case (formal: LocalVar, lvar: Variable) => - LocalAssign(lvar, targetReturnValues(formal.name)) + val outAssignments = c.outParams.map { case (formal: LocalVar, lvar: Variable) => + LocalAssign(lvar, targetReturnValues(formal.name)) } - val inAssignments = c.actualParams.map { - case (formal: LocalVar, actual: Expr) => - LocalAssign(visit_rvar(varRenamer, formal), actual) + val inAssignments = c.actualParams.map { case (formal: LocalVar, actual: Expr) => + LocalAssign(visit_rvar(varRenamer, formal), actual) } afterCallBlock.statements.prependAll(outAssignments) entryTempBlock.statements.prependAll(inAssignments) diff --git a/src/main/scala/ir/transforms/PCTracking.scala b/src/main/scala/ir/transforms/PCTracking.scala index ca3dabac42..f63d9720df 100644 --- a/src/main/scala/ir/transforms/PCTracking.scala +++ b/src/main/scala/ir/transforms/PCTracking.scala @@ -26,9 +26,8 @@ object PCTracking { case PCTrackingOption.Keep => Logger.info(s"[!] 
Removing PC-tracking assertion statements, keeping PC assignments") - program.collect { - case x @ Assert(_, _, Some("pc-tracking")) => - x.parent.statements.remove(x) + program.collect { case x @ Assert(_, _, Some("pc-tracking")) => + x.parent.statements.remove(x) } case PCTrackingOption.Assert => Logger.info(s"[!] Inserting PC-tracking requires/ensures") diff --git a/src/main/scala/ir/transforms/ProcedureParameters.scala b/src/main/scala/ir/transforms/ProcedureParameters.scala index 4b4b3fcf26..971c19252e 100644 --- a/src/main/scala/ir/transforms/ProcedureParameters.scala +++ b/src/main/scala/ir/transforms/ProcedureParameters.scala @@ -127,21 +127,18 @@ def liftProcedureCallAbstraction(ctx: util.IRContext): util.IRContext = { transforms.applyRPO(ctx.program) val liveLab = () => - liveVars.collect { - case (b: Block, r) => - b -> { - val live = r.toList.collect { - case (v, TwoElementTop) => - v - } - val dead = r.toList.collect { - case (v, TwoElementBottom) => - v - } - val livel = live.map(_.name).toList.sorted.mkString(", ") - // val deadl = dead.map(_.name).toList.sorted.mkString(", ") - s"Live: $livel" + liveVars.collect { case (b: Block, r) => + b -> { + val live = r.toList.collect { case (v, TwoElementTop) => + v } + val dead = r.toList.collect { case (v, TwoElementBottom) => + v + } + val livel = live.map(_.name).toList.sorted.mkString(", ") + // val deadl = dead.map(_.name).toList.sorted.mkString(", ") + s"Live: $livel" + } }.toMap DebugDumpIRLogger.writeToFile( @@ -203,9 +200,8 @@ def collectVariables(p: Procedure): (Set[Variable], Set[Variable]) = { } })) ++ p.blocks .map(_.jump) - .collect { - case r: Return => - r.outParams.toSet.map(_._1) + .collect { case r: Return => + r.outParams.toSet.map(_._1) } .flatten val rvars = p.blocks.toSet.flatMap(_.statements.flatMap(s => { @@ -377,9 +373,8 @@ def inOutParams( case (p, Some(x)) => (p, ReadWriteAnalysis.onlyGlobal(x)) } - val procEnd = p.procedures.map { - case p => - p -> p.returnBlock.getOrElse(p) + 
val procEnd = p.procedures.map { case p => + p -> p.returnBlock.getOrElse(p) }.toMap val lives: Map[Procedure, (Set[Variable], Set[Variable])] = p.procedures @@ -388,9 +383,8 @@ def inOutParams( def toLiveSet(p: Option[Map[Variable, TwoElement]]): Set[Variable] = { p.map(p => { - p.collect { - case (v, TwoElementTop) => - v + p.collect { case (v, TwoElementTop) => + v }.toSet }).getOrElse(overapprox) } @@ -451,9 +445,8 @@ def inOutParams( val origIn = oldParams(proc)._1 val origOut = oldParams(proc)._2 - val calls = proc.collect { - case c: DirectCall => - c + val calls = proc.collect { case c: DirectCall => + c } val modifiedFromCall = diff --git a/src/main/scala/ir/transforms/Simp.scala b/src/main/scala/ir/transforms/Simp.scala index a8ef443927..8d294f8644 100644 --- a/src/main/scala/ir/transforms/Simp.scala +++ b/src/main/scala/ir/transforms/Simp.scala @@ -199,9 +199,8 @@ def removeSlices(p: Procedure): Unit = { .flatten .groupBy(_._1) .map((k, v) => (k, v.map(_._2).toSet)) - .collect { - case (k: LocalVar, v) => - (k, v) + .collect { case (k: LocalVar, v) => + (k, v) } enum HighZeroBits: case Bits(n: Int) // (i) and (ii) hold; the n highest bits are redundant @@ -262,9 +261,8 @@ def removeSlices(p: Procedure): Unit = { } lhs -> varHighZeroBits.get(rep) }) - .collect { - case (l, Some(x)) /* remove anything we have no information on */ => - (l, x) + .collect { case (l, Some(x)) /* remove anything we have no information on */ => + (l, x) } class CheckUsesHaveExtend() extends CILVisitor { val result: mutable.HashMap[LocalVar, HighZeroBits] = @@ -288,9 +286,8 @@ def removeSlices(p: Procedure): Unit = { result.toMap } } - val toSmallen = CheckUsesHaveExtend()(varsWithExtend)(p).collect { - case (v, HighZeroBits.Bits(x)) => - v -> x + val toSmallen = CheckUsesHaveExtend()(varsWithExtend)(p).collect { case (v, HighZeroBits.Bits(x)) => + v -> x }.toMap class ReplaceAlwaysSlicedVars(varHighZeroBits: Map[LocalVar, Int]) extends CILVisitor { override def vexpr(v: Expr) = 
{ @@ -409,9 +406,8 @@ def getRedundantAssignments(procedure: Procedure): Set[Assign] = { var removeOld = toRemove val r = toRemove - .collect { - case (v, VS.Assigned(d)) => - d + .collect { case (v, VS.Assigned(d)) => + d } .toSet .flatten @@ -602,14 +598,12 @@ class GuardVisitor(validate: Boolean = false) extends CILVisitor { var defs = Map[Variable, Set[Assign]]() def allDefinitions(p: Procedure): Map[Variable, Set[Assign]] = { - p.collect { - case a: Assign => - a.assignees.map(l => l -> a) + p.collect { case a: Assign => + a.assignees.map(l => l -> a) }.flatten .groupBy(_._1) - .map { - case (v, ass) => - v -> ass.map(_._2).toSet + .map { case (v, ass) => + v -> ass.map(_._2).toSet } } @@ -1444,9 +1438,8 @@ object CopyProp { def replaceVar(lhs: Variable, rhs: Option[Expr] = None) = { st = st - .filterNot { - case (l, r) => - r.variables.contains(lhs) || l == lhs + .filterNot { case (l, r) => + r.variables.contains(lhs) || l == lhs } rhs.foreach(nrhs => st = st.updated(lhs, nrhs)) } @@ -1865,13 +1858,11 @@ def findDefinitelyExits(p: Program) = { val solve = interprocSummaryFixpointSolver(ldom, dom) val res = solve.solveProgInterProc(p, true) ProcReturnInfo( - res.collect { - case (p, PathExit.Return) => - p + res.collect { case (p, PathExit.Return) => + p }.toSet, - res.collect { - case (p, PathExit.NoReturn) => - p + res.collect { case (p, PathExit.NoReturn) => + p }.toSet ) } @@ -1991,9 +1982,8 @@ def fixupGuards(p: Procedure): Unit = { def removeDuplicateGuard(b: Iterable[Block]): Unit = { b.foreach { case block: Block if IRWalk.firstInBlock(block).isInstanceOf[Assume] => { - val assumes = block.statements.collect { - case a: Assume => - a + val assumes = block.statements.collect { case a: Assume => + a }.toList val chosen = assumes.head.body diff --git a/src/main/scala/translating/GTIRBToIR.scala b/src/main/scala/translating/GTIRBToIR.scala index a16ad0c0c9..c690f3a2db 100644 --- a/src/main/scala/translating/GTIRBToIR.scala +++ 
b/src/main/scala/translating/GTIRBToIR.scala @@ -382,11 +382,10 @@ class GTIRBToIR( procedure.entryBlock = block } - block.address.foreach { - case addr => - val pcCorrectExpr = BinaryExpr(EQ, Register("_PC", 64), BitVecLiteral(addr, 64)) - val assertPC = Assert(pcCorrectExpr, Some("pc-tracking"), Some("pc-tracking")) - block.statements.append(assertPC) + block.address.foreach { case addr => + val pcCorrectExpr = BinaryExpr(EQ, Register("_PC", 64), BitVecLiteral(addr, 64)) + val assertPC = Assert(pcCorrectExpr, Some("pc-tracking"), Some("pc-tracking")) + block.statements.append(assertPC) } block } diff --git a/src/main/scala/translating/IRToBoogie.scala b/src/main/scala/translating/IRToBoogie.scala index 127a926787..0fd734808f 100644 --- a/src/main/scala/translating/IRToBoogie.scala +++ b/src/main/scala/translating/IRToBoogie.scala @@ -640,16 +640,14 @@ class IRToBoogie( private def translateAtomicStart(a: AtomicSection): List[BCmd] = { val sharedLoads = a.getBlocks.flatMap { b => - b.statements.collect { - case load @ MemoryLoad(_, _: SharedMemory, _, _, _, _) => - load + b.statements.collect { case load @ MemoryLoad(_, _: SharedMemory, _, _, _, _) => + load } } val sharedStores = a.getBlocks.flatMap { b => - b.statements.collect { - case store @ MemoryStore(_: SharedMemory, _, _, _, _, _) => - store + b.statements.collect { case store @ MemoryStore(_: SharedMemory, _, _, _, _, _) => + store } } @@ -671,9 +669,8 @@ class IRToBoogie( private def translateAtomicEnd(a: AtomicSection): List[BCmd] = { val sharedStores = a.getBlocks.flatMap { b => - b.statements.collect { - case store @ MemoryStore(_: SharedMemory, _, _, _, _, _) => - store + b.statements.collect { case store @ MemoryStore(_: SharedMemory, _, _, _, _, _) => + store } } diff --git a/src/main/scala/translating/IRToBoogieNoVC.scala b/src/main/scala/translating/IRToBoogieNoVC.scala index aa33bb9bcc..6212f8910a 100644 --- a/src/main/scala/translating/IRToBoogieNoVC.scala +++ 
b/src/main/scala/translating/IRToBoogieNoVC.scala @@ -188,13 +188,11 @@ class FindVars extends CILVisitor { SkipChildren() } - def globals = (vars ++ mems).collect { - case g: Global => - g + def globals = (vars ++ mems).collect { case g: Global => + g } - def locals = vars.collect { - case v: LocalVar => - v + def locals = vars.collect { case v: LocalVar => + v } } diff --git a/src/main/scala/translating/SpecificationLoader.scala b/src/main/scala/translating/SpecificationLoader.scala index 28c1253789..63fbb8f96b 100644 --- a/src/main/scala/translating/SpecificationLoader.scala +++ b/src/main/scala/translating/SpecificationLoader.scala @@ -389,9 +389,8 @@ case class SpecificationLoader(symbols: Set[SpecGlobal], program: Program) { r } - val requires = ctx.requires.asScala.collect { - case r: ParsedRequiresContext => - visitExpr(r.expr, nameToGlobals, params) + val requires = ctx.requires.asScala.collect { case r: ParsedRequiresContext => + visitExpr(r.expr, nameToGlobals, params) }.toList val modifies = Option(ctx.modifies) match { @@ -399,19 +398,16 @@ case class SpecificationLoader(symbols: Set[SpecGlobal], program: Program) { case None => List() } - val ensures = ctx.ensures.asScala.collect { - case e: ParsedEnsuresContext => - visitExpr(e.expr, nameToGlobals, params) + val ensures = ctx.ensures.asScala.collect { case e: ParsedEnsuresContext => + visitExpr(e.expr, nameToGlobals, params) }.toList - val requiresDirect = ctx.requires.asScala.collect { - case r: DirectRequiresContext => - r.QUOTESTRING.getText.stripPrefix("\"").stripSuffix("\"") + val requiresDirect = ctx.requires.asScala.collect { case r: DirectRequiresContext => + r.QUOTESTRING.getText.stripPrefix("\"").stripSuffix("\"") }.toList - val ensuresDirect = ctx.ensures.asScala.collect { - case r: DirectEnsuresContext => - r.QUOTESTRING.getText.stripPrefix("\"").stripSuffix("\"") + val ensuresDirect = ctx.ensures.asScala.collect { case r: DirectEnsuresContext => + 
r.QUOTESTRING.getText.stripPrefix("\"").stripSuffix("\"") }.toList val rely = Option(ctx.relies) match { diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index e6221d3d22..fd8210d311 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -654,9 +654,8 @@ object StaticAnalysis { toVisit.pushAll( IntraProcBlockIRCursor .succ(next) - .diff(visited.collect[Block] { - case b: Block => - b + .diff(visited.collect[Block] { case b: Block => + b }) ) @@ -761,9 +760,8 @@ object RunUtils { File(s"${s}_blockgraph-after-dsa.dot"), dotBlockGraph( program, - (program.collect { - case b: Block => - b -> pp_block(b) + (program.collect { case b: Block => + b -> pp_block(b) }).toMap ) ) diff --git a/src/main/scala/util/Twine.scala b/src/main/scala/util/Twine.scala index f1ea0f1b8f..03d5d30f78 100644 --- a/src/main/scala/util/Twine.scala +++ b/src/main/scala/util/Twine.scala @@ -64,18 +64,17 @@ sealed trait Twine { // furthermore, newline and indent should only be inserted upon // reaching a non-empty literal string. multiple Lines nodes placed within // each other should not insert additional newlines or indentation. - lines.foreach { - case l => - val first = firstInLine.once() - if (!first) - doNewline = Once() - helper(l, ind) - - // if no newline was introduced by this list element, we should manually - // add one if needed. this allows blank lines to be produced by placing - // Twine.empty within Lines. - if (!first && doNewline.once()) - sb ++= newline + lines.foreach { case l => + val first = firstInLine.once() + if (!first) + doNewline = Once() + helper(l, ind) + + // if no newline was introduced by this list element, we should manually + // add one if needed. this allows blank lines to be produced by placing + // Twine.empty within Lines. 
+ if (!first && doNewline.once()) + sb ++= newline } case Concat(tws) => tws.foreach(helper(_, ind)) } diff --git a/src/main/scala/util/functional/List.scala b/src/main/scala/util/functional/List.scala index a25b6a4089..f025984566 100644 --- a/src/main/scala/util/functional/List.scala +++ b/src/main/scala/util/functional/List.scala @@ -51,9 +51,8 @@ def sequence[DD[V] <: IterableOps[V, DD, DD[V]], CC[U] <: IterableOps[U, CC, CC[ def cc(x: T): CC[T] = xs.iterableFactory.newBuilder.addOne(x).result val base: DD[CC[T]] = dd(cc0()) - xs.foldRight(base) { - case (ys, rest) => - ys.flatMap((y: T) => rest.map((r: CC[T]) => cc(y) ++ r)) + xs.foldRight(base) { case (ys, rest) => + ys.flatMap((y: T) => rest.map((r: CC[T]) => cc(y) ++ r)) } } diff --git a/src/test/scala/InterpretTestConstProp.scala b/src/test/scala/InterpretTestConstProp.scala index b05fb628d0..0080f75dc1 100644 --- a/src/test/scala/InterpretTestConstProp.scala +++ b/src/test/scala/InterpretTestConstProp.scala @@ -59,9 +59,8 @@ class InterpretTestConstProp ir.transforms.clearParams(ictx.program) val analyses = RunUtils.staticAnalysis(StaticAnalysisConfig(None, None, None), ictx) - val analysisres = analyses.intraProcConstProp.collect { - case (block: Block, v) => - block -> v + val analysisres = analyses.intraProcConstProp.collect { case (block: Block, v) => + block -> v } val result = runTestInterpreter(ictx, analysisres) diff --git a/src/test/scala/LiveVarsAnalysisTests.scala b/src/test/scala/LiveVarsAnalysisTests.scala index ac1cb3e759..d5e120af75 100644 --- a/src/test/scala/LiveVarsAnalysisTests.scala +++ b/src/test/scala/LiveVarsAnalysisTests.scala @@ -269,9 +269,8 @@ class LiveVarsAnalysisTests extends AnyFunSuite, CaptureOutput, BASILTest { info("bean1") info( analysisResults.keySet - .collect { - case b: Block => - b.label + .collect { case b: Block => + b.label } .mkString("; ") ) diff --git a/src/test/scala/ir/CILVisitorTest.scala b/src/test/scala/ir/CILVisitorTest.scala index 1051b57b86..cfc2c35acd 
100644 --- a/src/test/scala/ir/CILVisitorTest.scala +++ b/src/test/scala/ir/CILVisitorTest.scala @@ -19,9 +19,8 @@ class FindVars extends CILVisitor { SkipChildren() } - def globals = vars.collect { - case g: Global => - g + def globals = vars.collect { case g: Global => + g } } diff --git a/src/test/scala/ir/IRTest.scala b/src/test/scala/ir/IRTest.scala index 80b4082d32..cf7941a7e0 100644 --- a/src/test/scala/ir/IRTest.scala +++ b/src/test/scala/ir/IRTest.scala @@ -84,9 +84,8 @@ class IRTest extends AnyFunSuite with CaptureOutput { val blocks = p.labelToBlock - val directcalls = p.collect { - case c: DirectCall => - c + val directcalls = p.collect { case c: DirectCall => + c } assert(p.toSet.contains(blocks("l_main_1").jump)) @@ -314,9 +313,8 @@ class IRTest extends AnyFunSuite with CaptureOutput { ) ) - val blockOrder = p.mainProcedure.preOrderIterator.collect { - case b: Block => - b.label + val blockOrder = p.mainProcedure.preOrderIterator.collect { case b: Block => + b.label }.toList // assert(blockOrder == List("lmain", "lmain1", "lmainret", "lmain3")) diff --git a/src/test/scala/ir/IRToDSLTest.scala b/src/test/scala/ir/IRToDSLTest.scala index dfb6e89f90..d9da3142fa 100644 --- a/src/test/scala/ir/IRToDSLTest.scala +++ b/src/test/scala/ir/IRToDSLTest.scala @@ -130,9 +130,8 @@ class IRToDSLTest extends AnyFunSuite with CaptureOutput { // for each procedure, check that the conversion is correct, // i.e., is structurally equal to the original dsl procedure - (dslprog.allProcedures zip irprog.procedures).foreach { - case (dslproc, proc) => - assertDeepEquality(dslproc) { IRToDSL.convertProcedure(proc) } + (dslprog.allProcedures zip irprog.procedures).foreach { case (dslproc, proc) => + assertDeepEquality(dslproc) { IRToDSL.convertProcedure(proc) } } } diff --git a/src/test/scala/test_util/TestValueDomainWithInterpreter.scala b/src/test/scala/test_util/TestValueDomainWithInterpreter.scala index 7095565d88..fd8cd60857 100644 --- 
a/src/test/scala/test_util/TestValueDomainWithInterpreter.scala +++ b/src/test/scala/test_util/TestValueDomainWithInterpreter.scala @@ -130,12 +130,10 @@ trait TestValueDomainWithInterpreter[T] { val interpretResult = State.execute(initState, InterpFuns.callProcedure(interp)(startProc, startParams)) val breakres: List[(BreakPoint, _, List[(String, Expr, Option[Expr])])] = interpretResult(1) - val checkResults = breakres.flatMap { - case (bp, _, evaledExprs) => - evaledExprs.grouped(2).map(_.toList).map { - case List((_, variable, varValue), (name, test, evaled)) => - CheckResult(name, bp, test, variable, varValue, evaled) - } + val checkResults = breakres.flatMap { case (bp, _, evaledExprs) => + evaledExprs.grouped(2).map(_.toList).map { case List((_, variable, varValue), (name, test, evaled)) => + CheckResult(name, bp, test, variable, varValue, evaled) + } }.toList InterpreterTestResult(interpretResult(0).nextCmd, checkResults) From c7e98f6c0fb5976d6fcb940bfa0d8578e92e82ca Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 15:43:59 +1000 Subject: [PATCH 26/51] scalafmt --- src/main/scala/Main.scala | 3 +-- src/main/scala/gtirb/GTIRBReadELF.scala | 27 +++++++++++------------- src/main/scala/gtirb/GTIRBResolver.scala | 23 ++++++++++---------- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 669d2d9429..da54a4de28 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -339,8 +339,7 @@ object Main { val fIn = java.io.FileInputStream(loadingInputs.inputFile) val ir = com.grammatech.gtirb.proto.IR.IR.parseFrom(fIn) - println(ir.modules.map(x => - gtirb.GTIRBReadELF.getExternalFunctions(gtirb.GTIRBResolver(x)))) + println(ir.modules.map(x => gtirb.GTIRBReadELF.getExternalFunctions(gtirb.GTIRBResolver(x)))) return } diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index e9a29fade6..b8b0731bda 100644 --- 
a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -63,29 +63,26 @@ object GTIRBReadELF { import scala.math.Ordering.Implicits.seqOrdering val allSymbols = gtirb.symbolKindsByUuid - .map { - case (k, pos) => - val sym = k.get - println(k) - val addr = k.getReferentBlock.map(_.address) - val value = k.getScalarValue.fold("")("val=" + _.toString) - (k.symTabIdx, addr, pos) -> s"${sym.name} $value" + .map { case (k, pos) => + val sym = k.get + println(k) + val addr = k.getReferentBlock.map(_.address) + val value = k.getScalarValue.fold("")("val=" + _.toString) + (k.symTabIdx, addr, pos) -> s"${sym.name} $value" } .to(SortedMap) println(allSymbols.mkString("\n")) println() println(".rela.dyn") - relaDyns.foreach { - case x => - val symid = gtirb.symbolTables(".dynsym")(x.r_sym.toInt) - println(s"$x " + symid.get.name) + relaDyns.foreach { case x => + val symid = gtirb.symbolTables(".dynsym")(x.r_sym.toInt) + println(s"$x " + symid.get.name) } println(".rela.plt") - relaPlts.foreach { - case x => - val symid = gtirb.symbolTables(".dynsym")(x.r_sym.toInt) - println(s"$x " + symid.get.name) + relaPlts.foreach { case x => + val symid = gtirb.symbolTables(".dynsym")(x.r_sym.toInt) + println(s"$x " + symid.get.name) } val specGlobals = gtirb.symbolKindsByUuid.toList.collect { diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index bd101befe1..902a6f7273 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -72,7 +72,7 @@ case class GTIRBResolver(mod: Module) { def getEntries = funcEntries(x) def getName = funcNames(x) - private def mapFirst[T,T2,U](f: T => T2)(x: (T, U)) = (f(x._1), x._2) + private def mapFirst[T, T2, U](f: T => T2)(x: (T, U)) = (f(x._1), x._2) val proxyBlockUuids = mod.proxies.map(x => Uuid.Block(x.uuid)).toSet val symbolsByUuid = mod.symbols.map(x => Uuid.Symbol(x.uuid) -> x).toMap @@ -81,32 +81,31 @@ case class 
GTIRBResolver(mod: Module) { sec <- mod.sections.toList interval <- sec.byteIntervals (uuid, innerb, outerb) <- interval.blocks.collect { - case b @ Block(_, Block.Value.Data(dat), _) => (dat.uuid, (dat : DataBlock | CodeBlock), b) - case b @ Block(_, Block.Value.Code(cod), _) => (cod.uuid, (cod : DataBlock | CodeBlock), b) + case b @ Block(_, Block.Value.Data(dat), _) => (dat.uuid, (dat: DataBlock | CodeBlock), b) + case b @ Block(_, Block.Value.Code(cod), _) => (cod.uuid, (cod: DataBlock | CodeBlock), b) } id: Uuid.Block = Uuid.Block(uuid) } yield id -> BlockData(innerb, outerb, interval, sec)).toMap val sectionsByName = mod.sections.map(x => x.name -> x).toMap - val symbolTabIdxByUuid = AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(Uuid.Symbol(_))) + val symbolTabIdxByUuid = + AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(Uuid.Symbol(_))) val symbolTables = symbolTabIdxByUuid - .flatMap { - case (sym, idxs) => - idxs.map(_ -> sym) + .flatMap { case (sym, idxs) => + idxs.map(_ -> sym) } .groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_ ++ _) val symbolKindsByUuid = decodeAux(AuxKind.ElfSymbolInfo)(mod) .map(mapFirst(Uuid.Symbol(_))) - val funcNames = decodeAux(AuxKind.FunctionNames)(mod).map { - case (fun, sym) => Uuid.Function(fun) -> Uuid.Symbol(sym) + val funcNames = decodeAux(AuxKind.FunctionNames)(mod).map { case (fun, sym) => + Uuid.Function(fun) -> Uuid.Symbol(sym) } val funcNamesInverse = funcNames.map(_.swap) - val funcEntries = decodeAux(AuxKind.FunctionEntries)(mod).map { - case (a, b) => Uuid.Function(a) -> b.map(Uuid.Block(_)) + val funcEntries = decodeAux(AuxKind.FunctionEntries)(mod).map { case (a, b) => + Uuid.Function(a) -> b.map(Uuid.Block(_)) } - } From 07fc539694b123c384cf17fe7497d9e34d998694 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 15:50:49 +1000 Subject: [PATCH 27/51] allow string in Uuid inputs --- 
src/main/scala/gtirb/GTIRBResolver.scala | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index 902a6f7273..78d4657c91 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -24,9 +24,6 @@ import scala.collection.immutable.SortedMap case class GTIRBResolver(mod: Module) { - private def b64(bs: ByteString) = - java.util.Base64.getEncoder().encodeToString(bs.toByteArray) - sealed trait Uuid(val kind: String, val uuid: String) { override def toString = s"$kind:$uuid" override def equals(o: Any) = o match { @@ -35,10 +32,17 @@ case class GTIRBResolver(mod: Module) { } override def hashCode = (kind, uuid).hashCode } + object Uuid { - class Block(xs: ByteString) extends Uuid("blok", b64(xs)) - class Function(xs: ByteString) extends Uuid("func", b64(xs)) - class Symbol(xs: ByteString) extends Uuid("symb", b64(xs)) + + private def b64(bs: String | ByteString) = bs match { + case s: String => s + case bs: ByteString => java.util.Base64.getEncoder().encodeToString(bs.toByteArray) + } + + class Block(xs: String | ByteString) extends Uuid("blok", b64(xs)) + class Function(xs: String | ByteString) extends Uuid("func", b64(xs)) + class Symbol(xs: String | ByteString) extends Uuid("symb", b64(xs)) } case class BlockData(inner: DataBlock | CodeBlock, block: Block, interval: ByteInterval, section: Section) { From c23d19f549171c7f883161463b6c439b5c593492 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 16:14:11 +1000 Subject: [PATCH 28/51] docs --- src/main/scala/gtirb/GTIRBReadELF.scala | 10 +-- src/main/scala/gtirb/GTIRBResolver.scala | 78 +++++++++++++++++++++--- 2 files changed, 73 insertions(+), 15 deletions(-) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index b8b0731bda..b0d00010fa 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ 
b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -62,11 +62,11 @@ object GTIRBReadELF { val relaPlts = parseRelaTab(gtirb.sectionsByName(".rela.plt").byteIntervals.head.contents) import scala.math.Ordering.Implicits.seqOrdering - val allSymbols = gtirb.symbolKindsByUuid + val allSymbols = gtirb.symbolEntriesByUuid .map { case (k, pos) => val sym = k.get println(k) - val addr = k.getReferentBlock.map(_.address) + val addr = k.getReferentUuid.flatMap(_.getOption).map(_.address) val value = k.getScalarValue.fold("")("val=" + _.toString) (k.symTabIdx, addr, pos) -> s"${sym.name} $value" } @@ -85,16 +85,16 @@ object GTIRBReadELF { println(s"$x " + symid.get.name) } - val specGlobals = gtirb.symbolKindsByUuid.toList.collect { + val specGlobals = gtirb.symbolEntriesByUuid.toList.collect { case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => - val blk = symid.getReferentBlock.get + val blk = symid.getReferentUuid.get.get val sec = blk.section assert(mod.sections(idx.toInt - 1) == sec) (symid.get.name, blk.size * 8, None, blk.address) } println(specGlobals) - val funentry = gtirb.symbolKindsByUuid.toList.collect { + val funentry = gtirb.symbolEntriesByUuid.toList.collect { case (symid, (size, "FUNC", "GLOBAL", "DEFAULT", idx)) if idx != 0 => val nameSymbol = symid.get diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index 78d4657c91..3ddd632ac0 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -22,6 +22,17 @@ import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload import scala.collection.mutable import scala.collection.immutable.SortedMap +/** + * A class for querying the GTIRB IR, abstracting away common operations of + * searching for symbols, functions, blocks, and their relations. The inner + * type [[Uuid]] wraps a Base64 UUID. 
A number of UUID subtypes are defined to + * distinguish between different kinds of UUID within GTIRB, for example [[Uuid.Block]] + * and [[Uuid.Function]]. + * + * Each Uuid specialisation defines a number of extension methods for common + * query operations. For example, accessing the object itself from its Uuid can be + * done via the `.get` methods. + */ case class GTIRBResolver(mod: Module) { sealed trait Uuid(val kind: String, val uuid: String) { @@ -45,35 +56,77 @@ case class GTIRBResolver(mod: Module) { class Symbol(xs: String | ByteString) extends Uuid("symb", b64(xs)) } + /** + * Represents a GTIRB code/data block and its parents. In GTIRB, a block + * occurs within a byte interval, which occurs within a section. Desirable information, + * such as offset and address, is spread across these levels, so it is useful to bundle + * them all together. + * + * See the [GTIRB structure diagram](https://github.com/GrammaTech/gtirb#structure). + */ case class BlockData(inner: DataBlock | CodeBlock, block: Block, interval: ByteInterval, section: Section) { - def uuid = inner match { + val uuid = inner match { case x: DataBlock => x.uuid case x: CodeBlock => x.uuid } - def size = inner match { + val size = inner match { case x: DataBlock => x.size case x: CodeBlock => x.size } - def address = block.offset + interval.address + val address = block.offset + interval.address } extension (x: Uuid.Block) def get = blocksByUuid(x) + def getOption = blocksByUuid.get(x) def isProxyBlock = proxyBlockUuids.contains(x) extension (x: Uuid.Symbol) def get = symbolsByUuid(x) + + /** + * Returns the list of symbol table indices where this symbol can be found. + * Each index is a tuple of table name and index within that table. + */ def symTabIdx = symbolTabIdxByUuid(x) - def symKind = symbolKindsByUuid(x) - def getReferentBlock = for { + + /** + * Returns the `.symtab` entry for the given symbol. + * This is a 5-tuple made up of size, type, binding, visibility, and index.
+ */ + def symEntry = symbolEntriesByUuid(x) + + /** + * Gets the [[Uuid.Block]] referred to by this symbol. + * This is mutually-exclusive with [[getScalarValue]], + * only one of these can be non-None. + */ + def getReferentUuid = for { uuid <- x.get.optionalPayload.referentUuid - blok <- blocksByUuid.get(Uuid.Block(uuid)) - } yield blok + } yield Uuid.Block(uuid) + + /** + * Gets the scalar value associated with this symbol. + * This is mutually-exclusive with [[getReferentUuid]], + * only one of these can be non-None. + */ def getScalarValue = x.get.optionalPayload._value + + /** + * Gets the [[Uuid.Function]] associated with this symbol, + * or None if this is not a function name symbol. + */ def getFunction = funcNamesInverse.get(x) extension (x: Uuid.Function) + /** + * Gets the set of entry block UUIDs for the given function. + */ def getEntries = funcEntries(x) + + /** + * Gets the [[Uuid.Symbol]] for the given function. + */ def getName = funcNames(x) private def mapFirst[T, T2, U](f: T => T2)(x: (T, U)) = (f(x._1), x._2) @@ -88,20 +141,25 @@ case class GTIRBResolver(mod: Module) { case b @ Block(_, Block.Value.Data(dat), _) => (dat.uuid, (dat: DataBlock | CodeBlock), b) case b @ Block(_, Block.Value.Code(cod), _) => (cod.uuid, (cod: DataBlock | CodeBlock), b) } - id: Uuid.Block = Uuid.Block(uuid) + id = Uuid.Block(uuid) } yield id -> BlockData(innerb, outerb, interval, sec)).toMap val sectionsByName = mod.sections.map(x => x.name -> x).toMap - val symbolTabIdxByUuid = + val symbolTabIdxByUuid: Map[Uuid.Symbol, List[(String, BigInt)]] = AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(Uuid.Symbol(_))) + + /** + * A nested map indexed by section name, then symbol index, and returning a symbol uuid. + * For example, `symbolTables(".symtab")(63)`. 
+ */ val symbolTables = symbolTabIdxByUuid .flatMap { case (sym, idxs) => idxs.map(_ -> sym) } .groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_ ++ _) - val symbolKindsByUuid = decodeAux(AuxKind.ElfSymbolInfo)(mod) + val symbolEntriesByUuid = decodeAux(AuxKind.ElfSymbolInfo)(mod) .map(mapFirst(Uuid.Symbol(_))) val funcNames = decodeAux(AuxKind.FunctionNames)(mod).map { case (fun, sym) => From b058f746ea50693fd377d2c06a9f35a354e44f5d Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 17:38:45 +1000 Subject: [PATCH 29/51] construct ReadELFData struct --- src/main/scala/Main.scala | 15 ++- src/main/scala/gtirb/GTIRBReadELF.scala | 131 ++++++++++++++++------- src/main/scala/gtirb/GTIRBResolver.scala | 16 ++- src/main/scala/util/RunUtils.scala | 17 ++- 4 files changed, 132 insertions(+), 47 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index da54a4de28..7363b8a7d2 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -329,17 +329,24 @@ object Main { ) } + import gtirb.* + import ir.dsl.given if (conf.dumpRelf.value) { val relfFile = loadingInputs.relfFile.getOrElse { throw IllegalArgumentException("--dump-relf requires --relf") } val relfData = IRLoading.loadReadELF(relfFile, loadingInputs) println(relfData.toScala) - println() - val fIn = java.io.FileInputStream(loadingInputs.inputFile) - val ir = com.grammatech.gtirb.proto.IR.IR.parseFrom(fIn) - println(ir.modules.map(x => gtirb.GTIRBReadELF.getExternalFunctions(gtirb.GTIRBResolver(x)))) + import com.grammatech.gtirb.proto.IR.IR + + import java.io.* + val ir = IR.parseFrom(FileInputStream(loadingInputs.inputFile)) + + + val gtirb = GTIRBResolver(ir.modules.head) + val gtirbRelfLoader = GTIRBReadELF(gtirb) + println(gtirbRelfLoader.getReadELFData("main").toScala) return } diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index b0d00010fa..a052a41a76 100644 --- 
a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -1,8 +1,13 @@ package gtirb +import util.Logger import gtirb.AuxDecoder import gtirb.AuxDecoder.{AuxKind, decodeAux} +import translating.{ELFSymType, ELFBind, ELFVis, ELFNDX, ELFSymbol, ReadELFData} +import specification.{ExternalFunction, FuncEntry} +import boogie.{SpecGlobal} + import java.io.ByteArrayInputStream import com.google.protobuf.ByteString @@ -19,7 +24,7 @@ import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload import scala.collection.mutable import scala.collection.immutable.SortedMap -object GTIRBReadELF { +class GTIRBReadELF(protected val gtirb: GTIRBResolver) { /** * An `Elf64_Rela` structure, as described by the [System V ABI](https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.reloc.html). @@ -33,14 +38,14 @@ object GTIRBReadELF { // https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html // https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types - def readRela(bs: AuxDecoder.Input) = + protected def readRela(bs: AuxDecoder.Input) = import AuxDecoder.* val (r_offset, r_info, r_addend) = readTuple(readUint(64), readUint(64), readUint(64))(bs) val r_sym = r_info >> 32 val r_type = r_info & 0xffffffffL Elf64Rela(r_offset, r_info, r_addend, r_sym.toLong, r_type.toLong) - def parseRelaTab(bstr: ByteString) = + protected def parseRelaTab(bstr: ByteString) = val bs = ByteArrayInputStream(bstr.toByteArray) List.unfold(bs) { case bs if bs.available() > 0 => Some(readRela(bs), bs) @@ -54,47 +59,86 @@ object GTIRBReadELF { // // https://www.man7.org/linux/man-pages/man5/elf.5.html - def getExternalFunctions(gtirb: GTIRBResolver) = { + // Full ELF32 specification: https://refspecs.linuxfoundation.org/elf/elf.pdf - val mod = gtirb.mod + // Full ELF64 specification: https://irix7.com/techpubs/007-4658-001.pdf - val relaDyns = parseRelaTab(gtirb.sectionsByName(".rela.dyn").byteIntervals.head.contents) - val relaPlts = 
parseRelaTab(gtirb.sectionsByName(".rela.plt").byteIntervals.head.contents) + /** + * https://refspecs.linuxfoundation.org/elf/elf.pdf + * Figure 1-7. Special Section Indexes + */ + protected def parseElfNdx(n: BigInt) = n.toInt match { + case 0 => ELFNDX.UND + case 0xfff1 => ELFNDX.ABS + case i => + if (i >= 0xff00) + Logger.warn("unhandled special elf section index: " + i) + ELFNDX.Section(i) + } - import scala.math.Ordering.Implicits.seqOrdering - val allSymbols = gtirb.symbolEntriesByUuid - .map { case (k, pos) => + /** + * https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#dynamic-relocations + */ + def parseRela(rela: Elf64Rela) = + val sym = gtirb.symbolTables(".dynsym")(rela.r_sym.toInt).get + + rela.r_type match { + case 1025 | 1026 => Right(ExternalFunction(sym.name, rela.r_offset)) + case 1027 => Left( (rela.r_offset, rela.r_addend)) + } + + def getAllSymbols() = { + gtirb.symbolEntriesByUuid + .flatMap { case (k, pos) => val sym = k.get - println(k) - val addr = k.getReferentUuid.flatMap(_.getOption).map(_.address) - val value = k.getScalarValue.fold("")("val=" + _.toString) - (k.symTabIdx, addr, pos) -> s"${sym.name} $value" + val block = k.getReferentUuid.flatMap(_.getOption) + + val idx = k.symTabIdx.collectFirst { + case (".symtab", i) => i.toInt + } + + val addr = block.map(x => BigInt(x.address)) + val value = k.getScalarValue.map(BigInt(_)) + val combinedValue = addr.orElse(value).getOrElse(BigInt(0)) + + val (size, ty, bind, vis, shndx) = k.symEntry + + ty match { + case "NONE" => None + case ty => Some( + ELFSymbol(idx.getOrElse(-1), combinedValue, size.toInt, ELFSymType.valueOf(ty), + ELFBind.valueOf(bind), + ELFVis.valueOf(vis), + parseElfNdx(shndx), + sym.name + ) + ) } - .to(SortedMap) - println(allSymbols.mkString("\n")) - - println() - println(".rela.dyn") - relaDyns.foreach { case x => - val symid = gtirb.symbolTables(".dynsym")(x.r_sym.toInt) - println(s"$x " + symid.get.name) - } - println(".rela.plt") - 
relaPlts.foreach { case x => - val symid = gtirb.symbolTables(".dynsym")(x.r_sym.toInt) - println(s"$x " + symid.get.name) - } + }.toList.sortBy(x => x.num) + } - val specGlobals = gtirb.symbolEntriesByUuid.toList.collect { + def getRelocations() = { + val relaDyns = parseRelaTab(gtirb.sectionsByName(".rela.dyn").byteIntervals.head.contents) + val relaPlts = parseRelaTab(gtirb.sectionsByName(".rela.plt").byteIntervals.head.contents) + + val (offs, exts) = (relaDyns.view ++ relaPlts.view).partitionMap(parseRela) + + (offs.toMap, exts.toSet) + } + + def getGlobals() = { + gtirb.symbolEntriesByUuid.view.collect { case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => val blk = symid.getReferentUuid.get.get val sec = blk.section - assert(mod.sections(idx.toInt - 1) == sec) - (symid.get.name, blk.size * 8, None, blk.address) - } - println(specGlobals) + assert(gtirb.mod.sections(idx.toInt - 1) == sec) + SpecGlobal(symid.get.name, (blk.size * 8).toInt, None, blk.address) + }.toSet + } - val funentry = gtirb.symbolEntriesByUuid.toList.collect { + def getFunctionEntries() = { + + gtirb.symbolEntriesByUuid.view.collect { case (symid, (size, "FUNC", "GLOBAL", "DEFAULT", idx)) if idx != 0 => val nameSymbol = symid.get @@ -105,10 +149,23 @@ object GTIRBReadELF { val entry = entries.head val addr = entry.get.address - (nameSymbol.name, size * 8, addr) - } + FuncEntry(nameSymbol.name, (size * 8).toInt, addr) + }.toSet + } - println(funentry) + def getEntryPoint(mainProcedureName: String) = { + gtirb.symbolsByName(mainProcedureName).getReferentUuid.get.get.address + } + def getReadELFData(mainProcedureName: String) = { + + val syms = getAllSymbols() + val (offs, exts) = getRelocations() + val globs = getGlobals() + val funs = getFunctionEntries() + val main = getEntryPoint(mainProcedureName) + + ReadELFData(syms, exts, globs, funs, offs, main) } + } diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index 3ddd632ac0..8ef4a972ed 
100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -33,7 +33,7 @@ import scala.collection.immutable.SortedMap * query operations. For example, accessing the object itself from its Uuid can be * done via the `.get` methods. */ -case class GTIRBResolver(mod: Module) { +case class GTIRBResolver(val mod: Module) { sealed trait Uuid(val kind: String, val uuid: String) { override def toString = s"$kind:$uuid" @@ -66,8 +66,8 @@ case class GTIRBResolver(mod: Module) { */ case class BlockData(inner: DataBlock | CodeBlock, block: Block, interval: ByteInterval, section: Section) { val uuid = inner match { - case x: DataBlock => x.uuid - case x: CodeBlock => x.uuid + case x: DataBlock => Uuid.Block(x.uuid) + case x: CodeBlock => Uuid.Block(x.uuid) } val size = inner match { case x: DataBlock => x.size @@ -92,12 +92,15 @@ case class GTIRBResolver(mod: Module) { /** * Returns the `.symtab` entry for the given symbol. - * This is a 5-tuple made up of size, type, binding, visibility, and index. + * This is a 5-tuple made up of size, type, binding, visibility, and section index. + * Every symbol table entry is in relation to some section. The section index is the + * index of the relevant section's section header. */ def symEntry = symbolEntriesByUuid(x) /** - * Gets the [[Uuid.Block]] referred to by this symbol. + * Gets the [[Uuid.Block]] referred to by this symbol, for example + * a data block or code block. * This is mutually-exclusive with [[getScalarValue]], * only one of these can be non-None. 
*/ @@ -133,6 +136,7 @@ case class GTIRBResolver(mod: Module) { val proxyBlockUuids = mod.proxies.map(x => Uuid.Block(x.uuid)).toSet val symbolsByUuid = mod.symbols.map(x => Uuid.Symbol(x.uuid) -> x).toMap + val symbolsByName = mod.symbols.map(x => x.name -> Uuid.Symbol(x.uuid)).toMap val blocksByUuid = (for { sec <- mod.sections.toList @@ -170,4 +174,6 @@ case class GTIRBResolver(mod: Module) { Uuid.Function(a) -> b.map(Uuid.Block(_)) } + val entryPoint = Uuid.Block(mod.entryPoint) + } diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index 3d740925db..598712975c 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -212,7 +212,22 @@ object IRLoading { val parser = ReadELFParser(tokens) parser.setErrorHandler(BailErrorStrategy()) parser.setBuildParseTree(true) - ReadELFLoader.visitSyms(parser.syms(), config) + + val relf = ReadELFLoader.visitSyms(parser.syms(), config) + + if (config.inputFile.endsWith(".gts")) { + val ir = IR.parseFrom(FileInputStream(config.inputFile)) + if (ir.modules.length != 1) { + Logger.warn(s"GTIRB file ${config.inputFile} unexpectedly has ${ir.modules.length} modules") + } + + val gtirb = GTIRBResolver(ir.modules.head) + val gtirbRelfLoader = GTIRBReadELF(gtirb) + val gtirbRelf = gtirbRelfLoader.getAllSymbols() + + } + + relf } def emptySpecification(globals: Set[SpecGlobal]) = From 96fcd87a3d19856d6316559d19dd77fb40ab5641 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 18:23:57 +1000 Subject: [PATCH 30/51] diffing. 
WHY IS IT OFF BY 8 RAHHH --- src/main/scala/Main.scala | 10 ----- src/main/scala/gtirb/GTIRBReadELF.scala | 50 ++++++++++++++++++++++++- src/main/scala/util/RunUtils.scala | 4 +- 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 7363b8a7d2..b47ef98fcd 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -337,16 +337,6 @@ object Main { } val relfData = IRLoading.loadReadELF(relfFile, loadingInputs) println(relfData.toScala) - - import com.grammatech.gtirb.proto.IR.IR - - import java.io.* - val ir = IR.parseFrom(FileInputStream(loadingInputs.inputFile)) - - - val gtirb = GTIRBResolver(ir.modules.head) - val gtirbRelfLoader = GTIRBReadELF(gtirb) - println(gtirbRelfLoader.getReadELFData("main").toScala) return } diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index a052a41a76..5cccb3fc0c 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -153,7 +153,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { }.toSet } - def getEntryPoint(mainProcedureName: String) = { + def getMainAddress(mainProcedureName: String) = { gtirb.symbolsByName(mainProcedureName).getReferentUuid.get.get.address } @@ -163,9 +163,55 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { val (offs, exts) = getRelocations() val globs = getGlobals() val funs = getFunctionEntries() - val main = getEntryPoint(mainProcedureName) + val main = getMainAddress(mainProcedureName) ReadELFData(syms, exts, globs, funs, offs, main) } + private val atSuffix = """@[A-Za-z_\d.]+$""".r + private def normaliseRelf(relf: ReadELFData) = { + val exts = relf.externalFunctions.map(x => x.copy(name = atSuffix.replaceFirstIn(x.name, ""))) + val syms = relf.symbolTable.collect { + case sym if sym.etype != ELFSymType.SECTION && sym.num != -1 => + sym.copy(name = atSuffix.replaceFirstIn(sym.name, "")) + } + + relf.copy( + 
externalFunctions = exts, + symbolTable = syms + ) + } + + /** + * Determines whether the current ReadELFData is compatible with + * a given reference ReadELFData. That is, whether the The given reference object is + * assumed to be the gold standard + */ + def checkReadELFCompatibility(gtirbRelf: ReadELFData, referenceRelf: ReadELFData): Boolean = { + var ok = true + + def check(b: Boolean, s: String) = { + if (!b) { + Logger.warn("PLEASE REPORT THIS ISSUE! include the gts and relf files. gtirb relf discrepancy, " + s) + ok = false + } + } + + def checkSet[T](x: Set[T], y: Set[T], s: String) = + check(x == y, s"$s:\ngtirb - relf = ${x -- y}\nrelf - gtirb = ${y--x}\n& = ${y & x}") + + def checkEq(x: Any, y: Any, s: String) = + check(x == y, s"$s: gtirb: $x, readelf: $y}") + + val g = normaliseRelf(gtirbRelf) + val o = normaliseRelf(referenceRelf) + checkEq(g.mainAddress, o.mainAddress, "main address differs") + checkEq(g.functionEntries, o.functionEntries, "function entries differ") + checkEq(g.relocationOffsets, o.relocationOffsets, "relocations differ") + checkEq(g.globalVariables, o.globalVariables, "global variables differ") + checkSet(g.externalFunctions, o.externalFunctions, "external functions differ") + checkSet(g.symbolTable.toSet, o.symbolTable.toSet, "symbol tables differ") + + ok + } } diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index 598712975c..f10b1cd200 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -137,7 +137,6 @@ object IRLoading { val (mainAddress, makeContext) = q.relfFile match { case Some(relf) => { - // TODO: this tuple is large, should be a case class val ReadELFData(symbols, externalFunctions, globals, funcEntries, globalOffsets, mainAddress) = IRLoading.loadReadELF(relf, q) @@ -223,8 +222,9 @@ object IRLoading { val gtirb = GTIRBResolver(ir.modules.head) val gtirbRelfLoader = GTIRBReadELF(gtirb) - val gtirbRelf = gtirbRelfLoader.getAllSymbols() + val 
gtirbRelf = gtirbRelfLoader.getReadELFData(config.mainProcedureName) + gtirbRelfLoader.checkReadELFCompatibility(gtirbRelf, relf) } relf From 6e7524401dd2a9cce88228fe94300cb2d2bffa95 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 18:39:47 +1000 Subject: [PATCH 31/51] --dump-relf writes files now --- src/main/scala/Main.scala | 27 ++++++++++++++++----------- src/main/scala/util/RunUtils.scala | 13 ++++++++++--- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index b47ef98fcd..53f205cdb9 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -143,8 +143,8 @@ object Main { interpret: Flag, @arg(name = "dump-il", doc = "Dump the Intermediate Language to text.") dumpIL: Option[String], - @arg(name = "dump-relf", doc = "Dump Basil's representation of the readelf information to stdout and exit.") - dumpRelf: Flag, + @arg(name = "dump-relf", doc = "Dump Basil's representation of the readelf information to the given file and exit.") + dumpRelf: Option[String], @arg(name = "main-procedure-name", short = 'm', doc = "Name of the main procedure to begin analysis at.") mainProcedureName: String = "main", @arg( @@ -329,15 +329,20 @@ object Main { ) } - import gtirb.* - import ir.dsl.given - if (conf.dumpRelf.value) { - val relfFile = loadingInputs.relfFile.getOrElse { - throw IllegalArgumentException("--dump-relf requires --relf") - } - val relfData = IRLoading.loadReadELF(relfFile, loadingInputs) - println(relfData.toScala) - return + conf.dumpRelf match { + case None => () + case Some(relfOut) => + val relfFile = loadingInputs.relfFile.getOrElse { + throw IllegalArgumentException("--dump-relf requires --relf") + } + val (relf, gtirb) = IRLoading.loadReadELFWithGTIRB(relfFile, loadingInputs) + + Logger.setLevel(LogLevel.DEBUG) + + import ir.dsl.given + writeToFile(relf.toScala, relfOut + "-readelf.scala") + gtirb.foreach(x => writeToFile(x.toScala, relfOut + "-gtsrelf.scala")) + 
return } if (loadingInputs.specFile.isDefined && loadingInputs.relfFile.isEmpty) { diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index f10b1cd200..0b0a239fb3 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -205,7 +205,7 @@ object IRLoading { GTIRBConverter.createIR() } - def loadReadELF(fileName: String, config: ILLoadingConfig): ReadELFData = { + def loadReadELFWithGTIRB(fileName: String, config: ILLoadingConfig): (ReadELFData, Option[ReadELFData]) = { val lexer = ReadELFLexer(CharStreams.fromFileName(fileName)) val tokens = CommonTokenStream(lexer) val parser = ReadELFParser(tokens) @@ -214,7 +214,7 @@ object IRLoading { val relf = ReadELFLoader.visitSyms(parser.syms(), config) - if (config.inputFile.endsWith(".gts")) { + val gtirbRelf = if (config.inputFile.endsWith(".gts")) { val ir = IR.parseFrom(FileInputStream(config.inputFile)) if (ir.modules.length != 1) { Logger.warn(s"GTIRB file ${config.inputFile} unexpectedly has ${ir.modules.length} modules") @@ -225,11 +225,18 @@ object IRLoading { val gtirbRelf = gtirbRelfLoader.getReadELFData(config.mainProcedureName) gtirbRelfLoader.checkReadELFCompatibility(gtirbRelf, relf) + Some(gtirbRelf) + } else { + None } - relf + (relf, gtirbRelf) } + + def loadReadELF(fileName: String, config: ILLoadingConfig) = + loadReadELFWithGTIRB(fileName, config)._1 + def emptySpecification(globals: Set[SpecGlobal]) = Specification(Set(), globals, Map(), List(), List(), List(), Set()) From 4d44074d1ccfbf6da892d8f996a5b4420c968bac Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 18:43:02 +1000 Subject: [PATCH 32/51] fix size bug in global variables --- src/main/scala/gtirb/GTIRBReadELF.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 5cccb3fc0c..59842cd6ce 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ 
b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -132,7 +132,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { val blk = symid.getReferentUuid.get.get val sec = blk.section assert(gtirb.mod.sections(idx.toInt - 1) == sec) - SpecGlobal(symid.get.name, (blk.size * 8).toInt, None, blk.address) + SpecGlobal(symid.get.name, (size * 8).toInt, None, blk.address) }.toSet } From 1c88565ef7772e588b4137ca25ec8cc0fe781167 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 18:43:29 +1000 Subject: [PATCH 33/51] scalfmt --- src/main/scala/gtirb/GTIRBReadELF.scala | 49 +++++++++++++------------ src/main/scala/util/RunUtils.scala | 1 - 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 59842cd6ce..8a396338a7 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -84,7 +84,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { rela.r_type match { case 1025 | 1026 => Right(ExternalFunction(sym.name, rela.r_offset)) - case 1027 => Left( (rela.r_offset, rela.r_addend)) + case 1027 => Left((rela.r_offset, rela.r_addend)) } def getAllSymbols() = { @@ -93,8 +93,8 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { val sym = k.get val block = k.getReferentUuid.flatMap(_.getOption) - val idx = k.symTabIdx.collectFirst { - case (".symtab", i) => i.toInt + val idx = k.symTabIdx.collectFirst { case (".symtab", i) => + i.toInt } val addr = block.map(x => BigInt(x.address)) @@ -105,16 +105,23 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { ty match { case "NONE" => None - case ty => Some( - ELFSymbol(idx.getOrElse(-1), combinedValue, size.toInt, ELFSymType.valueOf(ty), - ELFBind.valueOf(bind), - ELFVis.valueOf(vis), - parseElfNdx(shndx), - sym.name - ) - ) + case ty => + Some( + ELFSymbol( + idx.getOrElse(-1), + combinedValue, + size.toInt, + ELFSymType.valueOf(ty), + ELFBind.valueOf(bind), + 
ELFVis.valueOf(vis), + parseElfNdx(shndx), + sym.name + ) + ) + } } - }.toList.sortBy(x => x.num) + .toList + .sortBy(x => x.num) } def getRelocations() = { @@ -127,12 +134,11 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { } def getGlobals() = { - gtirb.symbolEntriesByUuid.view.collect { - case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => - val blk = symid.getReferentUuid.get.get - val sec = blk.section - assert(gtirb.mod.sections(idx.toInt - 1) == sec) - SpecGlobal(symid.get.name, (size * 8).toInt, None, blk.address) + gtirb.symbolEntriesByUuid.view.collect { case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => + val blk = symid.getReferentUuid.get.get + val sec = blk.section + assert(gtirb.mod.sections(idx.toInt - 1) == sec) + SpecGlobal(symid.get.name, (size * 8).toInt, None, blk.address) }.toSet } @@ -175,10 +181,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { sym.copy(name = atSuffix.replaceFirstIn(sym.name, "")) } - relf.copy( - externalFunctions = exts, - symbolTable = syms - ) + relf.copy(externalFunctions = exts, symbolTable = syms) } /** @@ -197,7 +200,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { } def checkSet[T](x: Set[T], y: Set[T], s: String) = - check(x == y, s"$s:\ngtirb - relf = ${x -- y}\nrelf - gtirb = ${y--x}\n& = ${y & x}") + check(x == y, s"$s:\ngtirb - relf = ${x -- y}\nrelf - gtirb = ${y -- x}\n& = ${y & x}") def checkEq(x: Any, y: Any, s: String) = check(x == y, s"$s: gtirb: $x, readelf: $y}") diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index 0b0a239fb3..10ea730ee8 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -233,7 +233,6 @@ object IRLoading { (relf, gtirbRelf) } - def loadReadELF(fileName: String, config: ILLoadingConfig) = loadReadELFWithGTIRB(fileName, config)._1 From 20a77b5b1cb637018419d92da114864499dfc819 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 20:20:38 +1000 Subject: 
[PATCH 34/51] fix atEnd bug! yay. it all matches now except crtstuff.c ?? --- src/main/scala/Main.scala | 6 ++++- src/main/scala/gtirb/GTIRBReadELF.scala | 33 ++++++++++++++++-------- src/main/scala/gtirb/GTIRBResolver.scala | 10 +++++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 53f205cdb9..6c9f1f4d61 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -335,9 +335,13 @@ object Main { val relfFile = loadingInputs.relfFile.getOrElse { throw IllegalArgumentException("--dump-relf requires --relf") } + Logger.setLevel(LogLevel.DEBUG) val (relf, gtirb) = IRLoading.loadReadELFWithGTIRB(relfFile, loadingInputs) - Logger.setLevel(LogLevel.DEBUG) + // skip writing files if the given path is an empty string + if (relfOut.trim.isEmpty) + return + import ir.dsl.given writeToFile(relf.toScala, relfOut + "-readelf.scala") diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 8a396338a7..5556691cbe 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -91,15 +91,14 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { gtirb.symbolEntriesByUuid .flatMap { case (k, pos) => val sym = k.get - val block = k.getReferentUuid.flatMap(_.getOption) val idx = k.symTabIdx.collectFirst { case (".symtab", i) => i.toInt } - val addr = block.map(x => BigInt(x.address)) - val value = k.getScalarValue.map(BigInt(_)) - val combinedValue = addr.orElse(value).getOrElse(BigInt(0)) + val addr = k.getReferentAddress + val value = k.getScalarValue + val combinedValue = addr.orElse(value).getOrElse(0L) val (size, ty, bind, vis, shndx) = k.symEntry @@ -173,12 +172,23 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { ReadELFData(syms, exts, globs, funs, offs, main) } + private val atSuffix = """@[A-Za-z_\d.]+$""".r + + /** + * Strips away some information from `readelf`'s [[ReadELFData]] + * which 
is not so important and not produced by the GTIRB ELF loader. + * + * For example, this throws away symbols of type SECTION and symbols beginning with `$`. + * It also strips the `@GLIBC_XX.X` suffix from symbol names. + */ private def normaliseRelf(relf: ReadELFData) = { val exts = relf.externalFunctions.map(x => x.copy(name = atSuffix.replaceFirstIn(x.name, ""))) - val syms = relf.symbolTable.collect { - case sym if sym.etype != ELFSymType.SECTION && sym.num != -1 => - sym.copy(name = atSuffix.replaceFirstIn(sym.name, "")) + val syms = relf.symbolTable.flatMap { + case ELFSymbol(_,0,0,ELFSymType.FILE,ELFBind.LOCAL,ELFVis.DEFAULT,ELFNDX.ABS,"crtstuff.c") => None + case sym if sym.etype != ELFSymType.SECTION && sym.num != -1 && !sym.name.startsWith("$") => + Some(sym.copy(name = atSuffix.replaceFirstIn(sym.name, ""))) + case _ => None } relf.copy(externalFunctions = exts, symbolTable = syms) @@ -192,17 +202,17 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { def checkReadELFCompatibility(gtirbRelf: ReadELFData, referenceRelf: ReadELFData): Boolean = { var ok = true - def check(b: Boolean, s: String) = { + inline def check(b: Boolean, s: String) = { if (!b) { Logger.warn("PLEASE REPORT THIS ISSUE! include the gts and relf files. 
gtirb relf discrepancy, " + s) ok = false } } - def checkSet[T](x: Set[T], y: Set[T], s: String) = - check(x == y, s"$s:\ngtirb - relf = ${x -- y}\nrelf - gtirb = ${y -- x}\n& = ${y & x}") + inline def checkSet[T](x: Set[T], y: Set[T], s: String) = + check(x == y, s"$s:\ngtirb - relf = ${x -- y}\nrelf - gtirb = ${y -- x}") - def checkEq(x: Any, y: Any, s: String) = + inline def checkEq(x: Any, y: Any, s: String) = check(x == y, s"$s: gtirb: $x, readelf: $y}") val g = normaliseRelf(gtirbRelf) @@ -214,6 +224,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { checkSet(g.externalFunctions, o.externalFunctions, "external functions differ") checkSet(g.symbolTable.toSet, o.symbolTable.toSet, "symbol tables differ") + Logger.debug("gtirb relf and readelf relf compatible: " + ok) ok } diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index 8ef4a972ed..ed5164be13 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -108,6 +108,16 @@ case class GTIRBResolver(val mod: Module) { uuid <- x.get.optionalPayload.referentUuid } yield Uuid.Block(uuid) + /** + * Gets the address referred to by this symbol, if the referent is a + * block. Correctly takes into account the `atEnd` field of [[Symbol]]. + */ + def getReferentAddress = for { + uuid <- x.getReferentUuid + block <- uuid.getOption + atEndOffset = if x.get.atEnd then block.size else 0 + } yield block.address + atEndOffset + /** * Gets the scalar value associated with this symbol. 
* This is mutually-exclusive with [[getReferentUuid]], From 742a84a11cfe732cb61d59d2cf582269a7e36157 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 20:22:54 +1000 Subject: [PATCH 35/51] scalafmt --- src/main/scala/Main.scala | 1 - src/main/scala/gtirb/GTIRBReadELF.scala | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 6c9f1f4d61..afb017aa52 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -342,7 +342,6 @@ object Main { if (relfOut.trim.isEmpty) return - import ir.dsl.given writeToFile(relf.toScala, relfOut + "-readelf.scala") gtirb.foreach(x => writeToFile(x.toScala, relfOut + "-gtsrelf.scala")) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 5556691cbe..a23b66fa39 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -182,10 +182,10 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { * For example, this throws away symbols of type SECTION and symbols beginning with `$`. * It also strips the `@GLIBC_XX.X` suffix from symbol names. 
*/ - private def normaliseRelf(relf: ReadELFData) = { + def normaliseRelf(relf: ReadELFData) = { val exts = relf.externalFunctions.map(x => x.copy(name = atSuffix.replaceFirstIn(x.name, ""))) val syms = relf.symbolTable.flatMap { - case ELFSymbol(_,0,0,ELFSymType.FILE,ELFBind.LOCAL,ELFVis.DEFAULT,ELFNDX.ABS,"crtstuff.c") => None + case ELFSymbol(_, 0, 0, ELFSymType.FILE, ELFBind.LOCAL, ELFVis.DEFAULT, ELFNDX.ABS, "crtstuff.c") => None case sym if sym.etype != ELFSymType.SECTION && sym.num != -1 && !sym.name.startsWith("$") => Some(sym.copy(name = atSuffix.replaceFirstIn(sym.name, ""))) case _ => None From dad7b02ff1d7b1451120887dae773fe9b380de04 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 20:46:03 +1000 Subject: [PATCH 36/51] touch up docs and fix references --- build.mill | 14 +++++++------- src/main/scala/gtirb/AuxDecoder.scala | 8 +++++--- src/main/scala/gtirb/GTIRBReadELF.scala | 2 +- src/main/scala/gtirb/GTIRBResolver.scala | 10 ++++++---- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/build.mill b/build.mill index 4b185230b1..2fdb38973b 100644 --- a/build.mill +++ b/build.mill @@ -195,10 +195,10 @@ object `package` extends RootModule with ScalaModule { // these docs paths are used for inter-project linking. 
// https://docs.scala-lang.org/scala3/guides/scaladoc/settings.html#-external-mappings Map( - "api/bnfc" -> ".*basil_ir/.*::javadoc", - "api/basil-antlr" -> ".*Parsers/.*::javadoc", - "api/basil-proto" -> ".*com/grammatech/gtirb.*::scaladoc3", - "api/java-cup" -> ".*java_cup/.*::javadoc", + "api/bnfc" -> ".*/basil_ir/.*::javadoc", + "api/basil-antlr" -> ".*/Parsers/.*::javadoc", + "api/basil-proto" -> ".*/com/grammatech/gtirb.*::scaladoc3", + "api/java-cup" -> ".*/java_cup/.*::javadoc", ) } @@ -222,9 +222,9 @@ object `package` extends RootModule with ScalaModule { def scalaDocExternalMappingOptions = Task { val defaultExternals = Seq( - ".*scala/.*::scaladoc3::https://scala-lang.org/api/3.3_LTS/", - "java/.*::javadoc::https://docs.oracle.com/en/java/javase/17/docs/api/java.base/", - ".*com/google/protobuf.*::javadoc::https://protobuf.dev/reference/java/api-docs/", + ".*/scala/.*::scaladoc3::https://scala-lang.org/api/3.3_LTS/", + ".*/java/.*::javadoc::https://docs.oracle.com/en/java/javase/17/docs/api/java.base/", + ".*/com/google/protobuf.*::javadoc::https://protobuf.dev/reference/java/api-docs/", ) val externals = defaultExternals ++ docsRegexes().map { case (path, regex) => s"$regex::$baseUrl/$path" diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index d0327db379..3092b2e298 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -19,7 +19,7 @@ import com.grammatech.gtirb.proto.Module.Module * The read methods return [[Decoder]] values which can be passed to the [[decode]] methods. * * [[AuxKind]] provides pre-defined decoders for some official AuxData fields. An [[AuxKind]] can be - * passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[Module]]. + * passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[com.grammatech.gtirb.proto.Module.Module]]. 
* * Within a [[Decoder]], the internal state of the [[java.io.ByteArrayInputStream]] is used to keep * track of the current byte position. @@ -28,7 +28,9 @@ object AuxDecoder { /** * [[AuxKind]] provides pre-defined decoders for some official AuxData fields. An [[AuxKind]] can be - * passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[Module]]. + * passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[com.grammatech.gtirb.proto.Module.Module]]. + * See the [Standard AuxData Schemata](https://grammatech.github.io/gtirb/md__aux_data.html) for a list of official AuxData fields + * and their types. */ enum AuxKind[T](val name: String, val decoder: Decoder[T]) { case ElfSymbolTabIdxInfo @@ -44,7 +46,7 @@ object AuxDecoder { } type Input = ByteArrayInputStream - type Decoder[T] = Input => T + type Decoder[T] = ByteArrayInputStream => T def decodeAux[T](known: AuxKind[T])(mod: Module) = decode(known.decoder)(mod.auxData(known.name)) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index a23b66fa39..c175041f21 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -176,7 +176,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { private val atSuffix = """@[A-Za-z_\d.]+$""".r /** - * Strips away some information from `readelf`'s [[ReadELFData]] + * Strips away some information from `readelf`'s [[translating.ReadELFData]] * which is not so important and not produced by the GTIRB ELF loader. * * For example, this throws away symbols of type SECTION and symbols beginning with `$`. 
diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index ed5164be13..b0d0a595de 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -26,12 +26,14 @@ import scala.collection.immutable.SortedMap * A class for querying the GTIRB IR, abstracting away common operations of * searching for symbols, functions, blocks, and their relations. The inner * type [[Uuid]] wraps a Base64 UUID. A number of UUID subtypes are defined to - * distinguish from different kinds of UUID within GTIRB, for example [[Uuid.Block]] - * and [Uuid.Function]]. + * distinguish different kinds of GTIRB UUID, for example [[Uuid.Block]] + * and [[Uuid.Function]]. * * Each Uuid specialisation defines a number of extension methods for common - * query operations. For example, accessing the object itself from its Uuid can be - * done via the `.get` methods. + * query operations. For example, given a [[Uuid.Symbol]], you can get the symbol + * itself via the `.get` methods, and you can get its symbol table entry with the + * [[symEntry]] method. Internally, the [[GTIRBResolver]] is indexing into the GTIRB + * protobuf and parsing the AuxData, but this is all neatly hidden away. 
*/ case class GTIRBResolver(val mod: Module) { From 56be8aeb64cb580c8b7150f1c6a9c6c87a23be4a Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 21:21:21 +1000 Subject: [PATCH 37/51] rename to ref --- src/main/scala/gtirb/GTIRBResolver.scala | 60 ++++++++++++------------ 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index b0d0a595de..02fac418f4 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -25,39 +25,41 @@ import scala.collection.immutable.SortedMap /** * A class for querying the GTIRB IR, abstracting away common operations of * searching for symbols, functions, blocks, and their relations. The inner - * type [[Uuid]] wraps a Base64 UUID. A number of UUID subtypes are defined to - * distinguish different kinds of GTIRB UUID, for example [[Uuid.Block]] - * and [[Uuid.Function]]. + * type [[GTIRBRef]] wraps a Base64 UUID. A number of UUID subtypes are defined to + * distinguish different kinds of GTIRB UUID, for example [[GTIRBRef.BlockRef]] + * and [[GTIRBRef.FunctionRef]]. * * Each Uuid specialisation defines a number of extension methods for common - * query operations. For example, given a [[Uuid.Symbol]], you can get the symbol + * query operations. For example, given a [[GTIRBRef.SymbolRef]], you can get the symbol * itself via the `.get` methods, and you can get its symbol table entry with the * [[symEntry]] method. Internally, the [[GTIRBResolver]] is indexing into the GTIRB * protobuf and parsing the AuxData, but this is all neatly hidden away. 
*/ case class GTIRBResolver(val mod: Module) { - sealed trait Uuid(val kind: String, val uuid: String) { + sealed trait GTIRBRef(val kind: String, val uuid: String) { override def toString = s"$kind:$uuid" override def equals(o: Any) = o match { - case x: Uuid => x.kind == kind && x.uuid == uuid + case x: GTIRBRef => x.kind == kind && x.uuid == uuid case _ => false } override def hashCode = (kind, uuid).hashCode } - object Uuid { + object GTIRBRef { private def b64(bs: String | ByteString) = bs match { case s: String => s case bs: ByteString => java.util.Base64.getEncoder().encodeToString(bs.toByteArray) } - class Block(xs: String | ByteString) extends Uuid("blok", b64(xs)) - class Function(xs: String | ByteString) extends Uuid("func", b64(xs)) - class Symbol(xs: String | ByteString) extends Uuid("symb", b64(xs)) + class BlockRef(xs: String | ByteString) extends GTIRBRef("blok", b64(xs)) + class FunctionRef(xs: String | ByteString) extends GTIRBRef("func", b64(xs)) + class SymbolRef(xs: String | ByteString) extends GTIRBRef("symb", b64(xs)) } + import GTIRBRef.* + /** * Represents a GTIRB code/data block and its parents. In GTIRB, block * occurs within a byte interval which occur within a section. 
Desirable information, @@ -68,8 +70,8 @@ case class GTIRBResolver(val mod: Module) { */ case class BlockData(inner: DataBlock | CodeBlock, block: Block, interval: ByteInterval, section: Section) { val uuid = inner match { - case x: DataBlock => Uuid.Block(x.uuid) - case x: CodeBlock => Uuid.Block(x.uuid) + case x: DataBlock => BlockRef(x.uuid) + case x: CodeBlock => BlockRef(x.uuid) } val size = inner match { case x: DataBlock => x.size @@ -78,12 +80,12 @@ case class GTIRBResolver(val mod: Module) { val address = block.offset + interval.address } - extension (x: Uuid.Block) + extension (x: BlockRef) def get = blocksByUuid(x) def getOption = blocksByUuid.get(x) def isProxyBlock = proxyBlockUuids.contains(x) - extension (x: Uuid.Symbol) + extension (x: SymbolRef) def get = symbolsByUuid(x) /** @@ -101,14 +103,14 @@ case class GTIRBResolver(val mod: Module) { def symEntry = symbolEntriesByUuid(x) /** - * Gets the [[Uuid.Block]] referred to by this symbol, for example + * Gets the [[GTIRBRef.BlockRef]] referred to by this symbol, for example * a data block or code block. * This is mutually-exclusive with [[getScalarValue]], * only one of these can be non-None. */ def getReferentUuid = for { uuid <- x.get.optionalPayload.referentUuid - } yield Uuid.Block(uuid) + } yield BlockRef(uuid) /** * Gets the address referred to by this symbol, if the referent is a @@ -128,27 +130,27 @@ case class GTIRBResolver(val mod: Module) { def getScalarValue = x.get.optionalPayload._value /** - * Gets the [[Uuid.Function]] associated with this symbol, + * Gets the [[GTIRBRef.FunctionRef]] associated with this symbol, * or None if this is not a function name symbol. */ def getFunction = funcNamesInverse.get(x) - extension (x: Uuid.Function) + extension (x: FunctionRef) /** * Gets the set of entry block UUIDs for the given function. */ def getEntries = funcEntries(x) /** - * Gets the [[Uuid.Symbol]] for the given function. + * Gets the [[GTIRBRef.SymbolRef]] for the given function. 
*/ def getName = funcNames(x) private def mapFirst[T, T2, U](f: T => T2)(x: (T, U)) = (f(x._1), x._2) - val proxyBlockUuids = mod.proxies.map(x => Uuid.Block(x.uuid)).toSet - val symbolsByUuid = mod.symbols.map(x => Uuid.Symbol(x.uuid) -> x).toMap - val symbolsByName = mod.symbols.map(x => x.name -> Uuid.Symbol(x.uuid)).toMap + val proxyBlockUuids = mod.proxies.map(x => BlockRef(x.uuid)).toSet + val symbolsByUuid = mod.symbols.map(x => SymbolRef(x.uuid) -> x).toMap + val symbolsByName = mod.symbols.map(x => x.name -> SymbolRef(x.uuid)).toMap val blocksByUuid = (for { sec <- mod.sections.toList @@ -157,13 +159,13 @@ case class GTIRBResolver(val mod: Module) { case b @ Block(_, Block.Value.Data(dat), _) => (dat.uuid, (dat: DataBlock | CodeBlock), b) case b @ Block(_, Block.Value.Code(cod), _) => (cod.uuid, (cod: DataBlock | CodeBlock), b) } - id = Uuid.Block(uuid) + id = BlockRef(uuid) } yield id -> BlockData(innerb, outerb, interval, sec)).toMap val sectionsByName = mod.sections.map(x => x.name -> x).toMap - val symbolTabIdxByUuid: Map[Uuid.Symbol, List[(String, BigInt)]] = - AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(Uuid.Symbol(_))) + val symbolTabIdxByUuid: Map[SymbolRef, List[(String, BigInt)]] = + AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(SymbolRef(_))) /** * A nested map indexed by section name, then symbol index, and returning a symbol uuid. 
@@ -176,16 +178,16 @@ case class GTIRBResolver(val mod: Module) { .groupMapReduce(kv => kv.head.head)(kv => SortedMap(kv.head.last -> kv.last))(_ ++ _) val symbolEntriesByUuid = decodeAux(AuxKind.ElfSymbolInfo)(mod) - .map(mapFirst(Uuid.Symbol(_))) + .map(mapFirst(SymbolRef(_))) val funcNames = decodeAux(AuxKind.FunctionNames)(mod).map { case (fun, sym) => - Uuid.Function(fun) -> Uuid.Symbol(sym) + FunctionRef(fun) -> SymbolRef(sym) } val funcNamesInverse = funcNames.map(_.swap) val funcEntries = decodeAux(AuxKind.FunctionEntries)(mod).map { case (a, b) => - Uuid.Function(a) -> b.map(Uuid.Block(_)) + FunctionRef(a) -> b.map(BlockRef(_)) } - val entryPoint = Uuid.Block(mod.entryPoint) + val entryPoint = BlockRef(mod.entryPoint) } From 5eb9db6e3604e1c0b1e246d5393944edcb59ebca Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 23:08:42 +1000 Subject: [PATCH 38/51] make it not crash. TODO: global library variables are borked also, we need to deal with COPY properly --- src/main/scala/gtirb/GTIRBReadELF.scala | 15 +++++++++------ src/main/scala/gtirb/GTIRBResolver.scala | 2 +- src/main/scala/ir/dsl/ToScala.scala | 4 ++-- src/main/scala/translating/ReadELFLoader.scala | 4 +++- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index c175041f21..552e5eb36a 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -22,7 +22,7 @@ import com.grammatech.gtirb.proto.ByteInterval.ByteInterval import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload import scala.collection.mutable -import scala.collection.immutable.SortedMap +import scala.collection.immutable.{SortedMap, SortedSet} class GTIRBReadELF(protected val gtirb: GTIRBResolver) { @@ -85,6 +85,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { rela.r_type match { case 1025 | 1026 => Right(ExternalFunction(sym.name, rela.r_offset)) case 1027 => 
Left((rela.r_offset, rela.r_addend)) + case 1024 => Left((BigInt(0), BigInt(0))) } def getAllSymbols() = { @@ -134,10 +135,10 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { def getGlobals() = { gtirb.symbolEntriesByUuid.view.collect { case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => - val blk = symid.getReferentUuid.get.get - val sec = blk.section - assert(gtirb.mod.sections(idx.toInt - 1) == sec) - SpecGlobal(symid.get.name, (size * 8).toInt, None, blk.address) + val blk = symid.getReferentUuid.get.getOption + // val sec = blk.section + // assert(gtirb.mod.sections(idx.toInt - 1) == sec) + SpecGlobal(symid.get.name, (size * 8).toInt, None, blk.fold(BigInt(-1))(_.address)) }.toSet } @@ -170,7 +171,9 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { val funs = getFunctionEntries() val main = getMainAddress(mainProcedureName) - ReadELFData(syms, exts, globs, funs, offs, main) + val x = SortedSet.from(exts)(Ordering.by(_.toString)) + println(x) + ReadELFData(syms, x, SortedSet.from(globs), funs, offs, main) } private val atSuffix = """@[A-Za-z_\d.]+$""".r diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index 02fac418f4..8e13d9eb50 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -92,7 +92,7 @@ case class GTIRBResolver(val mod: Module) { * Returns the list of symbol table indices where this symbol can be found. * Each index is a tuple of table name and index within that table. */ - def symTabIdx = symbolTabIdxByUuid(x) + def symTabIdx = symbolTabIdxByUuid.getOrElse(x, Nil) /** * Returns the `.symtab` entry for the given symbol. 
diff --git a/src/main/scala/ir/dsl/ToScala.scala b/src/main/scala/ir/dsl/ToScala.scala index dfe3fcf8c2..449e36aa34 100644 --- a/src/main/scala/ir/dsl/ToScala.scala +++ b/src/main/scala/ir/dsl/ToScala.scala @@ -84,12 +84,12 @@ given ToScalaString[BigInt] with given [T](using ToScala[T]): ToScalaLines[Seq[T]] with extension (x: Seq[T]) def toScalaLines = - Twine.indentNested("Seq(", x.map(_.toScalaLines), ")") + Twine.indentNested("Seq(", x.view.map(_.toScalaLines), ")") given [T](using ToScala[T]): ToScalaLines[Set[T]] with extension (x: Set[T]) def toScalaLines = - Twine.indentNested("Set(", x.map(_.toScalaLines), ")") + Twine.indentNested("Set(", x.view.map(_.toScalaLines), ")") given [K, V](using ToScala[K], ToScala[V]): ToScalaLines[Map[K, V]] with extension (x: Map[K, V]) diff --git a/src/main/scala/translating/ReadELFLoader.scala b/src/main/scala/translating/ReadELFLoader.scala index 44a453d802..9c59ff5c57 100644 --- a/src/main/scala/translating/ReadELFLoader.scala +++ b/src/main/scala/translating/ReadELFLoader.scala @@ -6,6 +6,7 @@ import boogie.* import specification.* import util.ILLoadingConfig +import scala.collection.immutable.{SortedSet} import scala.jdk.CollectionConverters.* import ir.dsl.given @@ -83,7 +84,8 @@ object ReadELFLoader { if (mainAddress.isEmpty) { throw Exception(s"no ${config.mainProcedureName} function in symbol table") } - ReadELFData(symbolTable, externalFunctions, globalVariables, functionEntries, relocationOffsets, mainAddress.head) + ReadELFData(symbolTable, SortedSet.from(externalFunctions)(Ordering.by(_.toString)), SortedSet.from(globalVariables), + functionEntries, relocationOffsets, mainAddress.head) } def visitRelocationTableExtFunc(ctx: RelocationTableContext): Set[ExternalFunction] = { From 10f68c24b872a5afaf50d1a1dfd62ad2e572ea04 Mon Sep 17 00:00:00 2001 From: rina Date: Mon, 23 Jun 2025 23:31:26 +1000 Subject: [PATCH 39/51] no println --- src/main/scala/gtirb/GTIRBReadELF.scala | 1 - 1 file changed, 1 deletion(-) diff 
--git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 552e5eb36a..0984967420 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -172,7 +172,6 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { val main = getMainAddress(mainProcedureName) val x = SortedSet.from(exts)(Ordering.by(_.toString)) - println(x) ReadELFData(syms, x, SortedSet.from(globs), funs, offs, main) } From bedc54d02a0547a04bc7288a86976307b00112a2 Mon Sep 17 00:00:00 2001 From: rina Date: Wed, 25 Jun 2025 15:03:11 +1000 Subject: [PATCH 40/51] scalafmt --- src/main/scala/translating/ReadELFLoader.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/main/scala/translating/ReadELFLoader.scala b/src/main/scala/translating/ReadELFLoader.scala index 9c59ff5c57..94814ec527 100644 --- a/src/main/scala/translating/ReadELFLoader.scala +++ b/src/main/scala/translating/ReadELFLoader.scala @@ -84,8 +84,14 @@ object ReadELFLoader { if (mainAddress.isEmpty) { throw Exception(s"no ${config.mainProcedureName} function in symbol table") } - ReadELFData(symbolTable, SortedSet.from(externalFunctions)(Ordering.by(_.toString)), SortedSet.from(globalVariables), - functionEntries, relocationOffsets, mainAddress.head) + ReadELFData( + symbolTable, + SortedSet.from(externalFunctions)(Ordering.by(_.toString)), + SortedSet.from(globalVariables), + functionEntries, + relocationOffsets, + mainAddress.head + ) } def visitRelocationTableExtFunc(ctx: RelocationTableContext): Set[ExternalFunction] = { From 28cb1a81ea02327d3c17db8fe3e35b32492503b8 Mon Sep 17 00:00:00 2001 From: rina Date: Wed, 25 Jun 2025 15:56:46 +1000 Subject: [PATCH 41/51] .sorted method for ReadELFData --- src/main/scala/Main.scala | 4 ++-- src/main/scala/gtirb/GTIRBReadELF.scala | 3 +-- src/main/scala/ir/dsl/ToScala.scala | 2 +- .../scala/translating/ReadELFLoader.scala | 24 +++++++++++-------- 4 files changed, 18 insertions(+), 
15 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index afb017aa52..550c565f28 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -343,8 +343,8 @@ object Main { return import ir.dsl.given - writeToFile(relf.toScala, relfOut + "-readelf.scala") - gtirb.foreach(x => writeToFile(x.toScala, relfOut + "-gtsrelf.scala")) + writeToFile(relf.sorted.toScala, relfOut + "-readelf.scala") + gtirb.foreach(x => writeToFile(x.sorted.toScala, relfOut + "-gtsrelf.scala")) return } diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 0984967420..5045c075d2 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -171,8 +171,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { val funs = getFunctionEntries() val main = getMainAddress(mainProcedureName) - val x = SortedSet.from(exts)(Ordering.by(_.toString)) - ReadELFData(syms, x, SortedSet.from(globs), funs, offs, main) + ReadELFData(syms, exts, globs, funs, offs, main) } private val atSuffix = """@[A-Za-z_\d.]+$""".r diff --git a/src/main/scala/ir/dsl/ToScala.scala b/src/main/scala/ir/dsl/ToScala.scala index 449e36aa34..6781bfc700 100644 --- a/src/main/scala/ir/dsl/ToScala.scala +++ b/src/main/scala/ir/dsl/ToScala.scala @@ -94,7 +94,7 @@ given [T](using ToScala[T]): ToScalaLines[Set[T]] with given [K, V](using ToScala[K], ToScala[V]): ToScalaLines[Map[K, V]] with extension (x: Map[K, V]) def toScalaLines = - val pairs = x.map { case (k, v) => + val pairs = x.view.map { case (k, v) => Twine(k.toScalaLines, " -> ", v.toScalaLines) } Twine.indentNested("Map(", pairs, ")") diff --git a/src/main/scala/translating/ReadELFLoader.scala b/src/main/scala/translating/ReadELFLoader.scala index 94814ec527..ef08a205ca 100644 --- a/src/main/scala/translating/ReadELFLoader.scala +++ b/src/main/scala/translating/ReadELFLoader.scala @@ -6,7 +6,7 @@ import boogie.* import specification.* import 
util.ILLoadingConfig -import scala.collection.immutable.{SortedSet} +import scala.collection.immutable.{SortedSet, SortedMap} import scala.jdk.CollectionConverters.* import ir.dsl.given @@ -55,7 +55,18 @@ case class ReadELFData( functionEntries: Set[FuncEntry], relocationOffsets: Map[BigInt, BigInt], mainAddress: BigInt -) derives ir.dsl.ToScala +) derives ir.dsl.ToScala { + + def sorted = ReadELFData( + symbolTable, + SortedSet.from(externalFunctions)(Ordering.by(Tuple.fromProductTyped(_))), + SortedSet.from(globalVariables)(Ordering.by(Tuple.fromProductTyped(_))), + SortedSet.from(functionEntries)(Ordering.by(Tuple.fromProductTyped(_))), + SortedMap.from(relocationOffsets), + mainAddress + ) + +} object ReadELFLoader { def visitSyms(ctx: SymsContext, config: ILLoadingConfig): ReadELFData = { @@ -84,14 +95,7 @@ object ReadELFLoader { if (mainAddress.isEmpty) { throw Exception(s"no ${config.mainProcedureName} function in symbol table") } - ReadELFData( - symbolTable, - SortedSet.from(externalFunctions)(Ordering.by(_.toString)), - SortedSet.from(globalVariables), - functionEntries, - relocationOffsets, - mainAddress.head - ) + ReadELFData(symbolTable, externalFunctions, globalVariables, functionEntries, relocationOffsets, mainAddress.head) } def visitRelocationTableExtFunc(ctx: RelocationTableContext): Set[ExternalFunction] = { From f0de62ef70d4bf630e274d608261ca05330bc5f5 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 26 Jun 2025 15:26:58 +1000 Subject: [PATCH 42/51] working on R_AARCH64_COPY. 
add SECTION entries too --- src/main/scala/gtirb/AuxDecoder.scala | 2 + src/main/scala/gtirb/GTIRBReadELF.scala | 112 +++++++++++++++++------ src/main/scala/gtirb/GTIRBResolver.scala | 22 ++++- 3 files changed, 107 insertions(+), 29 deletions(-) diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index 3092b2e298..681fe9d6ba 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -40,9 +40,11 @@ object AuxDecoder { "elfSymbolInfo", readMap(readUuid, readTuple(readUint(64), readString, readString, readString, readUint(64))) ) + case SectionProperties extends AuxKind("sectionProperties", readMap(readUuid, readTuple(readUint(64), readUint(64)))) case FunctionEntries extends AuxKind("functionEntries", readMap(readUuid, readSet(readUuid))) case FunctionBlocks extends AuxKind("functionBlocks", readMap(readUuid, readSet(readUuid))) case FunctionNames extends AuxKind("functionNames", readMap(readUuid, readUuid)) + case SymbolForwarding extends AuxKind("symbolForwarding", readMap(readUuid, readUuid)) } type Input = ByteArrayInputStream diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 5045c075d2..f651536763 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -23,6 +23,7 @@ import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload import scala.collection.mutable import scala.collection.immutable.{SortedMap, SortedSet} +import scala.util.chaining.scalaUtilChainingOps class GTIRBReadELF(protected val gtirb: GTIRBResolver) { @@ -36,6 +37,15 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { */ case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Long) + /** + * https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types + */ + sealed trait Elf64RelaType(val value: Long) + case object R_AARCH64_COPY extends 
Elf64RelaType(1024) + case object R_AARCH64_GLOB_DAT extends Elf64RelaType(1025) + case object R_AARCH64_JUMP_SLOT extends Elf64RelaType(1026) + case object R_AARCH64_RELATIVE extends Elf64RelaType(1027) + // https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html // https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types protected def readRela(bs: AuxDecoder.Input) = @@ -52,6 +62,13 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { case _ => None } + protected def parseAarch64RelaType(x: Long) = x match { + case R_AARCH64_COPY.value => R_AARCH64_COPY + case R_AARCH64_GLOB_DAT.value => R_AARCH64_GLOB_DAT + case R_AARCH64_JUMP_SLOT.value => R_AARCH64_JUMP_SLOT + case R_AARCH64_RELATIVE.value => R_AARCH64_RELATIVE + } + // see also: // https://www.javadoc.io/doc/net.fornwall/jelf/latest/net/fornwall/jelf/ElfSymbol.html // @@ -76,20 +93,18 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { ELFNDX.Section(i) } - /** - * https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#dynamic-relocations - */ - def parseRela(rela: Elf64Rela) = - val sym = gtirb.symbolTables(".dynsym")(rela.r_sym.toInt).get + def parseRela(kind: R_AARCH64_JUMP_SLOT.type | R_AARCH64_GLOB_DAT.type, rela: Elf64Rela): ExternalFunction = + val sym = gtirb.getDynSym(rela.r_sym.toInt).get + ExternalFunction(sym.name, rela.r_offset) - rela.r_type match { - case 1025 | 1026 => Right(ExternalFunction(sym.name, rela.r_offset)) - case 1027 => Left((rela.r_offset, rela.r_addend)) - case 1024 => Left((BigInt(0), BigInt(0))) - } + def parseRela(kind: R_AARCH64_RELATIVE.type, rela: Elf64Rela): (BigInt, BigInt) = + (rela.r_offset, rela.r_addend) + + def parseRela(kind: R_AARCH64_COPY.type, rela: Elf64Rela): gtirb.SymbolRef = + gtirb.getDynSym(rela.r_sym.toInt) def getAllSymbols() = { - gtirb.symbolEntriesByUuid + val normalsyms = gtirb.symbolEntriesByUuid.view .flatMap { case (k, pos) => val sym = k.get @@ -103,44 +118,87 @@ class 
GTIRBReadELF(protected val gtirb: GTIRBResolver) { val (size, ty, bind, vis, shndx) = k.symEntry - ty match { - case "NONE" => None - case ty => + val name = sym.name + + (ty, idx) match { + case ("NONE", _) => None + case (_, None) => None + case (ty, Some(idx)) => Some( ELFSymbol( - idx.getOrElse(-1), + idx, combinedValue, size.toInt, ELFSymType.valueOf(ty), ELFBind.valueOf(bind), ELFVis.valueOf(vis), parseElfNdx(shndx), - sym.name + name ) ) } } + + val sectionsyms = gtirb.mod.sections.view.zipWithIndex.map { case (sec, i) => + val addr = sec.byteIntervals.head.address + val num = i + 1 + + ELFSymbol(num, addr, 0, ELFSymType.SECTION, ELFBind.LOCAL, ELFVis.DEFAULT, ELFNDX.Section(num), sec.name) + } + + (normalsyms ++ sectionsyms) .toList .sortBy(x => x.num) } def getRelocations() = { - val relaDyns = parseRelaTab(gtirb.sectionsByName(".rela.dyn").byteIntervals.head.contents) - val relaPlts = parseRelaTab(gtirb.sectionsByName(".rela.plt").byteIntervals.head.contents) + def getSectionBytes(sectionName: String) = + gtirb.sectionsByName(sectionName).byteIntervals.head.contents + + val relaDyns = getSectionBytes(".rela.dyn").pipe(parseRelaTab) + val relaPlts = getSectionBytes(".rela.plt").pipe(parseRelaTab) - val (offs, exts) = (relaDyns.view ++ relaPlts.view).partitionMap(parseRela) + val relas = (relaDyns ++ relaPlts) + .groupBy(x => parseAarch64RelaType(x.r_type)) + .withDefaultValue(Nil) + + val offs = relas(R_AARCH64_RELATIVE).map(parseRela(R_AARCH64_RELATIVE, _)) + val exts = (relas(R_AARCH64_GLOB_DAT) ++ relas(R_AARCH64_JUMP_SLOT)).map(parseRela(R_AARCH64_JUMP_SLOT, _)) (offs.toMap, exts.toSet) } - def getGlobals() = { - gtirb.symbolEntriesByUuid.view.collect { case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => - val blk = symid.getReferentUuid.get.getOption - // val sec = blk.section - // assert(gtirb.mod.sections(idx.toInt - 1) == sec) - SpecGlobal(symid.get.name, (size * 8).toInt, None, blk.fold(BigInt(-1))(_.address)) + def getGlobals() = + 
gtirb.symbolEntriesByUuid.view.flatMap { + case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => + + // forwarded object symbols correspond to R_AARCH64_COPY relocations. + // for these, ddisasm produces a `*_copy` symbol. get the original symbol + // name by following the forwarding. + val fwdtarget: Option[gtirb.SymbolRef] = gtirb.symbolForwarding.get(symid) + val name = fwdtarget match { + case Some(fwdid) => fwdid.get.name + case None => symid.get.name + } + + val referentid = symid.getReferentUuid.get + val referent: Option[gtirb.BlockData] = referentid.getOption + referent match { + case Some(blk) => + Some(SpecGlobal(name, (size * 8).toInt, None, blk.address)) + + // if the referent is not a real block, then this is a + // forwarding target symbol. discard, because we generate + // the SpecGlobal from the forwarding source symbol. + case None => + assert( + gtirb.symbolForwardingInverse.contains(symid), + "a symbol with a referent that has no data block should be a forwarding target" + ) + None + } + case _ => None }.toSet - } def getFunctionEntries() = { @@ -165,8 +223,8 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { def getReadELFData(mainProcedureName: String) = { - val syms = getAllSymbols() val (offs, exts) = getRelocations() + val syms = getAllSymbols() val globs = getGlobals() val funs = getFunctionEntries() val main = getMainAddress(mainProcedureName) diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index 8e13d9eb50..825771bb92 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -56,9 +56,10 @@ case class GTIRBResolver(val mod: Module) { class BlockRef(xs: String | ByteString) extends GTIRBRef("blok", b64(xs)) class FunctionRef(xs: String | ByteString) extends GTIRBRef("func", b64(xs)) class SymbolRef(xs: String | ByteString) extends GTIRBRef("symb", b64(xs)) + class SectionRef(xs: String | ByteString) extends GTIRBRef("sect", 
b64(xs)) } - import GTIRBRef.* + export GTIRBRef.* /** * Represents a GTIRB code/data block and its parents. In GTIRB, block @@ -165,7 +166,7 @@ case class GTIRBResolver(val mod: Module) { val sectionsByName = mod.sections.map(x => x.name -> x).toMap val symbolTabIdxByUuid: Map[SymbolRef, List[(String, BigInt)]] = - AuxDecoder.decodeAux(AuxDecoder.AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(SymbolRef(_))) + decodeAux(AuxKind.ElfSymbolTabIdxInfo)(mod).map(mapFirst(SymbolRef(_))) /** * A nested map indexed by section name, then symbol index, and returning a symbol uuid. @@ -190,4 +191,21 @@ case class GTIRBResolver(val mod: Module) { val entryPoint = BlockRef(mod.entryPoint) + def getDynSym(i: Int) = + symbolTables(".dynsym")(i) + + /** + * Symbol forwarding. Keys are "forwarding" symbols which are dynamically-bound to their associated value symbol. + */ + val symbolForwarding = + decodeAux(AuxKind.SymbolForwarding)(mod).map(SymbolRef(_) -> SymbolRef(_)) + + /** + * Inverse symbol forwarding. + * Keys are "target" symbols. + * At runtime, a target's associated symbols will point to the key symbol. 
+ */ + val symbolForwardingInverse = + symbolForwarding.map(_.swap) + } From bb019a19701a312e3c16a6323da515eced962736 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 26 Jun 2025 16:00:29 +1000 Subject: [PATCH 43/51] fix relocated global objects --- src/main/scala/gtirb/AuxDecoder.scala | 3 ++- src/main/scala/gtirb/GTIRBReadELF.scala | 27 ++++++------------------ src/main/scala/gtirb/GTIRBResolver.scala | 15 ++++++++++++- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/main/scala/gtirb/AuxDecoder.scala b/src/main/scala/gtirb/AuxDecoder.scala index 681fe9d6ba..988bfa64d1 100644 --- a/src/main/scala/gtirb/AuxDecoder.scala +++ b/src/main/scala/gtirb/AuxDecoder.scala @@ -40,7 +40,8 @@ object AuxDecoder { "elfSymbolInfo", readMap(readUuid, readTuple(readUint(64), readString, readString, readString, readUint(64))) ) - case SectionProperties extends AuxKind("sectionProperties", readMap(readUuid, readTuple(readUint(64), readUint(64)))) + case SectionProperties + extends AuxKind("sectionProperties", readMap(readUuid, readTuple(readUint(64), readUint(64)))) case FunctionEntries extends AuxKind("functionEntries", readMap(readUuid, readSet(readUuid))) case FunctionBlocks extends AuxKind("functionBlocks", readMap(readUuid, readSet(readUuid))) case FunctionNames extends AuxKind("functionNames", readMap(readUuid, readUuid)) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index f651536763..2578e76ba6 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -122,11 +122,11 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { (ty, idx) match { case ("NONE", _) => None - case (_, None) => None - case (ty, Some(idx)) => + // case (_, None) => None + case (ty, idx) => Some( ELFSymbol( - idx, + idx.getOrElse(-100), combinedValue, size.toInt, ELFSymType.valueOf(ty), @@ -146,8 +146,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { ELFSymbol(num, addr, 0, 
ELFSymType.SECTION, ELFBind.LOCAL, ELFVis.DEFAULT, ELFNDX.Section(num), sec.name) } - (normalsyms ++ sectionsyms) - .toList + (normalsyms ++ sectionsyms).toList .sortBy(x => x.num) } @@ -172,20 +171,11 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { gtirb.symbolEntriesByUuid.view.flatMap { case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => - // forwarded object symbols correspond to R_AARCH64_COPY relocations. - // for these, ddisasm produces a `*_copy` symbol. get the original symbol - // name by following the forwarding. - val fwdtarget: Option[gtirb.SymbolRef] = gtirb.symbolForwarding.get(symid) - val name = fwdtarget match { - case Some(fwdid) => fwdid.get.name - case None => symid.get.name - } - val referentid = symid.getReferentUuid.get val referent: Option[gtirb.BlockData] = referentid.getOption referent match { case Some(blk) => - Some(SpecGlobal(name, (size * 8).toInt, None, blk.address)) + Some(SpecGlobal(symid.get.name, (size * 8).toInt, None, blk.address)) // if the referent is not a real block, then this is a // forwarding target symbol. 
discard, because we generate @@ -200,8 +190,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { case _ => None }.toSet - def getFunctionEntries() = { - + def getFunctionEntries() = gtirb.symbolEntriesByUuid.view.collect { case (symid, (size, "FUNC", "GLOBAL", "DEFAULT", idx)) if idx != 0 => @@ -215,11 +204,9 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { FuncEntry(nameSymbol.name, (size * 8).toInt, addr) }.toSet - } - def getMainAddress(mainProcedureName: String) = { + def getMainAddress(mainProcedureName: String) = gtirb.symbolsByName(mainProcedureName).getReferentUuid.get.get.address - } def getReadELFData(mainProcedureName: String) = { diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index 825771bb92..880d5f935a 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -87,7 +87,18 @@ case class GTIRBResolver(val mod: Module) { def isProxyBlock = proxyBlockUuids.contains(x) extension (x: SymbolRef) - def get = symbolsByUuid(x) + def get: Symbol = { + val sym = symbolsByUuid(x) + + // XXX: forwarded object symbols correspond to R_AARCH64_COPY relocations. + // for these, ddisasm produces a `*_copy` symbol. get the original symbol + // name by following the forwarding. + (getForwardingTarget, symEntry) match { + case (Some(fwd), (_, "OBJECT", "GLOBAL", "DEFAULT", _)) => + sym.copy(name = fwd.get.name) + case _ => sym + } + } /** * Returns the list of symbol table indices where this symbol can be found. @@ -136,6 +147,8 @@ case class GTIRBResolver(val mod: Module) { */ def getFunction = funcNamesInverse.get(x) + def getForwardingTarget = symbolForwarding.get(x) + extension (x: FunctionRef) /** * Gets the set of entry block UUIDs for the given function. 
From 9aa41b45615a746927400f6adc1b9e16f9e42b3c Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 26 Jun 2025 16:05:08 +1000 Subject: [PATCH 44/51] checkset, remove -100 symbols --- src/main/scala/gtirb/GTIRBReadELF.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 2578e76ba6..a78c42b901 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -122,11 +122,11 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { (ty, idx) match { case ("NONE", _) => None - // case (_, None) => None - case (ty, idx) => + case (_, None) => None + case (ty, Some(idx)) => Some( ELFSymbol( - idx.getOrElse(-100), + idx, combinedValue, size.toInt, ELFSymType.valueOf(ty), @@ -236,8 +236,11 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { Some(sym.copy(name = atSuffix.replaceFirstIn(sym.name, ""))) case _ => None } + val globs = relf.globalVariables.map { x => + x.copy(name = atSuffix.replaceFirstIn(x.name, "")) + } - relf.copy(externalFunctions = exts, symbolTable = syms) + relf.copy(externalFunctions = exts, symbolTable = syms, globalVariables = globs) } /** @@ -266,7 +269,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { checkEq(g.mainAddress, o.mainAddress, "main address differs") checkEq(g.functionEntries, o.functionEntries, "function entries differ") checkEq(g.relocationOffsets, o.relocationOffsets, "relocations differ") - checkEq(g.globalVariables, o.globalVariables, "global variables differ") + checkSet(g.globalVariables, o.globalVariables, "global variables differ") checkSet(g.externalFunctions, o.externalFunctions, "external functions differ") checkSet(g.symbolTable.toSet, o.symbolTable.toSet, "symbol tables differ") From 10524003cbe467992f5eda037bde66cb7d228408 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 26 Jun 2025 16:10:05 +1000 Subject: [PATCH 45/51] scalafmt --- 
src/main/scala/Main.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 550c565f28..96c6b58bff 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -343,7 +343,13 @@ object Main { return import ir.dsl.given - writeToFile(relf.sorted.toScala, relfOut + "-readelf.scala") + writeToFile( + relf.sorted.toScala + .replace("@GLIBC_2.17", "") + .replace("@GLIBC_2.38", "") + .replace("@GLIBC_2.34", ""), + relfOut + "-readelf.scala" + ) gtirb.foreach(x => writeToFile(x.sorted.toScala, relfOut + "-gtsrelf.scala")) return } From 53275d3a8e30c56466917b748f4b27b29f66c511 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 26 Jun 2025 16:19:46 +1000 Subject: [PATCH 46/51] urls in doc comments --- src/main/scala/gtirb/GTIRBReadELF.scala | 46 ++++++++++++++----------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index a78c42b901..18b14f0dd2 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -25,6 +25,20 @@ import scala.collection.mutable import scala.collection.immutable.{SortedMap, SortedSet} import scala.util.chaining.scalaUtilChainingOps +/** + * Responsible for interpreting the GTIRB's symbol information + * and producing ELF information in a format matching [[translating.ReadELFLoader]]. 
+ * + * **Useful links:** + * + * - Full ELF64 specification, useful for symbol kinds/visibility/binding: https://irix7.com/techpubs/007-4658-001.pdf + * - Full ELF32 specification: https://refspecs.linuxfoundation.org/elf/elf.pdf + * - ELF relocation specification, for relocation struct definition: https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html + * - Aarch64 ELF supplement, for relocation types: https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types + * - An ELF cheatsheet: https://gist.github.com/x0nu11byt3/bcb35c3de461e5fb66173071a2379779 + * - elf man page, extra details: https://www.man7.org/linux/man-pages/man5/elf.5.html + * + */ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { /** @@ -38,6 +52,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Long) /** + * An Aarch64 relocation type, with constants from: * https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types */ sealed trait Elf64RelaType(val value: Long) @@ -46,8 +61,6 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { case object R_AARCH64_JUMP_SLOT extends Elf64RelaType(1026) case object R_AARCH64_RELATIVE extends Elf64RelaType(1027) - // https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html - // https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types protected def readRela(bs: AuxDecoder.Input) = import AuxDecoder.* val (r_offset, r_info, r_addend) = readTuple(readUint(64), readUint(64), readUint(64))(bs) @@ -62,6 +75,9 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { case _ => None } + /** + * Parsers an Aarch64 relocation type integer. See [[Elf64RelaType]] for constants. 
+ */ protected def parseAarch64RelaType(x: Long) = x match { case R_AARCH64_COPY.value => R_AARCH64_COPY case R_AARCH64_GLOB_DAT.value => R_AARCH64_GLOB_DAT @@ -69,19 +85,8 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { case R_AARCH64_RELATIVE.value => R_AARCH64_RELATIVE } - // see also: - // https://www.javadoc.io/doc/net.fornwall/jelf/latest/net/fornwall/jelf/ElfSymbol.html - // - // https://gist.github.com/x0nu11byt3/bcb35c3de461e5fb66173071a2379779 - // - // https://www.man7.org/linux/man-pages/man5/elf.5.html - - // Full ELF32 specification: https://refspecs.linuxfoundation.org/elf/elf.pdf - - // Full ELF64 specification: https://irix7.com/techpubs/007-4658-001.pdf - /** - * https://refspecs.linuxfoundation.org/elf/elf.pdf + * https://refspecs.linuxfoundation.org/elf/elf.pdf. * Figure 1-7. Special Section Indexes */ protected def parseElfNdx(n: BigInt) = n.toInt match { @@ -222,8 +227,9 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { private val atSuffix = """@[A-Za-z_\d.]+$""".r /** - * Strips away some information from `readelf`'s [[translating.ReadELFData]] - * which is not so important and not produced by the GTIRB ELF loader. + * Strips away some information from [[translating.ReadELFData]] + * which is not so important and causes spurious mismatches between the two + * ELF loaders. * * For example, this throws away symbols of type SECTION and symbols beginning with `$`. * It also strips the `@GLIBC_XX.X` suffix from symbol names. @@ -245,8 +251,8 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { /** * Determines whether the current ReadELFData is compatible with - * a given reference ReadELFData. That is, whether the The given reference object is - * assumed to be the gold standard + * a given reference ReadELFData. That is, whether the two ELF datas are + * equivalent when normalised ([[normalisedRelf]]). 
*/ def checkReadELFCompatibility(gtirbRelf: ReadELFData, referenceRelf: ReadELFData): Boolean = { var ok = true @@ -267,8 +273,8 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { val g = normaliseRelf(gtirbRelf) val o = normaliseRelf(referenceRelf) checkEq(g.mainAddress, o.mainAddress, "main address differs") - checkEq(g.functionEntries, o.functionEntries, "function entries differ") - checkEq(g.relocationOffsets, o.relocationOffsets, "relocations differ") + checkSet(g.functionEntries, o.functionEntries, "function entries differ") + checkSet(g.relocationOffsets.toSet, o.relocationOffsets.toSet, "relocations differ") checkSet(g.globalVariables, o.globalVariables, "global variables differ") checkSet(g.externalFunctions, o.externalFunctions, "external functions differ") checkSet(g.symbolTable.toSet, o.symbolTable.toSet, "symbol tables differ") From 62cb830699b079d8e1e50fd6cda0f8b88ff87c94 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 26 Jun 2025 16:21:39 +1000 Subject: [PATCH 47/51] type annotations --- src/main/scala/gtirb/GTIRBReadELF.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 18b14f0dd2..f029401cd1 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -108,7 +108,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { def parseRela(kind: R_AARCH64_COPY.type, rela: Elf64Rela): gtirb.SymbolRef = gtirb.getDynSym(rela.r_sym.toInt) - def getAllSymbols() = { + def getAllSymbols(): List[ELFSymbol] = { val normalsyms = gtirb.symbolEntriesByUuid.view .flatMap { case (k, pos) => val sym = k.get @@ -155,7 +155,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { .sortBy(x => x.num) } - def getRelocations() = { + def getRelocations(): (Map[BigInt, BigInt], Set[ExternalFunction]) = { def getSectionBytes(sectionName: String) = 
gtirb.sectionsByName(sectionName).byteIntervals.head.contents @@ -172,7 +172,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { (offs.toMap, exts.toSet) } - def getGlobals() = + def getGlobals(): Set[SpecGlobal] = gtirb.symbolEntriesByUuid.view.flatMap { case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", idx)) => @@ -195,7 +195,7 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { case _ => None }.toSet - def getFunctionEntries() = + def getFunctionEntries(): Set[FuncEntry] = gtirb.symbolEntriesByUuid.view.collect { case (symid, (size, "FUNC", "GLOBAL", "DEFAULT", idx)) if idx != 0 => @@ -210,10 +210,10 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { FuncEntry(nameSymbol.name, (size * 8).toInt, addr) }.toSet - def getMainAddress(mainProcedureName: String) = + def getMainAddress(mainProcedureName: String): BigInt = gtirb.symbolsByName(mainProcedureName).getReferentUuid.get.get.address - def getReadELFData(mainProcedureName: String) = { + def getReadELFData(mainProcedureName: String): ReadELFData = { val (offs, exts) = getRelocations() val syms = getAllSymbols() From 3b9916913bbe001bb3211a22a810fce621d0f0a9 Mon Sep 17 00:00:00 2001 From: rina Date: Thu, 26 Jun 2025 16:44:39 +1000 Subject: [PATCH 48/51] format --- src/main/scala/Main.scala | 1 - src/main/scala/gtirb/GTIRBReadELF.scala | 25 ++++--------------- src/main/scala/gtirb/GTIRBResolver.scala | 19 ++++---------- .../scala/specification/Specification.scala | 1 - .../scala/translating/ReadELFLoader.scala | 5 ++-- 5 files changed, 12 insertions(+), 39 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index a0ece9b6be..f7c2b7188a 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -350,7 +350,6 @@ object Main { if (relfOut.trim.isEmpty) return - import ir.dsl.given writeToFile( relf.sorted.toScala .replace("@GLIBC_2.17", "") diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 
f029401cd1..38e490d18a 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -1,28 +1,13 @@ package gtirb -import util.Logger -import gtirb.AuxDecoder -import gtirb.AuxDecoder.{AuxKind, decodeAux} - -import translating.{ELFSymType, ELFBind, ELFVis, ELFNDX, ELFSymbol, ReadELFData} +import boogie.SpecGlobal +import com.google.protobuf.ByteString +import com.grammatech.gtirb.proto.CFG.EdgeType.* import specification.{ExternalFunction, FuncEntry} -import boogie.{SpecGlobal} +import translating.{ELFBind, ELFNDX, ELFSymType, ELFSymbol, ELFVis, ReadELFData} +import util.Logger import java.io.ByteArrayInputStream - -import com.google.protobuf.ByteString -import com.grammatech.gtirb.proto.CFG.EdgeType.* -import com.grammatech.gtirb.proto.CFG.CFG -import com.grammatech.gtirb.proto.CFG.Edge -import com.grammatech.gtirb.proto.CFG.EdgeLabel -import com.grammatech.gtirb.proto.Module.Module -import com.grammatech.gtirb.proto.Symbol.Symbol -import com.grammatech.gtirb.proto.ByteInterval.Block -import com.grammatech.gtirb.proto.ByteInterval.ByteInterval -import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload - -import scala.collection.mutable -import scala.collection.immutable.{SortedMap, SortedSet} import scala.util.chaining.scalaUtilChainingOps /** diff --git a/src/main/scala/gtirb/GTIRBResolver.scala b/src/main/scala/gtirb/GTIRBResolver.scala index 880d5f935a..35062b7863 100644 --- a/src/main/scala/gtirb/GTIRBResolver.scala +++ b/src/main/scala/gtirb/GTIRBResolver.scala @@ -1,25 +1,16 @@ package gtirb -import gtirb.AuxDecoder -import gtirb.AuxDecoder.{AuxKind, decodeAux} - -import java.io.ByteArrayInputStream - import com.google.protobuf.ByteString +import com.grammatech.gtirb.proto.ByteInterval.{Block, ByteInterval} import com.grammatech.gtirb.proto.CFG.EdgeType.* -import com.grammatech.gtirb.proto.CFG.CFG -import com.grammatech.gtirb.proto.CFG.Edge -import com.grammatech.gtirb.proto.CFG.EdgeLabel -import 
com.grammatech.gtirb.proto.Module.Module -import com.grammatech.gtirb.proto.Symbol.Symbol -import com.grammatech.gtirb.proto.Section.Section -import com.grammatech.gtirb.proto.ByteInterval.Block import com.grammatech.gtirb.proto.CodeBlock.CodeBlock import com.grammatech.gtirb.proto.DataBlock.DataBlock -import com.grammatech.gtirb.proto.ByteInterval.ByteInterval +import com.grammatech.gtirb.proto.Module.Module +import com.grammatech.gtirb.proto.Section.Section +import com.grammatech.gtirb.proto.Symbol.Symbol import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload +import gtirb.AuxDecoder.{AuxKind, decodeAux} -import scala.collection.mutable import scala.collection.immutable.SortedMap /** diff --git a/src/main/scala/specification/Specification.scala b/src/main/scala/specification/Specification.scala index 8f138a06b2..2b46673765 100644 --- a/src/main/scala/specification/Specification.scala +++ b/src/main/scala/specification/Specification.scala @@ -3,7 +3,6 @@ package specification import boogie.* import ir.* import ir.dsl.given -import util.Logger trait SymbolTableEntry { val name: String diff --git a/src/main/scala/translating/ReadELFLoader.scala b/src/main/scala/translating/ReadELFLoader.scala index 99f47584a0..27102f9868 100644 --- a/src/main/scala/translating/ReadELFLoader.scala +++ b/src/main/scala/translating/ReadELFLoader.scala @@ -2,14 +2,13 @@ package translating import Parsers.ReadELFParser.* import boogie.* +import ir.dsl.given import specification.* import util.{ILLoadingConfig, Logger} -import scala.collection.immutable.{SortedSet, SortedMap} +import scala.collection.immutable.{SortedMap, SortedSet} import scala.jdk.CollectionConverters.* -import ir.dsl.given - /** https://refspecs.linuxfoundation.org/elf/elf.pdf */ From e4afa500f10ec5aa838ba7462e4d6a5937b2d22e Mon Sep 17 00:00:00 2001 From: rina Date: Fri, 27 Jun 2025 13:54:18 +1000 Subject: [PATCH 49/51] reformat --- src/test/scala/IrreducibleLoop.scala | 5 +---- 1 file changed, 1 insertion(+), 
4 deletions(-) diff --git a/src/test/scala/IrreducibleLoop.scala b/src/test/scala/IrreducibleLoop.scala index 9ad69243fe..f200245a40 100644 --- a/src/test/scala/IrreducibleLoop.scala +++ b/src/test/scala/IrreducibleLoop.scala @@ -1,11 +1,8 @@ -import org.scalatest.funsuite.AnyFunSuite -import util.{ILLoadingConfig, IRLoading, LogLevel, Logger, PerformanceTimer, RunUtils} -import translating.{BAPToIR, ReadELFData} import analysis.LoopDetector import ir.{Block, Program, dotBlockGraph} import org.scalatest.funsuite.AnyFunSuite import test_util.{BASILTest, CaptureOutput} -import translating.BAPToIR +import translating.{BAPToIR, ReadELFData} import util.{ILLoadingConfig, IRLoading, LogLevel, Logger} import scala.sys.process.* From b7a51653bb1c5af131fe1bfadd80f4e84a72e98e Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 8 Jul 2025 14:35:00 +1000 Subject: [PATCH 50/51] touch scaladoc mappings --- basilmill/basildocs.mill | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/basilmill/basildocs.mill b/basilmill/basildocs.mill index a6bc1abf31..a98901f7cf 100644 --- a/basilmill/basildocs.mill +++ b/basilmill/basildocs.mill @@ -48,9 +48,9 @@ trait BasilDocs extends ScalaModule { def scalaDocExternalMappingOptions = Task { val defaultExternals = Seq( - ".*scala/.*::scaladoc3::https://scala-lang.org/api/3.3_LTS/", - "java/.*::javadoc::https://docs.oracle.com/en/java/javase/17/docs/api/java.base/", - ".*com/google/protobuf.*::javadoc::https://protobuf.dev/reference/java/api-docs/", + ".*/scala/.*::scaladoc3::https://scala-lang.org/api/3.3_LTS/", + ".*/java/.*::javadoc::https://docs.oracle.com/en/java/javase/17/docs/api/java.base/", + ".*/com/google/protobuf.*::javadoc::https://protobuf.dev/reference/java/api-docs/", ) val externals = defaultExternals ++ docsRegexes().map { case (path, regex) => s"$regex::$baseUrl/$path" From 71e1cf564accaf653ca25c3618393e877430cbb7 Mon Sep 17 00:00:00 2001 From: rina Date: Tue, 8 Jul 2025 15:49:02 +1000 Subject: [PATCH 51/51] 
--gts-relf argument to use inputFile as relfFile the argument works in this way: ``` # --load-directory-* defaults to using .relf file if present ./mill -w run --load-directory-gtirb src/test/incorrect/nestedifglobal/clang # ... if relf and gts is given, --gts-relf directs Basil to use gtirb-as-relf ./mill -w run --load-directory-gtirb src/test/incorrect/nestedifglobal/clang --gts-relf # if only gtirb input is given, Basil will also use that for elf. ./mill -w run -i src/test/incorrect/nestedifglobal/clang/nestedifglobal.gts ``` --- src/main/scala/Main.scala | 61 +++++++++++++++++------ src/main/scala/gtirb/GTIRBReadELF.scala | 4 ++ src/main/scala/util/BASILConfig.scala | 15 +++++- src/main/scala/util/RunUtils.scala | 65 +++++++++++++++---------- 4 files changed, 104 insertions(+), 41 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 3d112debf4..bba6fb8925 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -10,6 +10,7 @@ import util.{ BoogieMemoryAccessMode, DSAConfig, DebugDumpIRLogger, + FrontendMode, ILLoadingConfig, IRLoading, LogLevel, @@ -146,6 +147,12 @@ object Main { doc = "Switch version of procedure rely/guarantee checks to emit. 
(function|ifblock)" ) procedureRG: Option[String], + @arg( + name = "gts-relf", + doc = + "Use .gts file for obtaining ELF symbol information (overrides --relf) (defaults to true if using GTIRB input and no --relf)" + ) + useGTIRBReadELF: Flag, @arg(name = "verbose", short = 'v', doc = "Show extra debugging logs (the same as -vl log)") verbose: Flag, @arg( @@ -339,7 +346,7 @@ object Main { val boogieGeneratorConfig = BoogieGeneratorConfig(boogieMemoryAccessMode, true, rely, conf.threadSplit.value, conf.noif.value) - val loadingInputs = if (conf.bapInputDirName.isDefined) then { + var loadingInputs = if (conf.bapInputDirName.isDefined) then { loadDirectory(ChooseInput.Bap, conf.bapInputDirName.get) } else if (conf.gtirbInputDirName.isDefined) then { @@ -354,30 +361,56 @@ object Main { ) } + val isGTIRB = loadingInputs.frontendMode == FrontendMode.Gtirb + + // NOTE: --dump-relf ignores --gts-relf, to ensure that the output ELF files are correctly named conf.dumpRelf match { case None => () case Some(relfOut) => - val relfFile = loadingInputs.relfFile.getOrElse { - throw IllegalArgumentException("--dump-relf requires --relf") - } + + val gtirbRelfFile = Some(loadingInputs.inputFile).filter(_ => isGTIRB) + val realRelfFile = loadingInputs.relfFile + Logger.setLevel(LogLevel.DEBUG) - val (relf, gtirb) = IRLoading.loadReadELFWithGTIRB(relfFile, loadingInputs) + val (relf, gtirb) = (realRelfFile, gtirbRelfFile) match { + case (Some(relfFile), _) => + val (a, b) = IRLoading.loadReadELFWithGTIRB(relfFile, loadingInputs) + (Some(a), b) + case (None, Some(_)) => (None, Some(IRLoading.loadGTIRBReadELF(loadingInputs))) + case _ => throw IllegalArgumentException("--dump-relf requires either --relf or a GTIRB input") + } - // skip writing files if the given path is an empty string + // skip writing files if the given path is an empty string. this checks compatibility and exits. 
if (relfOut.trim.isEmpty) return - writeToFile( - relf.sorted.toScala - .replace("@GLIBC_2.17", "") - .replace("@GLIBC_2.38", "") - .replace("@GLIBC_2.34", ""), - relfOut + "-readelf.scala" - ) - gtirb.foreach(x => writeToFile(x.sorted.toScala, relfOut + "-gtsrelf.scala")) + relf match { + case Some(relf) => + writeToFile( + relf.sorted.toScala + .replace("@GLIBC_2.17", "") + .replace("@GLIBC_2.38", "") + .replace("@GLIBC_2.34", ""), + relfOut + "-readelf.scala" + ) + case None => Logger.warn(s"Failed to load .relf information, $relfOut-readelf.scala not written") + } + gtirb match { + case Some(relf) => writeToFile(relf.sorted.toScala, relfOut + "-gtsrelf.scala") + case None => Logger.warn(s"Failed to load GTIRB information, $relfOut-gtsrelf.scala not written") + } return } + // patch in gtirb-as-relf if directed or if relf is omitted but we are using gtirb. + // NOTE: this must be done early, because lots of later places make checks about loadingInputs.relfFile. + if (conf.useGTIRBReadELF.value || (isGTIRB && loadingInputs.relfFile.isEmpty)) { + if (!isGTIRB) { + throw IllegalArgumentException("--gts-relf requires a GTIRB input") + } + loadingInputs = loadingInputs.copy(relfFile = Some(loadingInputs.inputFile)) + } + if (loadingInputs.specFile.isDefined && loadingInputs.relfFile.isEmpty) { throw IllegalArgumentException("--spec requires --relf") } diff --git a/src/main/scala/gtirb/GTIRBReadELF.scala b/src/main/scala/gtirb/GTIRBReadELF.scala index 38e490d18a..73abb0c797 100644 --- a/src/main/scala/gtirb/GTIRBReadELF.scala +++ b/src/main/scala/gtirb/GTIRBReadELF.scala @@ -209,6 +209,10 @@ class GTIRBReadELF(protected val gtirb: GTIRBResolver) { ReadELFData(syms, exts, globs, funs, offs, main) } +} + +object GTIRBReadELF { + private val atSuffix = """@[A-Za-z_\d.]+$""".r /** diff --git a/src/main/scala/util/BASILConfig.scala b/src/main/scala/util/BASILConfig.scala index 678492616b..f606368a25 100644 --- a/src/main/scala/util/BASILConfig.scala +++ 
b/src/main/scala/util/BASILConfig.scala @@ -27,7 +27,20 @@ case class ILLoadingConfig( trimEarly: Boolean = false, gtirbLiftOffline: Boolean = false, pcTracking: PCTrackingOption = PCTrackingOption.None -) +) { + lazy val frontendMode = + if inputFile.endsWith(".gts") then { + FrontendMode.Gtirb + } else if inputFile.endsWith(".gtirb") then { + FrontendMode.Gtirb + } else if inputFile.endsWith(".adt") then { + FrontendMode.Bap + } else if (inputFile.endsWith(".il")) { + FrontendMode.Basil + } else { + throw Exception(s"input file name ${inputFile} must be an .adt, .gts or .gtirb file") + } +} case class StaticAnalysisConfig( dumpILToPath: Option[String] = None, diff --git a/src/main/scala/util/RunUtils.scala b/src/main/scala/util/RunUtils.scala index 68c93243e4..e870d67a6e 100644 --- a/src/main/scala/util/RunUtils.scala +++ b/src/main/scala/util/RunUtils.scala @@ -6,6 +6,7 @@ import analysis.{Interval as _, *} import bap.* import boogie.* import com.grammatech.gtirb.proto.IR.IR +import gtirb.{GTIRBReadELF, GTIRBResolver} import ir.* import ir.dsl.given import ir.eval.* @@ -109,25 +110,24 @@ object IRLoading { */ def load(q: ILLoadingConfig): IRContext = { - val mode = if q.inputFile.endsWith(".gts") then { - FrontendMode.Gtirb - } else if q.inputFile.endsWith(".gtirb") then { - if (!q.gtirbLiftOffline) { - throw IllegalArgumentException(".gtirb input requires --lifter") - } - FrontendMode.Gtirb - } else if q.inputFile.endsWith(".adt") then { - FrontendMode.Bap - } else if (q.inputFile.endsWith(".il")) { - FrontendMode.Basil - } else { - throw Exception(s"input file name ${q.inputFile} must be an .adt, .gts or .gtirb file") + val mode = q.frontendMode + if (q.inputFile.endsWith(".gtirb") && !q.gtirbLiftOffline) { + throw IllegalArgumentException(".gtirb input requires --lifter") } val (mainAddress, makeContext) = q.relfFile match { case Some(relf) => { - val ReadELFData(symbols, externalFunctions, globals, funcEntries, globalOffsets, mainAddress) = + + // allow 
loading elf from inputFile if using GTIRB mode. + val relfData = if (relf == q.inputFile && mode == FrontendMode.Gtirb) { + Logger.info("[!] Using ELF data from GTIRB: " + q.inputFile) + IRLoading.loadGTIRBReadELF(q) + } else { + Logger.info("[!] Using ELF data from relf: " + relf) IRLoading.loadReadELF(relf, q) + } + + val ReadELFData(symbols, externalFunctions, globals, funcEntries, globalOffsets, mainAddress) = relfData def continuation(ctx: IRContext) = val specification = IRLoading.loadSpecification(q.specFile, ctx.program, globals) @@ -136,7 +136,9 @@ object IRLoading { (Some(mainAddress), continuation) } case None if mode == FrontendMode.Gtirb => { - Logger.warn("RELF not provided, recommended for GTIRB input") + Logger.warn( + "RELF input not provided, this is not recommended! To provide a RELF input, specify --relf or --gts-relf." + ) (None, (x: IRContext) => x) } case None => { @@ -212,6 +214,22 @@ object IRLoading { GTIRBConverter.createIR() } + /** Loads ELF data from the GTIRB input file. */ + def loadGTIRBReadELF(config: ILLoadingConfig): ReadELFData = { + val ir = IR.parseFrom(FileInputStream(config.inputFile)) + if (ir.modules.length != 1) { + Logger.warn(s"GTIRB file ${config.inputFile} unexpectedly has ${ir.modules.length} modules") + } + + val gtirb = GTIRBResolver(ir.modules.head) + val gtirbRelfLoader = GTIRBReadELF(gtirb) + gtirbRelfLoader.getReadELFData(config.mainProcedureName) + } + + /** + * Loads ELF data from *both* .relf and .gts (if using GTIRB input). If both + * sources load successfully, compares them and warns on any differences. 
+ */ def loadReadELFWithGTIRB(fileName: String, config: ILLoadingConfig): (ReadELFData, Option[ReadELFData]) = { val lexer = ReadELFLexer(CharStreams.fromFileName(fileName)) val tokens = CommonTokenStream(lexer) @@ -221,17 +239,9 @@ object IRLoading { val relf = ReadELFLoader.visitSyms(parser.syms(), config) - val gtirbRelf = if (config.inputFile.endsWith(".gts")) { - val ir = IR.parseFrom(FileInputStream(config.inputFile)) - if (ir.modules.length != 1) { - Logger.warn(s"GTIRB file ${config.inputFile} unexpectedly has ${ir.modules.length} modules") - } - - val gtirb = GTIRBResolver(ir.modules.head) - val gtirbRelfLoader = GTIRBReadELF(gtirb) - val gtirbRelf = gtirbRelfLoader.getReadELFData(config.mainProcedureName) - - gtirbRelfLoader.checkReadELFCompatibility(gtirbRelf, relf) + val gtirbRelf = if (config.inputFile.endsWith(".gts") || config.inputFile.endsWith(".gtirb")) { + val gtirbRelf = loadGTIRBReadELF(config) + GTIRBReadELF.checkReadELFCompatibility(gtirbRelf, relf) Some(gtirbRelf) } else { None @@ -240,6 +250,9 @@ object IRLoading { (relf, gtirbRelf) } + /** + * Loads ELF data from .relf. + */ def loadReadELF(fileName: String, config: ILLoadingConfig) = loadReadELFWithGTIRB(fileName, config)._1