Skip to content
Merged
Show file tree
Hide file tree
Changes from 42 commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
595756a
case class readelf, toscala readelf, --dump-relf
katrinafyi Jun 20, 2025
ba5dec4
we can get external function symbols. TODO: dig into auxdata
katrinafyi Jun 20, 2025
167325f
new AuxDecoder for compositional decoding of AuxData
katrinafyi Jun 20, 2025
7397b88
use auxdecoder in gtirb loader
katrinafyi Jun 20, 2025
efdf667
scalafmt
katrinafyi Jun 20, 2025
19df41e
i cant tell if this is shorter
katrinafyi Jun 20, 2025
191730d
foldLeft1 is just reduce - allie
katrinafyi Jun 20, 2025
fe91ff6
Merge remote-tracking branch 'origin/mapdecoder2' into gtirb-relf
katrinafyi Jun 20, 2025
b94a1a4
what the fkkkkkkkkkkkkkkkkkk
katrinafyi Jun 20, 2025
cebe023
kaitai https://formats.kaitai.io/elf/java.html
katrinafyi Jun 20, 2025
31e0cfe
kaitai integration. ASS BECAUSE SCALA [no ci]
katrinafyi Jun 20, 2025
f67df70
Revert "kaitai integration. ASS BECAUSE SCALA [no ci]"
katrinafyi Jun 21, 2025
6adf92d
Revert "kaitai https://formats.kaitai.io/elf/java.html"
katrinafyi Jun 21, 2025
677bea2
groupMapReduce
katrinafyi Jun 21, 2025
1f80736
change r_type to long, add docs. BTW, ...
katrinafyi Jun 21, 2025
856616c
unfold
katrinafyi Jun 21, 2025
1d280c8
a lot more work. all parts from readelfloader work now
katrinafyi Jun 21, 2025
805342f
ToScala: deriving creates multi-line output sometimes
katrinafyi Jun 21, 2025
8f9911f
scalafmt
katrinafyi Jun 21, 2025
3c45043
tehe revert me
katrinafyi Jun 21, 2025
91d84c0
macros lol
katrinafyi Jun 21, 2025
5f86644
auto tupling??
katrinafyi Jun 21, 2025
e6e38a5
ReadTuple
katrinafyi Jun 22, 2025
1446863
move
katrinafyi Jun 22, 2025
018da5f
resolver
katrinafyi Jun 22, 2025
0e38d1b
Revert "tehe revert me"
katrinafyi Jun 23, 2025
a2e49bd
Merge remote-tracking branch 'origin/main' into gtirb-relf
katrinafyi Jun 23, 2025
c7e98f6
scalafmt
katrinafyi Jun 23, 2025
615f9bb
Merge remote-tracking branch 'origin/main' into gtirb-relf
katrinafyi Jun 23, 2025
07fc539
allow string in Uuid inputs
katrinafyi Jun 23, 2025
c23d19f
docs
katrinafyi Jun 23, 2025
b058f74
construct ReadELFData struct
katrinafyi Jun 23, 2025
96fcd87
diffing. WHY IS IT OFF BY 8 RAHHH
katrinafyi Jun 23, 2025
6e75244
--dump-relf writes files now
katrinafyi Jun 23, 2025
4d44074
fix size bug in global variables
katrinafyi Jun 23, 2025
1c88565
scalfmt
katrinafyi Jun 23, 2025
20a77b5
fix atEnd bug! yay. it all matches now except crtstuff.c ??
katrinafyi Jun 23, 2025
742a84a
scalafmt
katrinafyi Jun 23, 2025
dad7b02
touch up docs and fix references
katrinafyi Jun 23, 2025
56be8ae
rename to ref
katrinafyi Jun 23, 2025
5eb9db6
make it not crash. TODO: global library variables are borked
katrinafyi Jun 23, 2025
10f68c2
no println
katrinafyi Jun 23, 2025
bedc54d
scalafmt
katrinafyi Jun 25, 2025
28cb1a8
.sorted method for ReadELFData
katrinafyi Jun 25, 2025
f0de62e
working on R_AARCH64_COPY. add SECTION entries too
katrinafyi Jun 26, 2025
bb019a1
fix relocated global objects
katrinafyi Jun 26, 2025
9aa41b4
checkset, remove -100 symbols
katrinafyi Jun 26, 2025
1052400
scalafmt
katrinafyi Jun 26, 2025
53275d3
urls in doc comments
katrinafyi Jun 26, 2025
62cb830
type annotations
katrinafyi Jun 26, 2025
d6b688a
Merge remote-tracking branch 'origin/main' into gtirb-relf
katrinafyi Jun 26, 2025
3b99169
format
katrinafyi Jun 26, 2025
698d42d
Merge remote-tracking branch 'origin/main' into gtirb-relf
katrinafyi Jun 27, 2025
e4afa50
reformat
katrinafyi Jun 27, 2025
fd8500b
Merge remote-tracking branch 'origin/main' into gtirb-relf
katrinafyi Jul 8, 2025
b7a5165
touch scaladoc mappings
katrinafyi Jul 8, 2025
71e1cf5
--gts-relf argument to use inputFile as relfFile
katrinafyi Jul 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions build.mill
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ object `package` extends RootModule with ScalaModule {

object mdbook extends mdbookBinary {
def mdbookSources = Task.Source {
Task.workspace / "docs"
Task.workspace / "docs"
}
}

Expand All @@ -195,10 +195,10 @@ object `package` extends RootModule with ScalaModule {
// these docs paths are used for inter-project linking.
// https://docs.scala-lang.org/scala3/guides/scaladoc/settings.html#-external-mappings
Map(
"api/bnfc" -> ".*basil_ir/.*::javadoc",
"api/basil-antlr" -> ".*Parsers/.*::javadoc",
"api/basil-proto" -> ".*com/grammatech/gtirb.*::scaladoc3",
"api/java-cup" -> ".*java_cup/.*::javadoc",
"api/bnfc" -> ".*/basil_ir/.*::javadoc",
"api/basil-antlr" -> ".*/Parsers/.*::javadoc",
"api/basil-proto" -> ".*/com/grammatech/gtirb.*::scaladoc3",
"api/java-cup" -> ".*/java_cup/.*::javadoc",
)
}

Expand All @@ -222,9 +222,9 @@ object `package` extends RootModule with ScalaModule {

def scalaDocExternalMappingOptions = Task {
val defaultExternals = Seq(
".*scala/.*::scaladoc3::https://scala-lang.org/api/3.3_LTS/",
"java/.*::javadoc::https://docs.oracle.com/en/java/javase/17/docs/api/java.base/",
".*com/google/protobuf.*::javadoc::https://protobuf.dev/reference/java/api-docs/",
".*/scala/.*::scaladoc3::https://scala-lang.org/api/3.3_LTS/",
".*/java/.*::javadoc::https://docs.oracle.com/en/java/javase/17/docs/api/java.base/",
".*/com/google/protobuf.*::javadoc::https://protobuf.dev/reference/java/api-docs/",
)
val externals = defaultExternals ++ docsRegexes().map {
case (path, regex) => s"$regex::$baseUrl/$path"
Expand Down
21 changes: 21 additions & 0 deletions src/main/scala/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ object Main {
interpret: Flag,
@arg(name = "dump-il", doc = "Dump the Intermediate Language to text.")
dumpIL: Option[String],
@arg(name = "dump-relf", doc = "Dump Basil's representation of the readelf information to the given file and exit.")
dumpRelf: Option[String],
@arg(name = "main-procedure-name", short = 'm', doc = "Name of the main procedure to begin analysis at.")
mainProcedureName: String = "main",
@arg(
Expand Down Expand Up @@ -327,6 +329,25 @@ object Main {
)
}

conf.dumpRelf match {
case None => ()
case Some(relfOut) =>
val relfFile = loadingInputs.relfFile.getOrElse {
throw IllegalArgumentException("--dump-relf requires --relf")
}
Logger.setLevel(LogLevel.DEBUG)
val (relf, gtirb) = IRLoading.loadReadELFWithGTIRB(relfFile, loadingInputs)

// skip writing files if the given path is an empty string
if (relfOut.trim.isEmpty)
return

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Significant indentation 👎


import ir.dsl.given
writeToFile(relf.toScala, relfOut + "-readelf.scala")
gtirb.foreach(x => writeToFile(x.toScala, relfOut + "-gtsrelf.scala"))
return
}

if (loadingInputs.specFile.isDefined && loadingInputs.relfFile.isEmpty) {
throw IllegalArgumentException("--spec requires --relf")
}
Expand Down
3 changes: 2 additions & 1 deletion src/main/scala/boogie/BExpr.scala
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package boogie
import ir.*
import ir.dsl.given
import specification.*
import collection.mutable

Expand Down Expand Up @@ -755,7 +756,7 @@ case class SpecGlobal(
arraySize: Option[Int],
override val address: BigInt
) extends SymbolTableEntry,
SpecGlobalOrAccess {
SpecGlobalOrAccess derives ir.dsl.ToScala {
override def specGlobals: Set[SpecGlobalOrAccess] = Set(this)

def sanitisedName = util.StringEscape.escape(name)
Expand Down
19 changes: 16 additions & 3 deletions src/main/scala/gtirb/AuxDecoder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import com.grammatech.gtirb.proto.Module.Module
* The read methods return [[Decoder]] values which can be passed to the [[decode]] methods.
*
* [[AuxKind]] provides pre-defined decoders for some official AuxData fields. An [[AuxKind]] can be
* passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[Module]].
* passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[com.grammatech.gtirb.proto.Module.Module]].
*
* Within a [[Decoder]], the internal state of the [[java.io.ByteArrayInputStream]] is used to keep
* track of the current byte position.
Expand All @@ -28,7 +28,9 @@ object AuxDecoder {

/**
* [[AuxKind]] provides pre-defined decoders for some official AuxData fields. An [[AuxKind]] can be
* passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[Module]].
* passed to [[decodeAux]] to automatically extract and decode the given AuxData from a GTIRB [[com.grammatech.gtirb.proto.Module.Module]].
* See the [Standard AuxData Schemata](https://grammatech.github.io/gtirb/md__aux_data.html) for a list of official AuxData fields
* and their types.
*/
enum AuxKind[T](val name: String, val decoder: Decoder[T]) {
case ElfSymbolTabIdxInfo
Expand All @@ -44,7 +46,7 @@ object AuxDecoder {
}

type Input = ByteArrayInputStream
type Decoder[T] = Input => T
type Decoder[T] = ByteArrayInputStream => T

def decodeAux[T](known: AuxKind[T])(mod: Module) =
decode(known.decoder)(mod.auxData(known.name))
Expand Down Expand Up @@ -143,6 +145,17 @@ object AuxDecoder {
val x6 = r6(bs)
(x1, x2, x3, x4, x5, x6)

// type ReadTuple[T <: Tuple] <: Tuple = T match
// case Reader[out] *: rest => out *: ReadTuple[rest]
// case EmptyTuple => EmptyTuple
//
// inline def readTuple[T <: Tuple](xs: T)(bs: Input): ReadTuple[T] =
// inline xs match
// case xs: (Reader[o] *: rest) =>
// xs match
// case h *: t => h(bs) *: readTuple[rest](t)(bs)
// case _: EmptyTuple => EmptyTuple

def readUuid(bs: Input) =
// ByteString.copyFrom(readBytes(16)(bs))
Base64.getEncoder().encodeToString(readBytes(16)(bs))
Expand Down
233 changes: 233 additions & 0 deletions src/main/scala/gtirb/GTIRBReadELF.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
package gtirb

import util.Logger
import gtirb.AuxDecoder
import gtirb.AuxDecoder.{AuxKind, decodeAux}

import translating.{ELFSymType, ELFBind, ELFVis, ELFNDX, ELFSymbol, ReadELFData}
import specification.{ExternalFunction, FuncEntry}
import boogie.{SpecGlobal}

import java.io.ByteArrayInputStream

import com.google.protobuf.ByteString
import com.grammatech.gtirb.proto.CFG.EdgeType.*
import com.grammatech.gtirb.proto.CFG.CFG
import com.grammatech.gtirb.proto.CFG.Edge
import com.grammatech.gtirb.proto.CFG.EdgeLabel
import com.grammatech.gtirb.proto.Module.Module
import com.grammatech.gtirb.proto.Symbol.Symbol
import com.grammatech.gtirb.proto.ByteInterval.Block
import com.grammatech.gtirb.proto.ByteInterval.ByteInterval
import com.grammatech.gtirb.proto.Symbol.Symbol.OptionalPayload

import scala.collection.mutable
import scala.collection.immutable.{SortedMap, SortedSet}

class GTIRBReadELF(protected val gtirb: GTIRBResolver) {

/**
 * An `Elf64_Rela` structure, as described by the [System V ABI](https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.reloc.html).
 * The three fields `r_offset`, `r_info`, and `r_addend` are as described in the struct.
 * The last two fields, `r_sym` and `r_type`, are extracted from the `r_info` value.
 *
 * The [ABI supplement for AArch64](https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types)
 * provides information about the interpretation of the `r_type` values.
 *
 * @param r_offset the `r_offset` field of the relocation entry
 * @param r_info the raw `r_info` field, from which `r_sym` and `r_type` are derived
 * @param r_addend the `r_addend` field of the relocation entry
 * @param r_sym the symbol table index, i.e. the upper 32 bits of `r_info`
 * @param r_type the relocation type, i.e. the lower 32 bits of `r_info`
 */
case class Elf64Rela(r_offset: BigInt, r_info: BigInt, r_addend: BigInt, r_sym: Long, r_type: Long)

/**
 * Decodes a single [[Elf64Rela]] entry from the current position of the given input.
 *
 * Three consecutive 64-bit unsigned integers are consumed (`r_offset`, `r_info`, `r_addend`).
 * The symbol index and relocation type are then split out of `r_info`.
 *
 * NOTE(review): `r_addend` is decoded as an unsigned value here, whereas the ABI declares
 * it as a signed field. Confirm that negative addends cannot occur or are handled elsewhere.
 *
 * See:
 *  - https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html
 *  - https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#relocation-types
 */
protected def readRela(bs: AuxDecoder.Input) = {
  import AuxDecoder.*
  val (offset, info, addend) = readTuple(readUint(64), readUint(64), readUint(64))(bs)
  // upper half of r_info is the symbol index, lower half is the relocation type
  val symbolIndex = info >> 32
  val relocationType = info & 0xffffffffL
  Elf64Rela(
    r_offset = offset,
    r_info = info,
    r_addend = addend,
    r_sym = symbolIndex.toLong,
    r_type = relocationType.toLong
  )
}

/**
 * Decodes the raw contents of a relocation section into a list of [[Elf64Rela]] entries.
 *
 * Entries are read back-to-back with [[readRela]] until the underlying stream reports
 * that no more bytes are available.
 */
protected def parseRelaTab(bstr: ByteString) = {
  val stream = ByteArrayInputStream(bstr.toByteArray)
  Iterator
    .continually(stream)
    .takeWhile(_.available() > 0)
    .map(readRela)
    .toList
}

// see also:
// https://www.javadoc.io/doc/net.fornwall/jelf/latest/net/fornwall/jelf/ElfSymbol.html
//
// https://gist.github.com/x0nu11byt3/bcb35c3de461e5fb66173071a2379779
//
// https://www.man7.org/linux/man-pages/man5/elf.5.html

// Full ELF32 specification: https://refspecs.linuxfoundation.org/elf/elf.pdf

// Full ELF64 specification: https://irix7.com/techpubs/007-4658-001.pdf

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doc comment


/**
 * Converts a raw ELF section index into an [[ELFNDX]] value.
 *
 * Index 0 maps to `UND` and 0xfff1 maps to `ABS`. Every other index becomes a
 * `Section`; a warning is logged first when the index is 0xff00 or above,
 * as no other special index is handled.
 *
 * https://refspecs.linuxfoundation.org/elf/elf.pdf
 * Figure 1-7. Special Section Indexes
 */
protected def parseElfNdx(n: BigInt) = n.toInt match {
  case 0 => ELFNDX.UND
  case 0xfff1 => ELFNDX.ABS
  case special if special >= 0xff00 =>
    Logger.warn("unhandled special elf section index: " + special)
    ELFNDX.Section(special)
  case ordinary => ELFNDX.Section(ordinary)
}

/**
 * Classifies a single dynamic relocation entry.
 *
 * https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#dynamic-relocations
 *
 * The result depends on `r_type`:
 *  - 1025 (R_AARCH64_GLOB_DAT) and 1026 (R_AARCH64_JUMP_SLOT) give a `Right` holding an
 *    [[specification.ExternalFunction]] named after the `.dynsym` symbol, at `r_offset`.
 *  - 1027 (R_AARCH64_RELATIVE) gives a `Left` pairing `r_offset` with `r_addend`.
 *  - 1024 (R_AARCH64_COPY) gives a `Left` holding the placeholder pair `(0, 0)`.
 *
 * The `.dynsym` symbol is looked up for every entry, before the relocation type is inspected.
 *
 * @param rela the relocation entry to classify
 * @throws IllegalArgumentException if the relocation type is not one of those listed above
 */
def parseRela(rela: Elf64Rela) = {
  val sym = gtirb.symbolTables(".dynsym")(rela.r_sym.toInt).get

  rela.r_type match {
    case 1025 | 1026 => Right(ExternalFunction(sym.name, rela.r_offset))
    case 1027 => Left((rela.r_offset, rela.r_addend))
    case 1024 => Left((BigInt(0), BigInt(0)))
    case other =>
      // Fail with context instead of an anonymous MatchError.
      throw IllegalArgumentException(
        s"unsupported relocation type $other for symbol '${sym.name}' at offset ${rela.r_offset}"
      )
  }
}

/**
 * Builds the ELF symbol table from the symbol entries known to the GTIRB resolver.
 *
 * For each symbol entry:
 *  - entries whose type is `NONE` are skipped;
 *  - the symbol number is its index in `.symtab`, or -1 if it has no `.symtab` index;
 *  - the symbol value is the referent address if present, otherwise the scalar value,
 *    otherwise 0.
 *
 * @return the symbols, sorted by symbol number
 */
def getAllSymbols(): List[ELFSymbol] = {
  gtirb.symbolEntriesByUuid
    .flatMap { case (k, _) =>
      val sym = k.get

      val idx = k.symTabIdx.collectFirst { case (".symtab", i) =>
        i.toInt
      }

      val addr = k.getReferentAddress
      val value = k.getScalarValue
      val combinedValue = addr.orElse(value).getOrElse(0L)

      val (size, ty, bind, vis, shndx) = k.symEntry

      ty match {
        case "NONE" => None
        case ty =>
          Some(
            ELFSymbol(
              idx.getOrElse(-1),
              combinedValue,
              size.toInt,
              ELFSymType.valueOf(ty),
              ELFBind.valueOf(bind),
              ELFVis.valueOf(vis),
              parseElfNdx(shndx),
              sym.name
            )
          )
      }
    }
    .toList
    .sortBy(x => x.num)
}

/**
 * Parses the `.rela.dyn` and `.rela.plt` sections and classifies every entry with [[parseRela]].
 *
 * Only the contents of the first byte interval of each section are read.
 *
 * @return a pair of (a map built from the `Left` results of [[parseRela]],
 *         the set of external functions from its `Right` results)
 */
def getRelocations() = {
  val relaDyns = parseRelaTab(gtirb.sectionsByName(".rela.dyn").byteIntervals.head.contents)
  val relaPlts = parseRelaTab(gtirb.sectionsByName(".rela.plt").byteIntervals.head.contents)

  // Partition the strict list: on a view, partitionMap yields two lazy views,
  // each of which re-runs parseRela on every entry when it is forced.
  val (offs, exts) = (relaDyns ++ relaPlts).partitionMap(parseRela)

  (offs.toMap, exts.toSet)
}

/**
 * Collects the global variables: every symbol entry with type `OBJECT`,
 * binding `GLOBAL` and visibility `DEFAULT`.
 *
 * The size of each [[boogie.SpecGlobal]] is the symbol entry size multiplied by 8.
 * Its address is that of the symbol's referent, or -1 when the referent cannot be resolved.
 */
def getGlobals() = {
  val globalObjects = gtirb.symbolEntriesByUuid.view.collect {
    case (symid, (size, "OBJECT", "GLOBAL", "DEFAULT", _)) =>
      val referent = symid.getReferentUuid.get.getOption
      val name = symid.get.name
      val scaledSize = (size * 8).toInt
      val address = referent.fold(BigInt(-1))(_.address)
      SpecGlobal(name, scaledSize, None, address)
  }
  globalObjects.toSet
}

/**
 * Collects the function entries: every symbol entry with type `FUNC`, binding `GLOBAL`,
 * visibility `DEFAULT` and a non-zero section index.
 *
 * Each function is asserted to have exactly one entry, whose address is used.
 * The size of each [[specification.FuncEntry]] is the symbol entry size multiplied by 8.
 */
def getFunctionEntries() = {
  val definedFunctions = gtirb.symbolEntriesByUuid.view.collect {
    case (symid, (size, "FUNC", "GLOBAL", "DEFAULT", shndx)) if shndx != 0 =>
      val symbol = symid.get
      val entryBlocks = symid.getFunction.get.getEntries

      assert(entryBlocks.size == 1, "function with non-singular entry")
      val entryAddress = entryBlocks.head.get.address

      FuncEntry(symbol.name, (size * 8).toInt, entryAddress)
  }
  definedFunctions.toSet
}

/**
 * Returns the address of the referent of the symbol with the given name.
 *
 * @param mainProcedureName name of the symbol to look up
 */
def getMainAddress(mainProcedureName: String) = {
  val mainSymbol = gtirb.symbolsByName(mainProcedureName)
  val referent = mainSymbol.getReferentUuid.get
  referent.get.address
}

/**
 * Assembles a [[translating.ReadELFData]] from the symbol table, relocations,
 * global variables, function entries and main address derived from GTIRB.
 *
 * External functions are ordered by their string representation.
 *
 * @param mainProcedureName name of the symbol whose address becomes the main address
 */
def getReadELFData(mainProcedureName: String) = {
  val symbolTable = getAllSymbols()
  val (relocationOffsets, externalFunctions) = getRelocations()
  val globals = getGlobals()
  val functionEntries = getFunctionEntries()
  val mainAddress = getMainAddress(mainProcedureName)

  val sortedExternals = SortedSet.from(externalFunctions)(Ordering.by(_.toString))
  val sortedGlobals = SortedSet.from(globals)
  ReadELFData(symbolTable, sortedExternals, sortedGlobals, functionEntries, relocationOffsets, mainAddress)
}

// Matches a trailing `@...` suffix on a symbol name, e.g. `@GLIBC_2.17`.
private val atSuffix = """@[A-Za-z_\d.]+$""".r

/**
 * Strips away some information from `readelf`'s [[translating.ReadELFData]]
 * which is not so important and not produced by the GTIRB ELF loader.
 *
 * Symbols are dropped when they are of type SECTION, have symbol number -1, or have
 * a name beginning with `$`. The local `crtstuff.c` FILE symbol is dropped as well.
 * The trailing `@GLIBC_XX.X` suffix is removed from the names of the remaining symbols
 * and of the external functions.
 */
def normaliseRelf(relf: ReadELFData) = {
  def withoutSuffix(name: String) = atSuffix.replaceFirstIn(name, "")

  val renamedExternals = relf.externalFunctions.map(ext => ext.copy(name = withoutSuffix(ext.name)))
  val keptSymbols = relf.symbolTable.flatMap {
    case ELFSymbol(_, 0, 0, ELFSymType.FILE, ELFBind.LOCAL, ELFVis.DEFAULT, ELFNDX.ABS, "crtstuff.c") => None
    case sym =>
      val ignorable = sym.etype == ELFSymType.SECTION || sym.num == -1 || sym.name.startsWith("$")
      if (ignorable) None else Some(sym.copy(name = withoutSuffix(sym.name)))
  }

  relf.copy(externalFunctions = renamedExternals, symbolTable = keptSymbols)
}

/**
 * Determines whether the ReadELFData derived from GTIRB is compatible with
 * a given reference ReadELFData. The reference object is assumed to be the gold standard.
 *
 * Both inputs are passed through [[normaliseRelf]] before being compared. The main address,
 * function entries, relocation offsets, global variables, external functions and symbol
 * table are each checked. Every check is run even if an earlier one fails, and each
 * discrepancy is logged as a warning.
 *
 * @param gtirbRelf the data derived from GTIRB
 * @param referenceRelf the reference data to compare against
 * @return true if no discrepancy was found
 */
def checkReadELFCompatibility(gtirbRelf: ReadELFData, referenceRelf: ReadELFData): Boolean = {
  var ok = true

  // Logs a warning and records the failure when the condition does not hold.
  inline def check(b: Boolean, s: String) = {
    if (!b) {
      Logger.warn("PLEASE REPORT THIS ISSUE! include the gts and relf files. gtirb relf discrepancy, " + s)
      ok = false
    }
  }

  // Set comparison which reports the elements present on only one side.
  inline def checkSet[T](x: Set[T], y: Set[T], s: String) =
    check(x == y, s"$s:\ngtirb - relf = ${x -- y}\nrelf - gtirb = ${y -- x}")

  // Plain equality comparison which reports both values.
  inline def checkEq(x: Any, y: Any, s: String) =
    check(x == y, s"$s: gtirb: $x, readelf: $y")

  val g = normaliseRelf(gtirbRelf)
  val o = normaliseRelf(referenceRelf)
  checkEq(g.mainAddress, o.mainAddress, "main address differs")
  checkEq(g.functionEntries, o.functionEntries, "function entries differ")
  checkEq(g.relocationOffsets, o.relocationOffsets, "relocations differ")
  checkEq(g.globalVariables, o.globalVariables, "global variables differ")
  checkSet(g.externalFunctions, o.externalFunctions, "external functions differ")
  checkSet(g.symbolTable.toSet, o.symbolTable.toSet, "symbol tables differ")

  Logger.debug("gtirb relf and readelf relf compatible: " + ok)
  ok
}

}
Loading
Loading