Skip to content

Commit

Permalink
adding dwarf parsing of functions and formal parameters
Browse files Browse the repository at this point in the history
Next, I want this parsed output returned so it can be matched to ELF symbols, and then
further processed by the x86_64 parser to add registers, etc.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Sep 24, 2021
1 parent 38cf7f9 commit c8def06
Show file tree
Hide file tree
Showing 22 changed files with 9,381 additions and 91 deletions.
30 changes: 16 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ This is mostly for fun - I wanted to see how hard it would be to parse a binary
with Go. Right now we use the same logic as objdump to load it, and then print
Symbols (and I found an entry to where the Dwarf is).

🚧️ **under development** 🚧️

## Usage

Expand All @@ -15,21 +16,22 @@ $ go run main.go parse gosmeagle
```
```
...
Name: runtime.end
Address: 7475504
Size: 0
Code: 100
Type: STT_OBJECT
Binding: STB_LOCAL
Relocs: []
Name: runtime.enoptrbss
Address: 7475504
Size: 0
Code: 100
Type: STT_OBJECT
Binding: STB_LOCAL
Relocs: []
[{a 8 long int } {b 8 long int } {c 8 long int } {d 8 long int } {e 8 long int } {f 16 __int128 }]
[{__fmt -1 }]
[]
```

Note that this library is under development. So far I've finished parsing functions
and formal parameters from the DWARF, and next I'm going to map that to an x86_64 parser to get more
metadata. Stay tuned!

Note that I added parsing of the Type and Binding. I think I'm going to pull out using just the Dwarf wrapper and remove the internal code that isn't supposed to be accessible :)
See discussion in [this thread](https://twitter.com/vsoch/status/1437535961131352065) for the discovery of the missing variables.

## Includes

Since I needed some functionality from [debug/dwarf](https://cs.opensource.google/go/go/+/master:src/debug/dwarf/) that was not public, the library is included here (with proper license/credit headers) in [pkg/debug](pkg/debug) along with ELF that needs to return a matching type. The changes I made include

- renaming readType to ReadType so it's public.
- also renaming sigToType to SigToType so it's public
- made typeCache public (TypeCache)
59 changes: 5 additions & 54 deletions corpus/corpus.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package corpus

import (
"debug/dwarf"
// "encoding/json"
"fmt"
"github.com/vsoch/gosmeagle/descriptor"
"github.com/vsoch/gosmeagle/parsers/file"
Expand Down Expand Up @@ -35,6 +33,9 @@ func GetCorpus(filename string) Corpus {

func (c *Corpus) Parse(f *file.File) {

// Parse dwarf for each entry to use
f.ParseDwarf()

// Parse entries based on type (function or variable)
for _, e := range f.Entries {

Expand All @@ -46,7 +47,7 @@ func (c *Corpus) Parse(f *file.File) {
for _, symbol := range symbols {

// If we have a function, parse function
if symbol.Type == "STT_FUNC" {
if symbol.GetType() == "STT_FUNC" {
c.parseFunction(f, symbol)
}

Expand All @@ -60,27 +61,13 @@ func (c *Corpus) Parse(f *file.File) {
//fmt.Println("Binding:", symbol.Binding)
//fmt.Println("Relocs:", symbol.Relocations)
}

dwf, err := e.Dwarf()
rdr := dwf.Reader()
for entry, err := rdr.Next(); entry != nil; entry, err = rdr.Next() {
if err != nil {
log.Fatalf("error reading DWARF: %v", err)
}
switch entry.Tag {
case dwarf.TagTypedef:
if _, ok := entry.Val(dwarf.AttrName).(string); ok {
//fmt.Println(name)
}
}
}
}
}

// parse a dynamic function symbol
func (c *Corpus) parseFunction(f *file.File, symbol file.Symbol) {

fmt.Println(symbol)
//fmt.Println(symbol)
switch f.GoArch() {
case "amd64":
{
Expand All @@ -99,39 +86,3 @@ func (c *Corpus) ToJson() {
//output := string(outJson)
//fmt.Println(output)
}

/* parse a function for parameters and abi location
void Corpus::parseFunctionABILocation(Dyninst::SymtabAPI::Symbol *symbol,
Dyninst::Architecture arch) {
switch (arch) {
case Dyninst::Architecture::Arch_x86_64:
break;
case Dyninst::Architecture::Arch_aarch64:
break;
case Dyninst::Architecture::Arch_ppc64:
break;
default:
throw std::runtime_error{"Unsupported architecture: " + std::to_string(arch)};
break;
}
}
// parse a variable (global) for parameters and abi location
void Corpus::parseVariableABILocation(Dyninst::SymtabAPI::Symbol *symbol,
Dyninst::Architecture arch) {
switch (arch) {
case Dyninst::Architecture::Arch_x86_64:
variables.emplace_back(x86_64::parse_variable(symbol));
break;
case Dyninst::Architecture::Arch_aarch64:
break;
case Dyninst::Architecture::Arch_ppc64:
break;
default:
throw std::runtime_error{"Unsupported architecture: " + std::to_string(arch)};
break;
}
}
}*/
4 changes: 2 additions & 2 deletions descriptor/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ type FunctionDescription struct {
}

type VariableDescription struct {
Name string `json:"name"`
Type string `json:"type"`
Name string `json:"name"`
Type string `json:"type"`
}
138 changes: 138 additions & 0 deletions parsers/file/dwarf.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package file

import (
"fmt"
"github.com/vsoch/gosmeagle/pkg/debug/dwarf"
"io"
"log"
)

// DwarfEntry is the common interface for the DWARF entries this package
// extracts (only what we need). In this file it is implemented by
// *FunctionEntry.
type DwarfEntry interface {
// GetComponents returns the parts that make up the entry as a flat list.
GetComponents() []Component // Can be fields, params, etc.
}

// Types that we need to parse

// FunctionEntry pairs a function's DWARF entry with the formal parameters
// collected for it. ParseFunction builds one for each dwarf.TagSubprogram
// entry that ParseDwarf reads.
type FunctionEntry struct {
// Entry is the function's own DWARF entry.
Entry *dwarf.Entry
// Type is not set by any code in this file.
Type *dwarf.Type
// Params are the formal parameters matched to this function.
Params []FormalParamEntry
// Data is the DWARF data GetComponents uses to resolve parameter types.
Data *dwarf.Data
}

// FormalParamEntry holds the DWARF entry of a single formal parameter, as
// built by ParseFormalParameter.
type FormalParamEntry struct {
// Entry is the parameter's own DWARF entry.
Entry *dwarf.Entry
// Type is not set by any code in this file.
Type *dwarf.Type
// Data is the DWARF data whose reader produced Entry.
Data *dwarf.Data
}

// A Component can be a Field or param. It is the flattened description that
// DwarfEntry.GetComponents returns for each part of an entry.
type Component struct {
// Name is the component's name, read from the DWARF name attribute.
Name string
// Size is the ByteSize reported by the component's resolved type.
Size int64
// Type is the name reported by the component's resolved type.
Type string
// Framebase is not populated by any code in this file.
Framebase string
}

// GetComponents returns one Component for each named formal parameter of the
// function, carrying the parameter name plus the name and byte size of its
// resolved DWARF type.
//
// A parameter is skipped when it has no string name, when it has no type
// offset, or when f.Data cannot resolve that offset; the last two cases are
// reported on stdout. The collected slice is also printed to stdout before it
// is returned (debugging output that callers currently rely on seeing).
func (f *FunctionEntry) GetComponents() []Component {
	comps := make([]Component, 0, len(f.Params))
	for _, param := range f.Params {

		// Unnamed parameters carry nothing we can report.
		paramName, ok := param.Entry.Val(dwarf.AttrName).(string)
		if !ok {
			continue
		}

		// The checked assertion also covers a missing (nil) type attribute.
		// Previously that case printed "skipping" but fell through to an
		// unchecked assertion on nil, which panicked.
		paramTypeOffset, ok := param.Entry.Val(dwarf.AttrType).(dwarf.Offset)
		if !ok {
			fmt.Printf("Cannot find offset for %s, skipping\n", paramName)
			continue
		}

		paramType, err := f.Data.Type(paramTypeOffset)
		if err != nil {
			fmt.Printf("Cannot get type for %s\n", paramName)
			continue
		}

		common := paramType.Common()
		comps = append(comps, Component{Name: paramName, Type: common.Name, Size: common.ByteSize})
	}
	fmt.Println(comps)
	return comps
}

// ParseDwarf walks every entry in dwf and groups each function
// (dwarf.TagSubprogram) with the formal parameters that are its direct
// children.
//
// The grouped entries are not yet returned or stored anywhere; see the TODOs
// below. A read error is logged and ends the walk early, keeping whatever was
// collected up to that point.
func ParseDwarf(dwf *dwarf.Data) {

	// TODO - need to save functions to some kind of lookup by name
	// and return to save with the file in two spots?
	// will need to call funcEntry.GetComponents()

	entryReader := dwf.Reader()

	// Keep list of general entries
	entries := []DwarfEntry{}

	// The function whose children are currently being read (nil when there is
	// none), the parameters collected for it, and the tree depth it sits at.
	var functionEntry *dwarf.Entry
	var params []FormalParamEntry
	funcDepth := 0

	// depth is the nesting level of the entry about to be read. It grows when
	// an entry announces children and shrinks at each null (Tag 0) entry,
	// which terminates a list of siblings.
	depth := 0

	// flush records the open function, if any, together with its parameters.
	flush := func() {
		if functionEntry == nil {
			return
		}
		entries = append(entries, ParseFunction(dwf, functionEntry, params))
		functionEntry = nil
		params = nil
	}

	for {
		entry, err := entryReader.Next()

		// A failed read yields a nil entry, so the error has to be inspected
		// before the entry is; otherwise it would be silently dropped.
		if err != nil {
			if err != io.EOF {
				log.Printf("error reading DWARF: %v", err)
			}
			break
		}

		// Reached the end of the DWARF data
		if entry == nil {
			break
		}

		// A null entry closes the current sibling list. If that list was the
		// open function's children, the function is complete.
		if entry.Tag == 0 {
			depth--
			if functionEntry != nil && depth <= funcDepth {
				flush()
			}
			continue
		}

		switch entry.Tag {

		// We found a function - hold onto it for any params
		case dwarf.TagSubprogram:

			// If we have a previous function entry, add it
			flush()

			functionEntry = entry
			params = []FormalParamEntry{}
			funcDepth = depth

			// A function without children has no parameters to wait for
			if !entry.Children {
				flush()
			}

		// Formal parameters belong to a function only when they are its
		// direct children. They also appear under other parents (for example
		// subroutine types), possibly before any function has been seen, so
		// an unmatched parameter is skipped rather than treated as fatal.
		case dwarf.TagFormalParameter:
			if functionEntry != nil && depth == funcDepth+1 {
				params = append(params, ParseFormalParameter(dwf, entry))
			}

			// TODO: typedefs (dwarf.TagTypedef) are not handled yet.
		}

		if entry.Children {
			depth++
		}
	}

	// Parse the last function entry (only still open if the data was truncated)
	flush()

	// Finally, consolidate and parse into records of Create a list of dwarf entry
	// should be like a map so we can look things up?
	// TODO we need a way to look up by id/name
	// function names should be unique for a library?
	// entries := []DwarfEntry{}

}

// ParseFormalParameter wraps a formal parameter's DWARF entry, together with
// the DWARF data it came from, in a FormalParamEntry. The Type field is left
// at its zero value.
func ParseFormalParameter(d *dwarf.Data, entry *dwarf.Entry) FormalParamEntry {
	var param FormalParamEntry
	param.Entry = entry
	param.Data = d
	return param
}

// ParseFunction bundles a function's DWARF entry, its formal parameters and
// the owning DWARF data into a *FunctionEntry, returned as a DwarfEntry.
func ParseFunction(d *dwarf.Data, entry *dwarf.Entry, params []FormalParamEntry) DwarfEntry {
	fn := new(FunctionEntry)
	fn.Entry = entry
	fn.Params = params
	fn.Data = d

	// TODO remove, this is debugging only: GetComponents prints what it finds.
	fn.GetComponents()
	return fn
}

0 comments on commit c8def06

Please sign in to comment.