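// pesticide: maps an ELF64 binary and dumps its ELF header, section names,
// .debug_aranges and .debug_info (DWARF 5, 64-bit DWARF format only).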
#include <stdio.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <elf.h>
|
|
#include <cstddef>
|
|
#include <cstring>
|
|
#include <cstdlib>
|
|
#include <assert.h>
|
|
|
|
#include "enums.h"
|
|
#include "enum_names.h"
|
|
|
|
// STD
|
|
// TODO: maybe do not use this
|
|
#include <vector>
|
|
|
|
// Two-level stringification: STRINGIFY() lets its argument be macro-expanded
// first, then the helper turns the resulting tokens into a string literal.
// The helper is deliberately not called _STRINGIFY: identifiers that start
// with an underscore followed by an uppercase letter are reserved for the
// implementation.
#define STRINGIFY_IMPL(symbol) #symbol
#define STRINGIFY(symbol) STRINGIFY_IMPL(symbol)
|
|
|
|
|
|
// Compilation unit header as laid out at the start of a unit in .debug_info.
// Only the 64-bit DWARF layout is described here (4-byte escape value followed
// by an 8-byte length and 8-byte offsets).
// TODO: dwarf32
// Original note: "page 200" (presumably of the DWARF 5 specification; verify).
// The struct is packed so that it can be overlaid directly on the file bytes.
typedef struct {
    uint32_t preface;             // should be 0xFFFFFFFF for 64b
    uint64_t unit_length;
    uint16_t version;             // should be 5
    uint8_t  unit_type;           // should be DW_UT_compile or DW_UT_partial
    uint8_t  address_size;        // should be 8 for 64b systems
    uint64_t debug_abbrev_offset;
} __attribute__((packed)) compilation_unit_header_t;
|
|
|
|
|
|
// Header that starts each set of entries in .debug_aranges.
// Only the 64-bit DWARF layout is described here.
// TODO: dwarf32
// The struct is packed so that it can be overlaid directly on the file bytes.
typedef struct __attribute__((packed)) {
    uint32_t preface;               // should be 0xFFFFFFFF for 64b
    uint64_t unit_length;
    uint16_t version;
    uint64_t debug_info_offset;     // copied into cu_header_offset by parse_aranges_section()
    uint8_t  address_size;          // should be 8 for 64b systems
    uint8_t  segment_selector_size;
} debug_aranges_header_t;

// The message argument keeps this building before C++17 / C23, where the
// one-argument form of static_assert is not available.
static_assert(sizeof(debug_aranges_header_t) == 24,
              "debug_aranges_header_t must be packed to exactly 24 bytes");
|
|
|
|
// One abbreviation declaration read from .debug_abbrev (without its
// attribute specifications, which are stored separately).
// Field order matters: instances are created with positional brace
// initialisation.
typedef struct {
    uint64_t abbrev_code;  // code that DIEs in .debug_info use to refer to this declaration
    uint32_t abbrev_tag;   // DW_TAG_*
    uint8_t  children;     // DW_CHILDREN_*
} debug_abbrev_entry_t;
|
|
|
|
// One attribute specification of an abbreviation declaration.
// Field order matters: instances are created with positional brace
// initialisation.
typedef struct {
    uint64_t name;   // DW_AT_*
    uint64_t form;   // DW_FORM_*
    uint64_t value;  // only meaningful when form is DW_FORM_implicit_const
} attribute_spec_t;
|
|
|
|
// TODO: Store this in a more space-sensitive way
|
|
typedef struct {
|
|
std::vector<debug_abbrev_entry_t> entries;
|
|
std::vector<std::vector<attribute_spec_t>> specs;
|
|
} abbrev_table_t;
|
|
|
|
abbrev_table_t abbrev_table;
|
|
|
|
// function signatures
// `file` is always the start of the memory-mapped ELF image.

// Prints the compilation unit header, its abbreviation declarations and its DIEs.
void parse_debuginfo_section(const void *file);

// Prints the .debug_aranges set that starts `offset` bytes into the file.
void parse_aranges_section(const void *file, uint64_t offset);

// Return sh_addralign / sh_offset of the section called `name`,
// or (uint64_t)-1 when no section has that name.
uint64_t get_alignment_of_section(const void *file, const char *name);
uint64_t get_offset_of_section(const void *file, const char *name);

// Prints the names of the `n` section headers in `table`.
void parse_section_names(const void *file, Elf64_Shdr* table, int n, int strtab_index);

// Prints the fields of the ELF file header.
void print_header(Elf64_Ehdr *header);
|
|
|
|
// Decodes one unsigned LEB128 value.
//
//   src   first byte of the encoded value; bytes are consumed until one
//         without the continuation bit (0x80) is found
//   dest  receives the decoded value
//
// Returns the number of bytes consumed, i.e. how far the caller should
// advance its read pointer.
//
// TODO: Deal with cases where size of LEB128 > word size. For now, payload
// bits that do not fit into 64 bits are dropped, but their bytes are still
// consumed so the caller stays in sync with the input.
static inline int decode_leb128(uint8_t *src, uint64_t *dest)
{
    uint64_t result = 0;
    unsigned shift = 0;
    int consumed = 0;
    uint8_t byte;

    do {
        byte = src[consumed++];
        if (shift < 64)
        {
            // Widen before shifting: shifting an int by 32 or more is undefined.
            result |= (uint64_t)(byte & 0x7f) << shift;
            shift += 7;
        }
    } while (byte & 0x80);

    *dest = result;
    return consumed;
}
|
|
|
|
// globals
|
|
uint64_t cu_header_offset = 0;
|
|
|
|
void parse_debuginfo_section(const void *file)
|
|
{
|
|
char *p = (char*)file;
|
|
|
|
uint64_t debuginfo_offset = get_offset_of_section(file, ".debug_info");
|
|
fprintf(stdout, "Offset of .debug_info section: %d\n", debuginfo_offset);
|
|
fprintf(stdout, "Offset of CUH in section: %d\n", cu_header_offset);
|
|
p += debuginfo_offset + cu_header_offset;
|
|
|
|
compilation_unit_header_t* cuh = (compilation_unit_header_t*)p;
|
|
|
|
fprintf(stdout, "CUH length: %#x\n", cuh->unit_length);
|
|
fprintf(stdout, "CUH version: %d\n", cuh->version);
|
|
// TODO: multifile dwarf
|
|
fprintf(stdout, "CUH unit_type: %d (%s)\n", cuh->unit_type, (cuh->unit_type == DW_UT_compile ? STRINGIFY(DW_UT_compile) : "?"));
|
|
fprintf(stdout, "CUH address_size: %d\n", cuh->address_size);
|
|
fprintf(stdout, "CUH debug_abbrev_offset: %d\n", cuh->debug_abbrev_offset);
|
|
|
|
uint64_t abbrev_section_offset = get_offset_of_section(file, ".debug_abbrev");
|
|
// Pointer to .debug_abbrev section memory for current CU
|
|
char *cu_abbrev = ((char*)file) + abbrev_section_offset + cuh->debug_abbrev_offset;
|
|
|
|
fprintf(stdout, "ABBREV for this compilation unit should start at: %x\n", abbrev_section_offset + cuh->debug_abbrev_offset);
|
|
|
|
// begin parsing proper
|
|
uint64_t abbrev_code;
|
|
uint64_t abbrev_tag;
|
|
|
|
// TODO: Error handling
|
|
do {
|
|
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &abbrev_code);
|
|
// The entries for a compilation unit end with a 0-byte abbrev code
|
|
if (!abbrev_code)
|
|
break;
|
|
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &abbrev_tag);
|
|
|
|
bool children = *cu_abbrev++;
|
|
fprintf(stdout, " %d %s, children = %s\n", abbrev_code, dwarf_get_TAG_name(abbrev_tag), children ? "yes" : "no");
|
|
|
|
abbrev_table.entries.push_back({abbrev_code, (uint32_t)abbrev_tag, children});
|
|
|
|
std::vector<attribute_spec_t> attr_specs = {}; // for this code
|
|
|
|
uint64_t attrib_name;
|
|
uint64_t attrib_form;
|
|
uint64_t implicit_const = 0;
|
|
// TODO: Error handling
|
|
do {
|
|
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &attrib_name);
|
|
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &attrib_form);
|
|
if (attrib_form == DW_FORM_implicit_const)
|
|
{
|
|
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &implicit_const);
|
|
fprintf(stdout, "\t%-26s %s value: %d\n", dwarf_get_AT_name(attrib_name), dwarf_get_FORM_name(attrib_form), implicit_const);
|
|
}
|
|
else if (!attrib_name && !attrib_form)
|
|
fprintf(stdout, "\t0\n");
|
|
else
|
|
fprintf(stdout, "\t%-26s %s\n", dwarf_get_AT_name(attrib_name), dwarf_get_FORM_name(attrib_form));
|
|
|
|
if (attrib_name && attrib_form)
|
|
{
|
|
attr_specs.push_back({attrib_name, attrib_form, implicit_const});
|
|
}
|
|
} while (attrib_name && attrib_form);
|
|
abbrev_table.specs.push_back(attr_specs);
|
|
} while (true);
|
|
|
|
p += sizeof(compilation_unit_header_t);
|
|
|
|
fprintf(stdout, "==================================\n");
|
|
uint64_t code;
|
|
fprintf(stdout, ".debug_info offset for first DIE: %#x\n", ((uint64_t)p - (uint64_t)file));
|
|
// TODO: 32bit size
|
|
// TODO: unit_length is supposed to be 4/12 Bytes
|
|
for (; ((uint64_t)p - (uint64_t)cuh) < cuh->unit_length + sizeof(compilation_unit_header_t) - 12;)
|
|
{
|
|
p += decode_leb128((uint8_t*)p, &code);
|
|
|
|
fprintf(stdout, "%d (%s)\n", code, dwarf_get_TAG_name(abbrev_table.entries[code-1].abbrev_tag));
|
|
for (size_t i = 0; i < abbrev_table.specs[code-1].size(); ++i)
|
|
{
|
|
fprintf(stdout, "\t%s\t\t", dwarf_get_AT_name(abbrev_table.specs[code-1][i].name));
|
|
// Get the desired value according to FORM
|
|
switch (abbrev_table.specs[code-1][i].form)
|
|
{
|
|
case DW_FORM_addr: // 0x01
|
|
{
|
|
// TODO: 32bit addresses
|
|
uint64_t addr_value = *(uint64_t*)p;
|
|
p+= 8;
|
|
fprintf(stdout, "\t%#x\n", addr_value);
|
|
} break;
|
|
case DW_FORM_data2: // 0x05
|
|
{
|
|
uint16_t data = *(uint16_t*)p;
|
|
p += 2;
|
|
fprintf(stdout, "\t%#x\n", data);
|
|
} break;
|
|
case DW_FORM_data4: // 0x06
|
|
{
|
|
uint32_t data = *(uint32_t*)p;
|
|
p += 4;
|
|
fprintf(stdout, "\t%#x\n", data);
|
|
} break;
|
|
case DW_FORM_data8: // 0x06
|
|
{
|
|
uint64_t data = *(uint64_t*)p;
|
|
p += 8;
|
|
fprintf(stdout, "\t%#x\n", data);
|
|
} break;
|
|
case DW_FORM_string: // 0x08
|
|
{
|
|
// String is inside the .debug_info, just read it and advance pointer past null terminator
|
|
fprintf(stdout, "\t%s\n", p);
|
|
while (*p++);
|
|
} break;
|
|
case DW_FORM_data1: // 0x13
|
|
{
|
|
uint8_t data = *(uint8_t*)p;
|
|
p += 1;
|
|
if (abbrev_table.specs[code-1][i].name == DW_AT_language)
|
|
fprintf(stdout, "\t%s\n", dwarf_get_LANG_name(data));
|
|
else
|
|
fprintf(stdout, "\t%#x\n", data);
|
|
} break;
|
|
|
|
case DW_FORM_ref8:
|
|
{
|
|
uint64_t data = *(uint64_t*)p;
|
|
p += 8;
|
|
fprintf(stdout, "\t%#x\n", data);
|
|
} break;
|
|
case DW_FORM_sec_offset: // 0x17
|
|
{
|
|
// TODO: 32 addresses
|
|
// TODO: read more about this. Could both files exist at the same time?
|
|
// NOTE: objdump does not seem to try reading the string, instead outputs the pointer value
|
|
// read offset into .debug_rnglists or .debug_loclists of desired string
|
|
uint64_t str_offset = *(uint64_t*)p;
|
|
p+= 8;
|
|
uint64_t section_offset = get_offset_of_section(file, ".debug_rnglists");
|
|
char *string = (char*)file + section_offset + str_offset;
|
|
fprintf(stdout, "\t%s\n", string);
|
|
} break;
|
|
case DW_FORM_exprloc: // 0x18
|
|
{
|
|
uint64_t length;
|
|
p += decode_leb128((uint8_t*)p, &length);
|
|
// TODO: save this data
|
|
p += length;
|
|
fprintf(stdout, "(%d bytes data)\n");
|
|
} break;
|
|
case DW_FORM_flag_present:
|
|
{
|
|
// Nothing to read here, flag_present just indicates that a flag is ON. We output '1' just like objdump does
|
|
fprintf(stdout, "\t1\n");
|
|
} break;
|
|
case DW_FORM_implicit_const:
|
|
{
|
|
// We already have the value, do not advance the pointer at all
|
|
fprintf(stdout, "\t%#x\n", abbrev_table.specs[code-1][i].value);
|
|
} break;
|
|
case DW_FORM_strp: // 0x0e
|
|
{
|
|
// TODO: 32 addresses
|
|
// read offset into .debug_str of desired string
|
|
uint64_t str_offset = *(uint64_t*)p;
|
|
p+= 8;
|
|
uint64_t section_offset = get_offset_of_section(file, ".debug_str");
|
|
char *string = (char*)file + section_offset + str_offset;
|
|
fprintf(stdout, "\t%s\n", string);
|
|
} break;
|
|
|
|
case DW_FORM_line_strp: // 0x1f
|
|
{
|
|
// TODO: 32 addresses
|
|
// read offset into .debug_line_str of desired string
|
|
uint64_t str_offset = *(uint64_t*)p;
|
|
p+= 8;
|
|
uint64_t section_offset = get_offset_of_section(file, ".debug_line_str");
|
|
char *string = (char*)file + section_offset + str_offset;
|
|
fprintf(stdout, "\t%s\n", string);
|
|
} break;
|
|
default:
|
|
fprintf(stdout, "\tTODO\n");
|
|
break;
|
|
}
|
|
}
|
|
fprintf(stdout, "\n");
|
|
}
|
|
}
|
|
|
|
void parse_aranges_section(const void *file, uint64_t offset)
|
|
{
|
|
debug_aranges_header_t *headerinfo = (debug_aranges_header_t*)((char*)file + offset);
|
|
fprintf(stdout, ".debug_aranges info:\n");
|
|
fprintf(stdout, "\tunit_length: %d\n", headerinfo->unit_length);
|
|
fprintf(stdout, "\tversion: %d\n", headerinfo->version);
|
|
fprintf(stdout, "\tdebug_info_offset: %d\n", headerinfo->debug_info_offset);
|
|
fprintf(stdout, "\taddress_size: %d\n", headerinfo->address_size);
|
|
fprintf(stdout, "\tsegment_selector_size: %d\n", headerinfo->segment_selector_size);
|
|
// TODO: Deal with more than one address
|
|
cu_header_offset = headerinfo->debug_info_offset;
|
|
|
|
fprintf(stdout, "\tADDRESS\tLENGTH:\n");
|
|
char *p = (char*)headerinfo;
|
|
int padding_boundary = headerinfo->segment_selector_size + headerinfo->address_size * 2;
|
|
int curr_padding = sizeof(debug_aranges_header_t);
|
|
while (curr_padding % padding_boundary)
|
|
++curr_padding;
|
|
p = p + curr_padding;
|
|
while (1)
|
|
{
|
|
// TODO: dwarf32
|
|
if (headerinfo->address_size != 8)
|
|
{
|
|
fprintf(stderr, "Address_size != 8 not implemented yet");
|
|
exit(1);
|
|
}
|
|
if (headerinfo->segment_selector_size)
|
|
{
|
|
fprintf(stderr, "Segment selectors are not implemented yet");
|
|
exit(1);
|
|
}
|
|
uint64_t address = *((uint64_t*)p);
|
|
fprintf(stdout, "\t%x", address);
|
|
p = p + headerinfo->address_size;
|
|
uint64_t length = *((uint64_t*)p);
|
|
fprintf(stdout, "\t%x\n", length);
|
|
p = p + headerinfo->address_size;
|
|
|
|
if (!address && !length)
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Looks up a section by name and returns its alignment (sh_addralign).
//
//   file  start of the memory-mapped ELF64 image
//   name  section name to look for, e.g. ".debug_aranges"
//
// Section headers of type SHT_NULL are ignored. Returns (uint64_t)-1 when no
// section has the requested name. No bounds checking is done on the image.
uint64_t get_alignment_of_section(const void *file, const char *name)
{
    char *base = (char*)file;
    Elf64_Ehdr *ehdr = (Elf64_Ehdr *)base;
    Elf64_Shdr *sections = (Elf64_Shdr *)(base + ehdr->e_shoff);

    // Section names live in the string table section selected by e_shstrndx.
    char *names = base + (ptrdiff_t)sections[ehdr->e_shstrndx].sh_offset;

    Elf64_Shdr *end = sections + ehdr->e_shnum;
    for (Elf64_Shdr *sec = sections; sec != end; ++sec)
    {
        if (sec->sh_type != SHT_NULL && strcmp(names + sec->sh_name, name) == 0)
            return sec->sh_addralign;
    }
    return (uint64_t)-1;
}
|
|
|
|
// Looks up a section by name and returns its file offset (sh_offset).
//
//   file  start of the memory-mapped ELF64 image
//   name  section name to look for, e.g. ".debug_info"
//
// Section headers of type SHT_NULL are ignored. Returns (uint64_t)-1 when no
// section has the requested name. No bounds checking is done on the image.
uint64_t get_offset_of_section(const void *file, const char *name)
{
    char *base = (char*)file;
    Elf64_Ehdr *ehdr = (Elf64_Ehdr *)base;
    Elf64_Shdr *sections = (Elf64_Shdr *)(base + ehdr->e_shoff);

    // Section names live in the string table section selected by e_shstrndx.
    char *names = base + (ptrdiff_t)sections[ehdr->e_shstrndx].sh_offset;

    Elf64_Shdr *end = sections + ehdr->e_shnum;
    for (Elf64_Shdr *sec = sections; sec != end; ++sec)
    {
        if (sec->sh_type != SHT_NULL && strcmp(names + sec->sh_name, name) == 0)
            return sec->sh_offset;
    }
    return (uint64_t)-1;
}
|
|
|
|
// Prints "Section #<index>: <name>" for the section headers in `table`.
//
//   file          start of the memory-mapped ELF image
//   table         first section header
//   n             number of section headers in `table`
//   strtab_index  index of the section header whose section holds the names
//
// Headers of type SHT_NULL and SHT_SHLIB are not printed.
void parse_section_names(const void *file, Elf64_Shdr* table, int n, int strtab_index)
{
    char *names = (char*)file + (ptrdiff_t)table[strtab_index].sh_offset;

    for (int idx = 0; idx < n; ++idx)
    {
        Elf64_Shdr *sec = &table[idx];
        if (sec->sh_type != SHT_NULL && sec->sh_type != SHT_SHLIB)
        {
            Elf64_Off name_offset = sec->sh_name;
            fprintf(stdout, "Section #%d: %s\n", idx, names + name_offset);
        }
    }
}
|
|
|
|
// Prints the fields of an ELF64 file header to stdout.
//
//   header  ELF header at the start of the mapped file
//
// 64-bit fields (entry point, program/section header offsets) are printed
// with 64-bit conversions, and object file types the switch does not know
// are printed as a raw number instead of leaving the line unfinished.
void print_header(Elf64_Ehdr *header)
{
    fprintf(stdout, "ELF identification:\n");
    fprintf(stdout, "\tEL_MAG0: %#13x\n", header->e_ident[0]);
    fprintf(stdout, "\tEL_MAG1: %10c\n", header->e_ident[1]);
    fprintf(stdout, "\tEL_MAG2: %10c\n", header->e_ident[2]);
    fprintf(stdout, "\tEL_MAG3: %10c\n", header->e_ident[3]);
    // e_ident[4] is 1 for ELFCLASS32 and 2 for ELFCLASS64, hence the * 32
    fprintf(stdout, "\tEL_CLASS: %9d (ELFCLASS%d)\n", header->e_ident[4], header->e_ident[4] * 32);
    fprintf(stdout, "\tEL_DATA: %10d (ELFDATA2%cSB)\n", header->e_ident[5], header->e_ident[5] == 1 ? 'L' : 'M');
    fprintf(stdout, "\tEL_VERSION: %7d (%s)\n", header->e_ident[6], header->e_ident[6] == 1 ? "EV_CURRENT" : "?");
    fprintf(stdout, "\tEL_OSABI: %9d (ELFOSABI_%s)\n", header->e_ident[7], header->e_ident[7] == 0 ? "SYSV" : header->e_ident[7] == 1 ? "HPUX" : "STANDALONE");
    fprintf(stdout, "\tEL_ABIVERSION: %4d\n", header->e_ident[8]);
    fprintf(stdout, "\tEL_PAD: /* Padding bytes */\n");
    fprintf(stdout, "\tEL_NIDENT: %8d\n", header->e_ident[15]);

    fprintf(stdout, "Object file type: ");

#define ET_CASE(name) case name: fprintf(stdout, "(" #name ")\n"); break
    switch (header->e_type)
    {
        ET_CASE(ET_NONE);
        ET_CASE(ET_REL);
        ET_CASE(ET_EXEC);
        ET_CASE(ET_DYN);
        ET_CASE(ET_CORE);
        ET_CASE(ET_LOOS);
        ET_CASE(ET_HIOS);
        ET_CASE(ET_LOPROC);
        ET_CASE(ET_HIPROC);
        default:
            // Values inside the OS/processor specific ranges, or invalid ones
            fprintf(stdout, "(unknown: %#x)\n", header->e_type);
            break;
    }
#undef ET_CASE

    fprintf(stdout, "Machine type: %#x\n", header->e_machine);
    fprintf(stdout, "Object file version: %u (%s)\n", header->e_version, header->e_version == 1 ? "EV_CURRENT" : "?");
    fprintf(stdout, "Entry point address: %#llx:\n", (unsigned long long)header->e_entry);
    fprintf(stdout, "Program header offset: %llu:\n", (unsigned long long)header->e_phoff);
    fprintf(stdout, "Section header offset: %llu:\n", (unsigned long long)header->e_shoff);
    fprintf(stdout, "Processor-specific flags: %#x\n", header->e_flags);
    fprintf(stdout, "ELF header size: %d\n", header->e_ehsize);
    fprintf(stdout, "Program header entry size: %d\n", header->e_phentsize);
    fprintf(stdout, "# of program header entries: %d\n", header->e_phnum);
    fprintf(stdout, "Size of section header entry: %d\n", header->e_shentsize);
    fprintf(stdout, "# of section header entries: %d\n", header->e_shnum);
    fprintf(stdout, "Section name string table index: %d\n", header->e_shstrndx);
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
if (argc < 2)
|
|
{
|
|
fprintf(stdout, "Usage: pesticide [ELF binary path]\n");
|
|
return 1;
|
|
}
|
|
fprintf(stdout, "Trying to read %s\n", argv[1]);
|
|
int fd = open(argv[1], O_RDONLY);
|
|
if (fd < 0)
|
|
{
|
|
fprintf(stdout, "Error trying to read %s\n", argv[1]);
|
|
return 1;
|
|
}
|
|
struct stat st;
|
|
fstat(fd, &st);
|
|
fprintf(stdout, "MMapping %d bytes\n", st.st_size);
|
|
void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
|
if (addr == MAP_FAILED)
|
|
{
|
|
fprintf(stdout, "Error MMapping %s\n", argv[1]);
|
|
return 1;
|
|
}
|
|
|
|
Elf64_Ehdr *elf_header = (Elf64_Ehdr *)addr;
|
|
print_header(elf_header);
|
|
|
|
Elf64_Shdr *elf_section_table = (Elf64_Shdr *)((char*)addr + elf_header->e_shoff);
|
|
parse_section_names(addr, elf_section_table, elf_header->e_shnum, elf_header->e_shstrndx);
|
|
fprintf(stdout, "Offset of section .debug_aranges is %d\n", get_offset_of_section(addr, ".debug_aranges"));
|
|
fprintf(stdout, "Byte alignment of section .debug_aranges is %d\n", get_alignment_of_section(addr, ".debug_aranges"));
|
|
parse_aranges_section(addr, get_offset_of_section(addr, ".debug_aranges"));
|
|
parse_debuginfo_section(addr);
|
|
}
|