#include #include #include #include #include #include #include #include #include #include #include #include "enums.h" #include "enum_names.h" // STD // TODO: maybe do not use this #include #define _STRINGIFY(symbol) #symbol #define STRINGIFY(symbol) _STRINGIFY(symbol) // TODO: dwarf32 // page 200 typedef struct __attribute__((packed)) { uint32_t preface; // should be 0xFFFFFFFF for 64b uint64_t unit_length; uint16_t version; // should be 5 uint8_t unit_type; // should be DW_UT_compile or DW_UT_partial uint8_t address_size; // should be 8 for 64b systems uint64_t debug_abbrev_offset; } compilation_unit_header_t; // TODO: dwarf32 typedef struct __attribute__((packed)) { uint32_t preface; // should be 0xFFFFFFFF for 64b uint64_t unit_length; uint16_t version; uint64_t debug_info_offset; uint8_t address_size; // should be 8 for 64b systems uint8_t segment_selector_size; } debug_aranges_header_t; static_assert(sizeof(debug_aranges_header_t) == 24); typedef struct { uint64_t abbrev_code; // ? uint32_t abbrev_tag; // DW_TAG_* uint8_t children; // DW_CHILDREN_* } debug_abbrev_entry_t; typedef struct { uint64_t name; // DW_AT_* uint64_t form; // DW_FORM_* uint64_t value; // if DW_FORM_implicit_const } attribute_spec_t; // TODO: Store this in a more space-sensitive way typedef struct { std::vector entries; std::vector> specs; } abbrev_table_t; abbrev_table_t abbrev_table; // function signatures void parse_debuginfo_section(const void *file); void parse_aranges_section(const void *file, uint64_t offset); uint64_t get_alignment_of_section(const void *file, const char *name); uint64_t get_offset_of_section(const void *file, const char *name); void parse_section_names(const void *file, Elf64_Shdr* table, int n, int strtab_index); void print_header(Elf64_Ehdr *header); // TODO: Deal with cases where size of LEB128 > word size inline int decode_leb128(uint8_t *src, uint64_t *dest) { *dest = 0; int shift = 0; uint8_t val; do { val = *src++; *dest |= (val & 0x7f) << shift; shift += 7; } while (val & 0x80); // return the number of bytes that we should move the src pointer return shift / 8 + (shift % 8 != 0); } // TODO: Deal with cases where size of LEB128 > word size inline int decode_sleb128(uint8_t *src, int64_t *dest) { *dest = 0; int shift = 0; uint8_t val; do { val = *src++; *dest |= (val & 0x7f) << shift; shift += 7; } while (val & 0x80); // deal with signedness if ((shift < 64) && (val & 0x40)) for (int i = 63; i < shift; ++i) *dest |= (1 << shift); // sign extend // return the number of bytes that we should move the src pointer return shift / 8 + (shift % 8 != 0); } // globals uint64_t cu_header_offset = 0; void parse_debuginfo_section(const void *file) { char *p = (char*)file; uint64_t debuginfo_offset = get_offset_of_section(file, ".debug_info"); fprintf(stdout, "Offset of .debug_info section: %d\n", debuginfo_offset); fprintf(stdout, "Offset of CUH in section: %d\n", cu_header_offset); p += debuginfo_offset + cu_header_offset; compilation_unit_header_t* cuh = (compilation_unit_header_t*)p; fprintf(stdout, "CUH length: %#x\n", cuh->unit_length); fprintf(stdout, "CUH version: %d\n", cuh->version); // TODO: multifile dwarf fprintf(stdout, "CUH unit_type: %d (%s)\n", cuh->unit_type, (cuh->unit_type == DW_UT_compile ? STRINGIFY(DW_UT_compile) : "?")); fprintf(stdout, "CUH address_size: %d\n", cuh->address_size); fprintf(stdout, "CUH debug_abbrev_offset: %d\n", cuh->debug_abbrev_offset); uint64_t abbrev_section_offset = get_offset_of_section(file, ".debug_abbrev"); // Pointer to .debug_abbrev section memory for current CU char *cu_abbrev = ((char*)file) + abbrev_section_offset + cuh->debug_abbrev_offset; fprintf(stdout, "ABBREV for this compilation unit should start at: %x\n", abbrev_section_offset + cuh->debug_abbrev_offset); // begin parsing proper uint64_t abbrev_code; uint64_t abbrev_tag; // TODO: Error handling do { cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &abbrev_code); // The entries for a compilation unit end with a 0-byte abbrev code if (!abbrev_code) break; cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &abbrev_tag); bool children = *cu_abbrev++; fprintf(stdout, " %d %s, children = %s\n", abbrev_code, dwarf_get_TAG_name(abbrev_tag), children ? "yes" : "no"); abbrev_table.entries.push_back({abbrev_code, (uint32_t)abbrev_tag, children}); std::vector attr_specs = {}; // for this code uint64_t attrib_name; uint64_t attrib_form; uint64_t implicit_const = 0; // TODO: Error handling do { cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &attrib_name); cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &attrib_form); if (attrib_form == DW_FORM_implicit_const) { cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &implicit_const); fprintf(stdout, "\t%-26s %s value: %d\n", dwarf_get_AT_name(attrib_name), dwarf_get_FORM_name(attrib_form), implicit_const); } else if (!attrib_name && !attrib_form) fprintf(stdout, "\t0\n"); else fprintf(stdout, "\t%-26s %s\n", dwarf_get_AT_name(attrib_name), dwarf_get_FORM_name(attrib_form)); if (attrib_name && attrib_form) { attr_specs.push_back({attrib_name, attrib_form, implicit_const}); } } while (attrib_name && attrib_form); abbrev_table.specs.push_back(attr_specs); } while (true); p += sizeof(compilation_unit_header_t); fprintf(stdout, "==================================\n"); uint64_t code; fprintf(stdout, ".debug_info offset for first DIE: %#x\n", ((uint64_t)p - (uint64_t)file)); // TODO: 32bit size // TODO: unit_length is supposed to be 4/12 Bytes for (; ((uint64_t)p - (uint64_t)cuh) < cuh->unit_length + sizeof(compilation_unit_header_t) - 12;) { p += decode_leb128((uint8_t*)p, &code); if (!code) // null entry, skip and continue as normal continue; fprintf(stdout, "%d (%s)\n", code, dwarf_get_TAG_name(abbrev_table.entries[code-1].abbrev_tag)); for (size_t i = 0; i < abbrev_table.specs[code-1].size(); ++i) { fprintf(stdout, "\t%s\t\t", dwarf_get_AT_name(abbrev_table.specs[code-1][i].name)); // Get the desired value according to FORM switch (abbrev_table.specs[code-1][i].form) { case DW_FORM_addr: // 0x01 { // TODO: 32bit addresses uint64_t addr_value = *(uint64_t*)p; p+= 8; fprintf(stdout, "\t%#x\n", addr_value); } break; case DW_FORM_block2: // 0x03 { uint16_t length = *(uint16_t*)p; p += 2; fprintf(stdout, "\t%d byte sized data block\n", length); p += length; } break; case DW_FORM_block4: // 0x04 { uint32_t length = *(uint32_t*)p; p += 4; fprintf(stdout, "\t%d byte sized data block\n", length); p += length; } break; case DW_FORM_data2: // 0x05 { uint16_t data = *(uint16_t*)p; p += 2; fprintf(stdout, "\t%#x\n", data); } break; case DW_FORM_data4: // 0x06 { uint32_t data = *(uint32_t*)p; p += 4; fprintf(stdout, "\t%#x\n", data); } break; case DW_FORM_data8: // 0x07 { uint64_t data = *(uint64_t*)p; p += 8; fprintf(stdout, "\t%#x\n", data); } break; case DW_FORM_string: // 0x08 { // String is inside the .debug_info, just read it and advance pointer past null terminator fprintf(stdout, "\t%s\n", p); while (*p++); } break; case DW_FORM_block: // 0x09 { uint64_t length = 0; p += decode_leb128((uint8_t*)p, &length); fprintf(stdout, "\t%d byte sized data block\n", length); p += length; while (*p++); } break; case DW_FORM_block1: // 0x0a { uint8_t length = *(uint8_t*)p; fprintf(stdout, "\t%d byte sized data block\n", length); p += length; while (*p++); } break; case DW_FORM_data1: // 0x0b { uint8_t data = *(uint8_t*)p; p += 1; if (abbrev_table.specs[code-1][i].name == DW_AT_language) fprintf(stdout, "\t%s\n", dwarf_get_LANG_name(data)); else fprintf(stdout, "\t%#x\n", data); } break; case DW_FORM_flag: // 0x0c { uint8_t value = *(uint8_t*)p; p += 1; fprintf(stdout, "\t%d\n", value); } break; case DW_FORM_sdata: // 0x0d { int64_t value; p += decode_sleb128((uint8_t*)p, &value); fprintf(stdout, "\t%d\n", value); } break; case DW_FORM_strp: // 0x0e { // TODO: 32 addresses // read offset into .debug_str of desired string uint64_t str_offset = *(uint64_t*)p; p+= 8; uint64_t section_offset = get_offset_of_section(file, ".debug_str"); char *string = (char*)file + section_offset + str_offset; fprintf(stdout, "\t%s\n", string); } break; case DW_FORM_udata: // 0x0f { uint64_t value; p += decode_leb128((uint8_t*)p, &value); fprintf(stdout, "\t%d\n", value); } break; case DW_FORM_ref_addr: // 0x10 { // TODO: dwarf32 uint64_t offset = *(uint64_t*)p; p += 8; // TODO: fetch the actual value fprintf(stdout, "\toffset %#x\n", offset); } break; case DW_FORM_ref1: // 0x11 { uint8_t offset = *(uint8_t*)p; p += 1; // TODO: fetch the actual value fprintf(stdout, "\toffset %#x\n", offset); } break; case DW_FORM_ref2: // 0x12 { uint16_t offset = *(uint16_t*)p; p += 2; // TODO: fetch the actual value fprintf(stdout, "\toffset %#x\n", offset); } break; case DW_FORM_ref4: // 0x13 { uint32_t offset = *(uint32_t*)p; p += 4; // TODO: fetch the actual value fprintf(stdout, "\toffset %#x\n", offset); } break; case DW_FORM_ref8: // 0x14 { uint64_t offset = *(uint64_t*)p; p += 8; // TODO: fetch the actual value fprintf(stdout, "\t%#x\n", offset); } break; case DW_FORM_ref_udata: // 0x15 { uint64_t offset; p += decode_leb128((uint8_t*)p, &offset); // TODO: fetch the actual value fprintf(stdout, "\t%#x\n", offset); } break; case DW_FORM_indirect: // 0x16 { uint64_t form; p += decode_leb128((uint8_t*)p, &form); // TODO: fetch the actual value fprintf(stdout, "\tTODO\n"); } break; case DW_FORM_sec_offset: // 0x17 { // TODO: 32 addresses // TODO: read more about this. Could both files exist at the same time? // NOTE: objdump does not seem to try reading the string, instead outputs the pointer value // read offset into .debug_rnglists or .debug_loclists of desired string uint64_t str_offset = *(uint64_t*)p; p+= 8; uint64_t section_offset = get_offset_of_section(file, ".debug_rnglists"); char *string = (char*)file + section_offset + str_offset; fprintf(stdout, "\t%s\n", string); } break; case DW_FORM_exprloc: // 0x18 { uint64_t length; p += decode_leb128((uint8_t*)p, &length); // TODO: save this data p += length; fprintf(stdout, "(%d bytes data)\n", length); } break; case DW_FORM_flag_present: // 0x19 { // Nothing to read here, flag_present just indicates that a flag is ON. We output '1' just like objdump does fprintf(stdout, "\t1\n"); } break; case DW_FORM_strx: // 0x1a { uint64_t offset; p += decode_leb128((uint8_t*)p, &offset); uint64_t section_offset = get_offset_of_section(file, ".debug_str_offsets"); fprintf(stdout, "\t%s\n", (char*)file + section_offset + offset); } break; case DW_FORM_implicit_const: // 0x21 { // We already have the value, do not advance the pointer at all fprintf(stdout, "\t%#x\n", abbrev_table.specs[code-1][i].value); } break; case DW_FORM_line_strp: // 0x1f { // TODO: 32 addresses // read offset into .debug_line_str of desired string uint64_t str_offset = *(uint64_t*)p; p+= 8; uint64_t section_offset = get_offset_of_section(file, ".debug_line_str"); char *string = (char*)file + section_offset + str_offset; fprintf(stdout, "\t%s\n", string); } break; default: fprintf(stdout, "\tTODO\n"); break; } } fprintf(stdout, "\n"); } } void parse_aranges_section(const void *file, uint64_t offset) { debug_aranges_header_t *headerinfo = (debug_aranges_header_t*)((char*)file + offset); fprintf(stdout, ".debug_aranges info:\n"); fprintf(stdout, "\tunit_length: %d\n", headerinfo->unit_length); fprintf(stdout, "\tversion: %d\n", headerinfo->version); fprintf(stdout, "\tdebug_info_offset: %d\n", headerinfo->debug_info_offset); fprintf(stdout, "\taddress_size: %d\n", headerinfo->address_size); fprintf(stdout, "\tsegment_selector_size: %d\n", headerinfo->segment_selector_size); // TODO: Deal with more than one address cu_header_offset = headerinfo->debug_info_offset; fprintf(stdout, "\tADDRESS\tLENGTH:\n"); char *p = (char*)headerinfo; int padding_boundary = headerinfo->segment_selector_size + headerinfo->address_size * 2; int curr_padding = sizeof(debug_aranges_header_t); while (curr_padding % padding_boundary) ++curr_padding; p = p + curr_padding; while (1) { // TODO: dwarf32 if (headerinfo->address_size != 8) { fprintf(stderr, "Address_size != 8 not implemented yet"); exit(1); } if (headerinfo->segment_selector_size) { fprintf(stderr, "Segment selectors are not implemented yet"); exit(1); } uint64_t address = *((uint64_t*)p); fprintf(stdout, "\t%x", address); p = p + headerinfo->address_size; uint64_t length = *((uint64_t*)p); fprintf(stdout, "\t%x\n", length); p = p + headerinfo->address_size; if (!address && !length) break; } } uint64_t get_alignment_of_section(const void *file, const char *name) { Elf64_Ehdr *elf_header = (Elf64_Ehdr *)file; Elf64_Shdr *elf_section_table = (Elf64_Shdr *)((char*)file + elf_header->e_shoff); Elf64_Half strtab_index = elf_header->e_shstrndx; Elf64_Shdr* strtable_header = (elf_section_table + strtab_index); char *strings = (char*)file + (ptrdiff_t)strtable_header->sh_offset; Elf64_Half nheaders = elf_header->e_shnum; for (int i = 0; i < nheaders; ++i) { if (elf_section_table[i].sh_type == SHT_NULL) continue; Elf64_Off str_idx = elf_section_table[i].sh_name; char *str = &strings[str_idx]; if (!strcmp(str, name)) return elf_section_table[i].sh_addralign; } return -1; } uint64_t get_offset_of_section(const void *file, const char *name) { Elf64_Ehdr *elf_header = (Elf64_Ehdr *)file; Elf64_Shdr *elf_section_table = (Elf64_Shdr *)((char*)file + elf_header->e_shoff); Elf64_Half strtab_index = elf_header->e_shstrndx; Elf64_Shdr* strtable_header = (elf_section_table + strtab_index); char *strings = (char*)file + (ptrdiff_t)strtable_header->sh_offset; Elf64_Half nheaders = elf_header->e_shnum; for (int i = 0; i < nheaders; ++i) { if (elf_section_table[i].sh_type == SHT_NULL) continue; Elf64_Off str_idx = elf_section_table[i].sh_name; char *str = &strings[str_idx]; if (!strcmp(str, name)) return elf_section_table[i].sh_offset; } return -1; } void parse_section_names(const void *file, Elf64_Shdr* table, int n, int strtab_index) { Elf64_Shdr* strtable = (table + strtab_index); char *strings = (char*)file + (ptrdiff_t)strtable->sh_offset; for (int i = 0; i < n; ++i) { if (table[i].sh_type == SHT_NULL || table[i].sh_type == SHT_SHLIB) continue; Elf64_Off str_idx = table[i].sh_name; fprintf(stdout, "Section #%d: %s\n", i, &strings[str_idx]); } } void print_header(Elf64_Ehdr *header) { fprintf(stdout, "ELF identification:\n"); fprintf(stdout, "\tEL_MAG0: %#13x\n", header->e_ident[0]); fprintf(stdout, "\tEL_MAG1: %10c\n", header->e_ident[1]); fprintf(stdout, "\tEL_MAG2: %10c\n", header->e_ident[2]); fprintf(stdout, "\tEL_MAG3: %10c\n", header->e_ident[3]); fprintf(stdout, "\tEL_CLASS: %9d (ELFCLASS%d)\n", header->e_ident[4], header->e_ident[4] * 32); fprintf(stdout, "\tEL_DATA: %10d (ELFDATA2%cSB)\n", header->e_ident[5], header->e_ident[5] == 1 ? 'L' : 'M'); fprintf(stdout, "\tEL_VERSION: %7d (%s)\n", header->e_ident[6], header->e_ident[6] == 1 ? "EV_CURRENT" : "?"); fprintf(stdout, "\tEL_OSABI: %9d (ELFOSABI_%s)\n", header->e_ident[7], header->e_ident[7] == 0 ? "SYSV" : header->e_ident[7] == 1 ? "HPUX" : "STANDALONE"); fprintf(stdout, "\tEL_ABIVERSION: %4d\n", header->e_ident[8]); fprintf(stdout, "\tEL_PAD: /* Padding bytes */\n"); fprintf(stdout, "\tEL_NIDENT: %8d\n", header->e_ident[15]); fprintf(stdout, "Object file type: "); #define ET_CASE(name) case name: fprintf(stdout, "(" #name ")\n"); break switch (header->e_type) { ET_CASE(ET_NONE); ET_CASE(ET_REL); ET_CASE(ET_EXEC); ET_CASE(ET_DYN); ET_CASE(ET_CORE); ET_CASE(ET_LOOS); ET_CASE(ET_HIOS); ET_CASE(ET_LOPROC); ET_CASE(ET_HIPROC); } #undef ET_CASE fprintf(stdout, "Machine type: %#x\n", header->e_machine); fprintf(stdout, "Object file version: %d (%s)\n", header->e_version, header->e_version == 1 ? "EV_CURRENT" : "?"); fprintf(stdout, "Entry point address: %#x:\n", header->e_entry); fprintf(stdout, "Program header offset: %d:\n", header->e_phoff); fprintf(stdout, "Section header offset: %d:\n", header->e_shoff); fprintf(stdout, "Processor-specific flags: %#x\n", header->e_flags); fprintf(stdout, "ELF header size: %d\n", header->e_ehsize); fprintf(stdout, "Program header entry size: %d\n", header->e_phentsize); fprintf(stdout, "# of program header entries: %d\n", header->e_phnum); fprintf(stdout, "Size of section header entry: %d\n", header->e_shentsize); fprintf(stdout, "# of section header entries: %d\n", header->e_shnum); fprintf(stdout, "Section name string table index: %d\n", header->e_shstrndx); } int main(int argc, char *argv[]) { if (argc < 2) { fprintf(stdout, "Usage: pesticide [ELF binary path]\n"); return 1; } fprintf(stdout, "Trying to read %s\n", argv[1]); int fd = open(argv[1], O_RDONLY); if (fd < 0) { fprintf(stdout, "Error trying to read %s\n", argv[1]); return 1; } struct stat st; fstat(fd, &st); fprintf(stdout, "MMapping %d bytes\n", st.st_size); void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) { fprintf(stdout, "Error MMapping %s\n", argv[1]); return 1; } Elf64_Ehdr *elf_header = (Elf64_Ehdr *)addr; print_header(elf_header); Elf64_Shdr *elf_section_table = (Elf64_Shdr *)((char*)addr + elf_header->e_shoff); parse_section_names(addr, elf_section_table, elf_header->e_shnum, elf_header->e_shstrndx); fprintf(stdout, "Offset of section .debug_aranges is %d\n", get_offset_of_section(addr, ".debug_aranges")); fprintf(stdout, "Byte alignment of section .debug_aranges is %d\n", get_alignment_of_section(addr, ".debug_aranges")); parse_aranges_section(addr, get_offset_of_section(addr, ".debug_aranges")); parse_debuginfo_section(addr); }