feat: initial commit

This commit is contained in:
Phireh 2023-09-23 20:17:03 +02:00
commit 5ec36f31e5
Signed by: Phireh
GPG key ID: DD169F1BA658A5E5
6 changed files with 1851 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
# Executable
pesticide

2
Makefile Normal file
View file

@ -0,0 +1,2 @@
# Build the pesticide executable from main.cpp with debug info and no optimisation.
# The two headers are listed as prerequisites so that editing them triggers a rebuild.
pesticide: main.cpp enums.h enum_names.h
g++ -o pesticide main.cpp -Werror -Wextra -pedantic -O0 -g -gdwarf64

1064
enum_names.h Normal file

File diff suppressed because it is too large Load diff

309
enums.h Normal file
View file

@ -0,0 +1,309 @@
#pragma once
// Table 7.4, page 207
// DW_CHILDREN_* codes: the one-byte flag stored after the tag in an
// abbreviation declaration.
typedef enum {
    DW_CHILDREN_no  = 0x00,
    DW_CHILDREN_yes = 0x01
} children_encoding_t;
// Table 7.3, page 204-206
// DW_TAG_* codes identifying the kind of a debugging information entry.
// Gaps in the numbering are codes the table does not assign.
typedef enum {
    DW_TAG_array_type               = 0x01,
    DW_TAG_class_type               = 0x02,
    DW_TAG_entry_point              = 0x03,
    DW_TAG_enumeration_type         = 0x04,
    DW_TAG_formal_parameter         = 0x05,
    DW_TAG_imported_declaration     = 0x08,
    DW_TAG_label                    = 0x0a,
    DW_TAG_lexical_block            = 0x0b,
    DW_TAG_member                   = 0x0d,
    DW_TAG_pointer_type             = 0x0f,
    DW_TAG_reference_type           = 0x10,
    DW_TAG_compile_unit             = 0x11,
    DW_TAG_string_type              = 0x12,
    DW_TAG_structure_type           = 0x13,
    DW_TAG_subroutine_type          = 0x15,
    DW_TAG_typedef                  = 0x16,
    DW_TAG_union_type               = 0x17,
    DW_TAG_unspecified_parameters   = 0x18,
    DW_TAG_variant                  = 0x19,
    DW_TAG_common_block             = 0x1a,
    DW_TAG_common_inclusion         = 0x1b,
    DW_TAG_inheritance              = 0x1c,
    DW_TAG_inlined_subroutine       = 0x1d,
    DW_TAG_module                   = 0x1e,
    DW_TAG_ptr_to_member_type       = 0x1f,
    DW_TAG_set_type                 = 0x20,
    DW_TAG_subrange_type            = 0x21,
    DW_TAG_with_stmt                = 0x22,
    DW_TAG_access_declaration       = 0x23,
    DW_TAG_base_type                = 0x24,
    DW_TAG_catch_block              = 0x25,
    DW_TAG_const_type               = 0x26,
    DW_TAG_constant                 = 0x27,
    DW_TAG_enumerator               = 0x28,
    DW_TAG_file_type                = 0x29,
    DW_TAG_friend                   = 0x2a,
    DW_TAG_namelist                 = 0x2b,
    DW_TAG_namelist_item            = 0x2c,
    DW_TAG_packed_type              = 0x2d,
    DW_TAG_subprogram               = 0x2e,
    DW_TAG_template_type_parameter  = 0x2f,
    DW_TAG_template_value_parameter = 0x30,
    DW_TAG_thrown_type              = 0x31,
    DW_TAG_try_block                = 0x32,
    DW_TAG_variant_part             = 0x33,
    DW_TAG_variable                 = 0x34,
    DW_TAG_volatile_type            = 0x35,
    DW_TAG_dwarf_procedure          = 0x36,
    DW_TAG_restrict_type            = 0x37,
    DW_TAG_interface_type           = 0x38,
    DW_TAG_namespace                = 0x39,
    DW_TAG_imported_module          = 0x3a,
    DW_TAG_unspecified_type         = 0x3b,
    DW_TAG_partial_unit             = 0x3c,
    DW_TAG_imported_unit            = 0x3d,
    DW_TAG_condition                = 0x3f,
    DW_TAG_shared_type              = 0x40,
    DW_TAG_type_unit                = 0x41,
    DW_TAG_rvalue_reference_type    = 0x42,
    DW_TAG_template_alias           = 0x43,
    DW_TAG_coarray_type             = 0x44,
    DW_TAG_generic_subrange         = 0x45,
    DW_TAG_dynamic_type             = 0x46,
    DW_TAG_atomic_type              = 0x47,
    DW_TAG_call_site                = 0x48,
    DW_TAG_call_site_parameter      = 0x49,
    DW_TAG_skeleton_unit            = 0x4a,
    DW_TAG_immutable_type           = 0x4b,
    DW_TAG_lo_user                  = 0x4080,
    DW_TAG_hi_user                  = 0xffff
} tag_type_t;
// Table 7.5, page 207-212
// DW_AT_* attribute name codes. Gaps in the numbering are codes the table
// does not assign. The vendor-extension range is DW_AT_lo_user (0x2000)
// through DW_AT_hi_user (0x3fff).
typedef enum {
    DW_AT_sibling                 = 0x01,
    DW_AT_location                = 0x02,
    DW_AT_name                    = 0x03,
    DW_AT_ordering                = 0x09,
    DW_AT_byte_size               = 0x0b,
    DW_AT_bit_size                = 0x0d,
    DW_AT_stmt_list               = 0x10,
    DW_AT_low_pc                  = 0x11,
    DW_AT_high_pc                 = 0x12,
    DW_AT_language                = 0x13,
    DW_AT_discr                   = 0x15,
    DW_AT_discr_value             = 0x16,
    DW_AT_visibility              = 0x17,
    DW_AT_import                  = 0x18,
    DW_AT_string_length           = 0x19,
    DW_AT_common_reference        = 0x1a,
    DW_AT_comp_dir                = 0x1b,
    DW_AT_const_value             = 0x1c,
    DW_AT_containing_type         = 0x1d,
    DW_AT_default_value           = 0x1e,
    DW_AT_inline                  = 0x20,
    DW_AT_is_optional             = 0x21,
    DW_AT_lower_bound             = 0x22,
    DW_AT_producer                = 0x25,
    DW_AT_prototyped              = 0x27,
    DW_AT_return_addr             = 0x2a,
    DW_AT_start_scope             = 0x2c,
    DW_AT_bit_stride              = 0x2e,
    DW_AT_upper_bound             = 0x2f,
    DW_AT_abstract_origin         = 0x31,
    DW_AT_accessibility           = 0x32,
    DW_AT_address_class           = 0x33,
    DW_AT_artificial              = 0x34,
    DW_AT_base_types              = 0x35,
    DW_AT_calling_convention      = 0x36,
    DW_AT_count                   = 0x37,
    DW_AT_data_member_location    = 0x38,
    DW_AT_decl_column             = 0x39,
    DW_AT_decl_file               = 0x3a,
    DW_AT_decl_line               = 0x3b,
    DW_AT_declaration             = 0x3c,
    DW_AT_discr_list              = 0x3d,
    DW_AT_encoding                = 0x3e,
    DW_AT_external                = 0x3f,
    DW_AT_frame_base              = 0x40,
    DW_AT_friend                  = 0x41,
    DW_AT_identifier_case         = 0x42,
    DW_AT_namelist_item           = 0x44,
    DW_AT_priority                = 0x45,
    DW_AT_segment                 = 0x46,
    DW_AT_specification           = 0x47,
    DW_AT_static_link             = 0x48,
    DW_AT_type                    = 0x49,
    DW_AT_use_location            = 0x4a,
    DW_AT_variable_parameter      = 0x4b,
    DW_AT_virtuality              = 0x4c,
    DW_AT_vtable_elem_location    = 0x4d,
    DW_AT_allocated               = 0x4e,
    DW_AT_associated              = 0x4f,
    DW_AT_data_location           = 0x50,
    DW_AT_byte_stride             = 0x51,
    DW_AT_entry_pc                = 0x52,
    DW_AT_use_UTF8                = 0x53,
    DW_AT_extension               = 0x54,
    DW_AT_ranges                  = 0x55,
    DW_AT_trampoline              = 0x56,
    DW_AT_call_column             = 0x57,
    DW_AT_call_file               = 0x58,
    DW_AT_call_line               = 0x59,
    DW_AT_description             = 0x5a,
    DW_AT_binary_scale            = 0x5b,
    DW_AT_decimal_scale           = 0x5c,
    DW_AT_small                   = 0x5d,
    DW_AT_decimal_sign            = 0x5e,
    DW_AT_digit_count             = 0x5f,
    DW_AT_picture_string          = 0x60,
    DW_AT_mutable                 = 0x61,
    DW_AT_threads_scaled          = 0x62,
    DW_AT_explicit                = 0x63,
    DW_AT_object_pointer          = 0x64,
    DW_AT_endianity               = 0x65,
    DW_AT_elemental               = 0x66,
    DW_AT_pure                    = 0x67,
    DW_AT_recursive               = 0x68,
    DW_AT_signature               = 0x69,
    DW_AT_main_subprogram         = 0x6a,
    DW_AT_data_bit_offset         = 0x6b,
    DW_AT_const_expr              = 0x6c,
    DW_AT_enum_class              = 0x6d,
    DW_AT_linkage_name            = 0x6e,
    DW_AT_string_length_bit_size  = 0x6f,
    DW_AT_string_length_byte_size = 0x70,
    DW_AT_rank                    = 0x71,
    DW_AT_str_offsets_base        = 0x72,
    DW_AT_addr_base               = 0x73,
    DW_AT_rnglists_base           = 0x74,
    DW_AT_dwo_name                = 0x76,
    DW_AT_reference               = 0x77,
    DW_AT_rvalue_reference        = 0x78,
    DW_AT_macros                  = 0x79,
    DW_AT_call_all_calls          = 0x7a,
    DW_AT_call_all_source_calls   = 0x7b,
    DW_AT_call_all_tail_calls     = 0x7c,
    DW_AT_call_return_pc          = 0x7d,
    DW_AT_call_value              = 0x7e,
    DW_AT_call_origin             = 0x7f,
    DW_AT_call_parameter          = 0x80,
    DW_AT_call_pc                 = 0x81,
    DW_AT_call_tail_call          = 0x82,
    DW_AT_call_target             = 0x83,
    DW_AT_call_target_clobbered   = 0x84,
    DW_AT_call_data_location      = 0x85,
    DW_AT_call_data_value         = 0x86,
    DW_AT_noreturn                = 0x87,
    DW_AT_alignment               = 0x88,
    DW_AT_export_symbols          = 0x89,
    DW_AT_deleted                 = 0x8a,
    DW_AT_defaulted               = 0x8b,
    DW_AT_loclists_base           = 0x8c,
    DW_AT_lo_user                 = 0x2000,
    DW_AT_hi_user                 = 0x3fff
} attribute_encoding_t;
// DW_FORM_* codes: how the value of an attribute is encoded inside a DIE.
typedef enum {
    DW_FORM_addr           = 0x01,
    DW_FORM_block2         = 0x03,
    DW_FORM_block4         = 0x04,
    DW_FORM_data2          = 0x05,
    DW_FORM_data4          = 0x06,
    DW_FORM_data8          = 0x07,
    DW_FORM_string         = 0x08,
    DW_FORM_block          = 0x09,
    DW_FORM_block1         = 0x0a,
    DW_FORM_data1          = 0x0b,
    DW_FORM_flag           = 0x0c,
    DW_FORM_sdata          = 0x0d,
    DW_FORM_strp           = 0x0e,
    DW_FORM_udata          = 0x0f,
    DW_FORM_ref_addr       = 0x10,
    DW_FORM_ref1           = 0x11,
    DW_FORM_ref2           = 0x12,
    DW_FORM_ref4           = 0x13,
    DW_FORM_ref8           = 0x14,
    DW_FORM_ref_udata      = 0x15,
    DW_FORM_indirect       = 0x16,
    DW_FORM_sec_offset     = 0x17,
    DW_FORM_exprloc        = 0x18,
    DW_FORM_flag_present   = 0x19,
    DW_FORM_strx           = 0x1a,
    DW_FORM_addrx          = 0x1b,
    DW_FORM_ref_sup4       = 0x1c,
    DW_FORM_strp_sup       = 0x1d,
    DW_FORM_data16         = 0x1e,
    DW_FORM_line_strp      = 0x1f,
    DW_FORM_ref_sig8       = 0x20,
    DW_FORM_implicit_const = 0x21,
    DW_FORM_loclistx       = 0x22,
    DW_FORM_rnglistx       = 0x23,
    DW_FORM_ref_sup8       = 0x24,
    DW_FORM_strx1          = 0x25,
    DW_FORM_strx2          = 0x26,
    DW_FORM_strx3          = 0x27,
    DW_FORM_strx4          = 0x28,
    DW_FORM_addrx1         = 0x29,
    DW_FORM_addrx2         = 0x2a,
    DW_FORM_addrx3         = 0x2b,
    DW_FORM_addrx4         = 0x2c,
} attribute_form_t;
// DW_LANG_* codes: source language identifiers (the value of DW_AT_language).
typedef enum {
    DW_LANG_C89            = 0x01,
    DW_LANG_C              = 0x02,
    DW_LANG_Ada83          = 0x03,
    DW_LANG_C_plus_plus    = 0x04,
    DW_LANG_Cobol74        = 0x05,
    DW_LANG_Cobol85        = 0x06,
    DW_LANG_Fortran77      = 0x07,
    DW_LANG_Fortran90      = 0x08,
    DW_LANG_Pascal83       = 0x09,
    DW_LANG_Modula2        = 0x0a,
    DW_LANG_Java           = 0x0b,
    DW_LANG_C99            = 0x0c,
    DW_LANG_Ada95          = 0x0d,
    DW_LANG_Fortran95      = 0x0e,
    DW_LANG_PLI            = 0x0f,
    DW_LANG_ObjC           = 0x10,
    DW_LANG_ObjC_plus_plus = 0x11,
    DW_LANG_UPC            = 0x12,
    DW_LANG_D              = 0x13,
    DW_LANG_Python         = 0x14,
    DW_LANG_OpenCL         = 0x15,
    DW_LANG_Go             = 0x16,
    DW_LANG_Modula3        = 0x17,
    DW_LANG_Haskell        = 0x18,
    DW_LANG_C_plus_plus_03 = 0x19,
    DW_LANG_C_plus_plus_11 = 0x1a,
    DW_LANG_OCaml          = 0x1b,
    DW_LANG_Rust           = 0x1c,
    DW_LANG_C11            = 0x1d,
    DW_LANG_Swift          = 0x1e,
    DW_LANG_Julia          = 0x1f,
    DW_LANG_Dylan          = 0x20,
    DW_LANG_C_plus_plus_14 = 0x21,
    DW_LANG_Fortran03      = 0x22,
    DW_LANG_Fortran08      = 0x23,
    DW_LANG_RenderScript   = 0x24,
    DW_LANG_BLISS          = 0x25,
    DW_LANG_lo_user        = 0x8000,
    DW_LANG_hi_user        = 0xffff,
} language_type_t;
// Table 7.2, page 199
// DW_UT_* codes: the unit_type byte found in a unit header.
typedef enum {
    DW_UT_compile       = 0x01,
    DW_UT_type          = 0x02,
    DW_UT_partial       = 0x03,
    DW_UT_skeleton      = 0x04,
    DW_UT_split_compile = 0x05,
    DW_UT_split_type    = 0x06,
    DW_UT_lo_user       = 0x80,
    DW_UT_hi_user       = 0xff,
} unit_header_types_t;

470
main.cpp Normal file
View file

@ -0,0 +1,470 @@
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <elf.h>
#include <cstddef>
#include <cstring>
#include <cstdlib>
#include <assert.h>
#include "enums.h"
#include "enum_names.h"
// STD
// TODO: maybe do not use this
#include <vector>
// Two-level stringification: STRINGIFY(x) lets the preprocessor expand x first
// (when x is itself a macro) and then _STRINGIFY turns the result into a string
// literal via the # operator.
#define _STRINGIFY(symbol) #symbol
#define STRINGIFY(symbol) _STRINGIFY(symbol)
// TODO: dwarf32
// page 200
// Unit header as it appears at the start of a unit in .debug_info. The struct is
// packed so that a pointer into the mapped file can be cast to it directly;
// parse_debuginfo_section() relies on that.
typedef struct __attribute__((packed)) {
    uint32_t preface;             // should be 0xFFFFFFFF for 64b
    uint64_t unit_length;
    uint16_t version;             // should be 5
    uint8_t  unit_type;           // should be DW_UT_compile or DW_UT_partial
    uint8_t  address_size;        // should be 8 for 64b systems
    uint64_t debug_abbrev_offset;
} compilation_unit_header_t;
// TODO: dwarf32
// Header of one set in .debug_aranges. Packed so that it can be overlaid on the
// mapped file bytes, as parse_aranges_section() does.
typedef struct __attribute__((packed)) {
    uint32_t preface;               // should be 0xFFFFFFFF for 64b
    uint64_t unit_length;
    uint16_t version;
    uint64_t debug_info_offset;
    uint8_t  address_size;          // should be 8 for 64b systems
    uint8_t  segment_selector_size;
} debug_aranges_header_t;
// Guard against the compiler inserting padding into the overlay.
static_assert(sizeof(debug_aranges_header_t) == 24, "debug_aranges_header_t must be exactly 24 bytes");
// One abbreviation declaration decoded from .debug_abbrev.
// Field order matters: entries are created with positional brace initialisation.
typedef struct {
    uint64_t abbrev_code; // code read from .debug_abbrev; DIEs refer to the declaration by it
    uint32_t abbrev_tag;  // DW_TAG_*
    uint8_t  children;    // DW_CHILDREN_*
} debug_abbrev_entry_t;
// One (name, form) attribute specification belonging to an abbreviation declaration.
// Field order matters: specs are created with positional brace initialisation.
typedef struct {
    uint64_t name;  // DW_AT_*
    uint64_t form;  // DW_FORM_*
    uint64_t value; // if DW_FORM_implicit_const
} attribute_spec_t;
// TODO: Store this in a more space-sensitive way
// Decoded abbreviation table. The two vectors are filled in lock-step by
// parse_debuginfo_section(): specs[i] holds the attribute specifications of entries[i].
typedef struct {
    std::vector<debug_abbrev_entry_t>          entries;
    std::vector<std::vector<attribute_spec_t>> specs;
} abbrev_table_t;

// The single table instance shared by the parsing code.
abbrev_table_t abbrev_table;
// function signatures
// In all of these, `file` is the base address of the memory-mapped ELF image.

// Dumps the abbreviation table and the DIEs of the compilation unit located at
// cu_header_offset inside .debug_info.
void parse_debuginfo_section(const void *file);
// Dumps the .debug_aranges header and address tuples found at `offset` bytes into the
// file, and records the header's debug_info_offset in cu_header_offset.
void parse_aranges_section(const void *file, uint64_t offset);
// Returns sh_addralign of the section called `name`, or (uint64_t)-1 when no section matches.
uint64_t get_alignment_of_section(const void *file, const char *name);
// Returns sh_offset of the section called `name`, or (uint64_t)-1 when no section matches.
uint64_t get_offset_of_section(const void *file, const char *name);
// Prints index and name of the first `n` section headers in `table`; names are read
// from the string table section at index `strtab_index`.
void parse_section_names(const void *file, Elf64_Shdr* table, int n, int strtab_index);
// Prints the fields of the ELF file header.
void print_header(Elf64_Ehdr *header);
// TODO: Deal with cases where size of LEB128 > word size
// Decodes one unsigned LEB128 number.
//
//   src  - first byte of the encoded number; bytes are read until one has its
//          high bit (0x80) clear
//   dest - receives the decoded value; groups that do not fit in 64 bits are
//          consumed but dropped
//
// Returns the number of bytes consumed, i.e. how far the caller should advance `src`.
static inline int decode_leb128(const uint8_t *src, uint64_t *dest)
{
    uint64_t result = 0;
    int shift = 0;
    int consumed = 0;
    uint8_t byte;
    do {
        byte = src[consumed++];
        if (shift < 64)
        {
            // Widen before shifting: an int-sized shift is wrong from bit 31 on
            // and undefined once the shift count reaches 32.
            result |= (uint64_t)(byte & 0x7f) << shift;
            shift += 7;
        }
    } while (byte & 0x80);
    *dest = result;
    // Count bytes directly; deriving the count from the bit shift is off by one
    // for encodings of eight bytes or more.
    return consumed;
}
// globals
// Offset of the compilation unit header inside .debug_info. Written by
// parse_aranges_section() from the aranges header's debug_info_offset and read by
// parse_debuginfo_section() to locate the header it parses.
uint64_t cu_header_offset = 0;
void parse_debuginfo_section(const void *file)
{
char *p = (char*)file;
uint64_t debuginfo_offset = get_offset_of_section(file, ".debug_info");
fprintf(stdout, "Offset of .debug_info section: %d\n", debuginfo_offset);
fprintf(stdout, "Offset of CUH in section: %d\n", cu_header_offset);
p += debuginfo_offset + cu_header_offset;
compilation_unit_header_t* cuh = (compilation_unit_header_t*)p;
fprintf(stdout, "CUH length: %#x\n", cuh->unit_length);
fprintf(stdout, "CUH version: %d\n", cuh->version);
// TODO: multifile dwarf
fprintf(stdout, "CUH unit_type: %d (%s)\n", cuh->unit_type, (cuh->unit_type == DW_UT_compile ? STRINGIFY(DW_UT_compile) : "?"));
fprintf(stdout, "CUH address_size: %d\n", cuh->address_size);
fprintf(stdout, "CUH debug_abbrev_offset: %d\n", cuh->debug_abbrev_offset);
uint64_t abbrev_section_offset = get_offset_of_section(file, ".debug_abbrev");
// Pointer to .debug_abbrev section memory for current CU
char *cu_abbrev = ((char*)file) + abbrev_section_offset + cuh->debug_abbrev_offset;
fprintf(stdout, "ABBREV for this compilation unit should start at: %x\n", abbrev_section_offset + cuh->debug_abbrev_offset);
// begin parsing proper
uint64_t abbrev_code;
uint64_t abbrev_tag;
// TODO: Error handling
do {
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &abbrev_code);
// The entries for a compilation unit end with a 0-byte abbrev code
if (!abbrev_code)
break;
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &abbrev_tag);
bool children = *cu_abbrev++;
fprintf(stdout, " %d %s, children = %s\n", abbrev_code, dwarf_get_TAG_name(abbrev_tag), children ? "yes" : "no");
abbrev_table.entries.push_back({abbrev_code, (uint32_t)abbrev_tag, children});
std::vector<attribute_spec_t> attr_specs = {}; // for this code
uint64_t attrib_name;
uint64_t attrib_form;
uint64_t implicit_const = 0;
// TODO: Error handling
do {
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &attrib_name);
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &attrib_form);
if (attrib_form == DW_FORM_implicit_const)
{
cu_abbrev += decode_leb128((uint8_t*)cu_abbrev, &implicit_const);
fprintf(stdout, "\t%-26s %s value: %d\n", dwarf_get_AT_name(attrib_name), dwarf_get_FORM_name(attrib_form), implicit_const);
}
else if (!attrib_name && !attrib_form)
fprintf(stdout, "\t0\n");
else
fprintf(stdout, "\t%-26s %s\n", dwarf_get_AT_name(attrib_name), dwarf_get_FORM_name(attrib_form));
if (attrib_name && attrib_form)
{
attr_specs.push_back({attrib_name, attrib_form, implicit_const});
}
} while (attrib_name && attrib_form);
abbrev_table.specs.push_back(attr_specs);
} while (true);
p += sizeof(compilation_unit_header_t);
fprintf(stdout, "==================================\n");
uint64_t code;
fprintf(stdout, ".debug_info offset for first DIE: %#x\n", ((uint64_t)p - (uint64_t)file));
// TODO: 32bit size
for (; ((uint64_t)p - (uint64_t)cuh) < cuh->unit_length - 12;)
{
p += decode_leb128((uint8_t*)p, &code);
fprintf(stdout, "%d (%s)\n", code, dwarf_get_TAG_name(abbrev_table.entries[code-1].abbrev_tag));
for (size_t i = 0; i < abbrev_table.specs[code-1].size(); ++i)
{
fprintf(stdout, "\t%s\t\t", dwarf_get_AT_name(abbrev_table.specs[code-1][i].name));
// Get the desired value according to FORM
switch (abbrev_table.specs[code-1][i].form)
{
case DW_FORM_addr: // 0x01
{
// TODO: 32bit addresses
uint64_t addr_value = *(uint64_t*)p;
p+= 8;
fprintf(stdout, "\t%#x\n", addr_value);
} break;
case DW_FORM_data2: // 0x05
{
uint16_t data = *(uint16_t*)p;
p += 2;
fprintf(stdout, "\t%#x\n", data);
} break;
case DW_FORM_data4: // 0x06
{
uint32_t data = *(uint32_t*)p;
p += 4;
fprintf(stdout, "\t%#x\n", data);
} break;
case DW_FORM_data8: // 0x06
{
uint64_t data = *(uint64_t*)p;
p += 8;
fprintf(stdout, "\t%#x\n", data);
} break;
case DW_FORM_string: // 0x08
{
// String is inside the .debug_info, just read it and advance pointer past null terminator
fprintf(stdout, "\t%s\n", p);
while (*p++);
} break;
case DW_FORM_data1: // 0x13
{
uint8_t data = *(uint8_t*)p;
p += 1;
if (abbrev_table.specs[code-1][i].name == DW_AT_language)
fprintf(stdout, "\t%s\n", dwarf_get_LANG_name(data));
else
fprintf(stdout, "\t%#x\n", data);
} break;
case DW_FORM_ref8:
{
uint64_t data = *(uint64_t*)p;
p += 8;
fprintf(stdout, "\t%#x\n", data);
} break;
case DW_FORM_sec_offset: // 0x17
{
// TODO: 32 addresses
// TODO: read more about this. Could both files exist at the same time?
// NOTE: objdump does not seem to try reading the string, instead outputs the pointer value
// read offset into .debug_rnglists or .debug_loclists of desired string
uint64_t str_offset = *(uint64_t*)p;
p+= 8;
uint64_t section_offset = get_offset_of_section(file, ".debug_rnglists");
char *string = (char*)file + section_offset + str_offset;
fprintf(stdout, "\t%s\n", string);
} break;
case DW_FORM_exprloc: // 0x18
{
uint64_t length;
p += decode_leb128((uint8_t*)p, &length);
// TODO: save this data
p += length;
fprintf(stdout, "(%d bytes data)\n");
} break;
case DW_FORM_flag_present:
{
// Nothing to read here, flag_present just indicates that a flag is ON. We output '1' just like objdump does
fprintf(stdout, "\t1\n");
} break;
case DW_FORM_implicit_const:
{
// We already have the value, do not advance the pointer at all
fprintf(stdout, "\t%#x\n", abbrev_table.specs[code-1][i].value);
} break;
case DW_FORM_strp: // 0x0e
{
// TODO: 32 addresses
// read offset into .debug_str of desired string
uint64_t str_offset = *(uint64_t*)p;
p+= 8;
uint64_t section_offset = get_offset_of_section(file, ".debug_str");
char *string = (char*)file + section_offset + str_offset;
fprintf(stdout, "\t%s\n", string);
} break;
case DW_FORM_line_strp: // 0x1f
{
// TODO: 32 addresses
// read offset into .debug_line_str of desired string
uint64_t str_offset = *(uint64_t*)p;
p+= 8;
uint64_t section_offset = get_offset_of_section(file, ".debug_line_str");
char *string = (char*)file + section_offset + str_offset;
fprintf(stdout, "\t%s\n", string);
} break;
default:
fprintf(stdout, "\tTODO\n");
break;
}
}
fprintf(stdout, "\n");
}
}
// Dumps the .debug_aranges set that starts `offset` bytes into the mapped ELF image
// `file`: first the header fields, then every (address, length) tuple up to the
// all-zero terminator or the end of the set, whichever comes first.
//
// Side effect: stores the header's debug_info_offset in the global cu_header_offset,
// which parse_debuginfo_section() uses to find the compilation unit header.
//
// `offset` may be (uint64_t)-1, the "section not found" value of
// get_offset_of_section(); in that case a message is printed and nothing is parsed.
// Exits the process when the address size is not 8 or segment selectors are present.
void parse_aranges_section(const void *file, uint64_t offset)
{
    if (offset == (uint64_t)-1)
    {
        fprintf(stderr, "No .debug_aranges section found\n");
        return;
    }
    debug_aranges_header_t *headerinfo = (debug_aranges_header_t*)((char*)file + offset);
    fprintf(stdout, ".debug_aranges info:\n");
    fprintf(stdout, "\tunit_length: %llu\n", (unsigned long long)headerinfo->unit_length);
    fprintf(stdout, "\tversion: %d\n", headerinfo->version);
    fprintf(stdout, "\tdebug_info_offset: %llu\n", (unsigned long long)headerinfo->debug_info_offset);
    fprintf(stdout, "\taddress_size: %d\n", headerinfo->address_size);
    fprintf(stdout, "\tsegment_selector_size: %d\n", headerinfo->segment_selector_size);
    // TODO: Deal with more than one address
    cu_header_offset = headerinfo->debug_info_offset;
    fprintf(stdout, "\tADDRESS\tLENGTH:\n");
    // Validate the sizes before they are used: a tuple size of zero would make the
    // alignment computation below divide by zero.
    // TODO: dwarf32
    if (headerinfo->address_size != 8)
    {
        fprintf(stderr, "Address_size != 8 not implemented yet");
        exit(1);
    }
    if (headerinfo->segment_selector_size)
    {
        fprintf(stderr, "Segment selectors are not implemented yet");
        exit(1);
    }
    // The first tuple starts at the first multiple of the tuple size past the header
    const size_t tuple_size = headerinfo->segment_selector_size + (size_t)headerinfo->address_size * 2;
    size_t first_tuple = sizeof(debug_aranges_header_t);
    while (first_tuple % tuple_size)
        ++first_tuple;
    const char *p = (const char*)headerinfo + first_tuple;
    // unit_length does not count the 4-byte escape and the 8-byte length themselves
    const char *unit_end = (const char*)headerinfo + sizeof(headerinfo->preface) + sizeof(headerinfo->unit_length) + headerinfo->unit_length;
    while (p + tuple_size <= unit_end)
    {
        // memcpy instead of a pointer cast: nothing guarantees 8-byte alignment here
        uint64_t address;
        memcpy(&address, p, sizeof address);
        fprintf(stdout, "\t%llx", (unsigned long long)address);
        p = p + headerinfo->address_size;
        uint64_t length;
        memcpy(&length, p, sizeof length);
        fprintf(stdout, "\t%llx\n", (unsigned long long)length);
        p = p + headerinfo->address_size;
        if (!address && !length)
            break;
    }
}
// Looks up the section called `name` in the mapped ELF64 image `file` and returns
// its sh_addralign. SHT_NULL headers are skipped. When no section matches, the
// result is -1 converted to uint64_t.
uint64_t get_alignment_of_section(const void *file, const char *name)
{
    const char *base = (const char *)file;
    const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *)file;
    const Elf64_Shdr *sections = (const Elf64_Shdr *)(base + ehdr->e_shoff);
    // Section names live in the string table section designated by e_shstrndx
    const char *names = base + (ptrdiff_t)sections[ehdr->e_shstrndx].sh_offset;
    const Elf64_Shdr *const last = sections + ehdr->e_shnum;
    for (const Elf64_Shdr *section = sections; section != last; ++section)
    {
        if (section->sh_type != SHT_NULL && strcmp(names + section->sh_name, name) == 0)
            return section->sh_addralign;
    }
    return (uint64_t)-1;
}
// Looks up the section called `name` in the mapped ELF64 image `file` and returns
// its sh_offset, i.e. where the section's bytes start inside the file. SHT_NULL
// headers are skipped. When no section matches, the result is -1 converted to uint64_t.
uint64_t get_offset_of_section(const void *file, const char *name)
{
    const char *base = (const char *)file;
    const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *)file;
    const Elf64_Shdr *sections = (const Elf64_Shdr *)(base + ehdr->e_shoff);
    // Section names live in the string table section designated by e_shstrndx
    const char *names = base + (ptrdiff_t)sections[ehdr->e_shstrndx].sh_offset;
    const Elf64_Shdr *const last = sections + ehdr->e_shnum;
    for (const Elf64_Shdr *section = sections; section != last; ++section)
    {
        if (section->sh_type != SHT_NULL && strcmp(names + section->sh_name, name) == 0)
            return section->sh_offset;
    }
    return (uint64_t)-1;
}
// Prints "Section #<index>: <name>" for the first `n` headers of `table`, leaving out
// headers of type SHT_NULL and SHT_SHLIB. Names are resolved against the string
// table section found at `strtab_index`; `file` is the base of the mapped ELF image.
void parse_section_names(const void *file, Elf64_Shdr* table, int n, int strtab_index)
{
    const char *names = (const char*)file + (ptrdiff_t)table[strtab_index].sh_offset;
    for (int index = 0; index < n; ++index)
    {
        const Elf64_Shdr *section = &table[index];
        if (section->sh_type != SHT_NULL && section->sh_type != SHT_SHLIB)
            fprintf(stdout, "Section #%d: %s\n", index, names + section->sh_name);
    }
}
// Prints every field of the ELF64 file header `header` to stdout: the e_ident
// bytes first, then the remaining fields in declaration order.
void print_header(Elf64_Ehdr *header)
{
    fprintf(stdout, "ELF identification:\n");
    fprintf(stdout, "\tEL_MAG0: %#13x\n", header->e_ident[0]);
    fprintf(stdout, "\tEL_MAG1: %10c\n", header->e_ident[1]);
    fprintf(stdout, "\tEL_MAG2: %10c\n", header->e_ident[2]);
    fprintf(stdout, "\tEL_MAG3: %10c\n", header->e_ident[3]);
    // The class byte is 1 for 32-bit and 2 for 64-bit objects, hence the * 32
    fprintf(stdout, "\tEL_CLASS: %9d (ELFCLASS%d)\n", header->e_ident[4], header->e_ident[4] * 32);
    fprintf(stdout, "\tEL_DATA: %10d (ELFDATA2%cSB)\n", header->e_ident[5], header->e_ident[5] == 1 ? 'L' : 'M');
    fprintf(stdout, "\tEL_VERSION: %7d (%s)\n", header->e_ident[6], header->e_ident[6] == 1 ? "EV_CURRENT" : "?");
    fprintf(stdout, "\tEL_OSABI: %9d (ELFOSABI_%s)\n", header->e_ident[7], header->e_ident[7] == 0 ? "SYSV" : header->e_ident[7] == 1 ? "HPUX" : "STANDALONE");
    fprintf(stdout, "\tEL_ABIVERSION: %4d\n", header->e_ident[8]);
    fprintf(stdout, "\tEL_PAD: /* Padding bytes */\n");
    // EI_NIDENT is the size of e_ident[], not an index into it; index 15 is just
    // the last padding byte.
    fprintf(stdout, "\tEL_NIDENT: %8d\n", EI_NIDENT);
    fprintf(stdout, "Object file type: ");
#define ET_CASE(name) case name: fprintf(stdout, "(" #name ")\n"); break
    switch (header->e_type)
    {
    ET_CASE(ET_NONE);
    ET_CASE(ET_REL);
    ET_CASE(ET_EXEC);
    ET_CASE(ET_DYN);
    ET_CASE(ET_CORE);
    ET_CASE(ET_LOOS);
    ET_CASE(ET_HIOS);
    ET_CASE(ET_LOPROC);
    ET_CASE(ET_HIPROC);
    default:
        // Always terminate the line, also for values without a name
        fprintf(stdout, "(unknown: %#x)\n", header->e_type);
        break;
    }
#undef ET_CASE
    fprintf(stdout, "Machine type: %#x\n", header->e_machine);
    fprintf(stdout, "Object file version: %u (%s)\n", header->e_version, header->e_version == 1 ? "EV_CURRENT" : "?");
    // e_entry, e_phoff and e_shoff are 64 bits wide in Elf64_Ehdr
    fprintf(stdout, "Entry point address: %#llx:\n", (unsigned long long)header->e_entry);
    fprintf(stdout, "Program header offset: %llu:\n", (unsigned long long)header->e_phoff);
    fprintf(stdout, "Section header offset: %llu:\n", (unsigned long long)header->e_shoff);
    fprintf(stdout, "Processor-specific flags: %#x\n", header->e_flags);
    fprintf(stdout, "ELF header size: %d\n", header->e_ehsize);
    fprintf(stdout, "Program header entry size: %d\n", header->e_phentsize);
    fprintf(stdout, "# of program header entries: %d\n", header->e_phnum);
    fprintf(stdout, "Size of section header entry: %d\n", header->e_shentsize);
    fprintf(stdout, "# of section header entries: %d\n", header->e_shnum);
    fprintf(stdout, "Section name string table index: %d\n", header->e_shstrndx);
}
// Entry point: maps the ELF file named by argv[1] read-only and dumps its file
// header, its section names, the .debug_aranges set and the compilation unit that
// set points at. Returns 1 on a usage error or when the file cannot be opened,
// inspected, mapped or recognised as 64-bit ELF; 0 otherwise.
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        fprintf(stdout, "Usage: pesticide [ELF binary path]\n");
        return 1;
    }
    fprintf(stdout, "Trying to read %s\n", argv[1]);
    int fd = open(argv[1], O_RDONLY);
    if (fd < 0)
    {
        fprintf(stdout, "Error trying to read %s\n", argv[1]);
        return 1;
    }
    struct stat st;
    if (fstat(fd, &st) < 0)
    {
        fprintf(stdout, "Error trying to stat %s\n", argv[1]);
        close(fd);
        return 1;
    }
    fprintf(stdout, "MMapping %lld bytes\n", (long long)st.st_size);
    void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    // The mapping stays valid after the descriptor is closed
    close(fd);
    if (addr == MAP_FAILED)
    {
        fprintf(stdout, "Error MMapping %s\n", argv[1]);
        return 1;
    }
    // Everything below casts the mapping to Elf64 structures, so make sure that is
    // what the file actually contains before touching any field.
    Elf64_Ehdr *elf_header = (Elf64_Ehdr *)addr;
    if ((uint64_t)st.st_size < sizeof(Elf64_Ehdr)
        || memcmp(elf_header->e_ident, ELFMAG, SELFMAG) != 0
        || elf_header->e_ident[EI_CLASS] != ELFCLASS64)
    {
        fprintf(stdout, "%s is not a 64-bit ELF file\n", argv[1]);
        munmap(addr, st.st_size);
        return 1;
    }
    print_header(elf_header);
    Elf64_Shdr *elf_section_table = (Elf64_Shdr *)((char*)addr + elf_header->e_shoff);
    parse_section_names(addr, elf_section_table, elf_header->e_shnum, elf_header->e_shstrndx);
    const uint64_t aranges_offset = get_offset_of_section(addr, ".debug_aranges");
    if (aranges_offset == (uint64_t)-1)
    {
        // Without the section cu_header_offset keeps its initial value 0, i.e. the
        // first unit of .debug_info gets dumped.
        fprintf(stdout, "No .debug_aranges section found\n");
    }
    else
    {
        fprintf(stdout, "Offset of section .debug_aranges is %llu\n", (unsigned long long)aranges_offset);
        fprintf(stdout, "Byte alignment of section .debug_aranges is %llu\n", (unsigned long long)get_alignment_of_section(addr, ".debug_aranges"));
        parse_aranges_section(addr, aranges_offset);
    }
    parse_debuginfo_section(addr);
    munmap(addr, st.st_size);
    return 0;
}

4
notes.txt Normal file
View file

@ -0,0 +1,4 @@
What is the deal with the DW_FORM_GNU_* and DW_FORM_LLVM_* values? They are obviously not in the DWARF5 spec
Same for DW_AT_APPLE_*
DW_AT_subscr_data, DW_AT_subscr_data, DW_AT_element_list, DW_AT_member, DW_AT_dwo_id, DW_AT_ghs_namespace_alias, DW_AT_ghs_rsm, DW_AT_ghs_frsm, DW_AT_ghs_frames, DW_AT_ghs_rso, DW_AT_ghs_subcpu, DW_AT_ghs_lbrace_line, DW_AT_sf_names, DW_AT_src_info, DW_AT_mac_info, DW_AT_src_coords, DW_AT_body_begin and DW_AT_body_end are not to be found in the PDF
DW_AT_bit_offset, DW_AT_macro_info and DW_TAG_mutable_type are deprecated