C++ 基于libbfd实现二进制加载器
阅读原文时间:2023年07月09日阅读:1

构建工具解析二进制文件,基于libbfd实现,提取符号和节

文档参考: LIB BFD, the Binary File Descriptor Library

BFD即Binary File Descriptor(二进制文件描述符),为读取和解析各种二进制格式提供了一个公共接口,该库也提供了针对各种体系结构的编译版本。BFD库是GNU项目的一部分,在终端执行cat /usr/include/bfd.h可以看到头文件中的定义。

首先创建一个头文件,定义相关的类和函数

#include <stdint.h>
#include <string>
#include <vector>

class Binary;
class Section;
class Symbol;

引入要使用的头文件,并且定义3个类名

定义一个Binary类,作为整个二进制文件的抽象,定义Section和Symbol类,作为节和符号的抽象。

Symbol

高级语言的源代码在编译时,编译器会翻译符号,记录二进制代码和数据与符号的对应。使用readelf --syms可以查看程序的符号信息:

$ readelf --syms hello

Symbol table '.dynsym' contains 4 entries:
   Num:    Value          Size Type    Bind   Vis      Ndx Name
     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 0000000000000000     0 FUNC    GLOBAL DEFAULT  UND puts@GLIBC_2.2.5 (2)
     2: 0000000000000000     0 FUNC    GLOBAL DEFAULT  UND __libc_start_main@GLIBC_2.2.5 (2)
     3: 0000000000000000     0 NOTYPE  WEAK   DEFAULT  UND __gmon_start__

Symbol table '.symtab' contains 61 entries:
   Num:    Value          Size Type    Bind   Vis      Ndx Name
     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 00000000004002a8     0 SECTION LOCAL  DEFAULT    1
     2: 00000000004002c4     0 SECTION LOCAL  DEFAULT    2
     3: 00000000004002e4     0 SECTION LOCAL  DEFAULT    3
     4: 0000000000400308     0 SECTION LOCAL  DEFAULT    4
     5: 0000000000400328     0 SECTION LOCAL  DEFAULT    5
     6: 0000000000400388     0 SECTION LOCAL  DEFAULT    6
     7: 00000000004003c6     0 SECTION LOCAL  DEFAULT    7
     8: 00000000004003d0     0 SECTION LOCAL  DEFAULT    8
     9: 00000000004003f0     0 SECTION LOCAL  DEFAULT    9
    10: 0000000000400420     0 SECTION LOCAL  DEFAULT   10
    11: 0000000000401000     0 SECTION LOCAL  DEFAULT   11
    12: 0000000000401020     0 SECTION LOCAL  DEFAULT   12
    13: 0000000000401040     0 SECTION LOCAL  DEFAULT   13
    14: 00000000004011a4     0 SECTION LOCAL  DEFAULT   14
    15: 0000000000402000     0 SECTION LOCAL  DEFAULT   15
    16: 0000000000402010     0 SECTION LOCAL  DEFAULT   16
    17: 0000000000402050     0 SECTION LOCAL  DEFAULT   17
    18: 0000000000403e10     0 SECTION LOCAL  DEFAULT   18
    19: 0000000000403e18     0 SECTION LOCAL  DEFAULT   19
    20: 0000000000403e20     0 SECTION LOCAL  DEFAULT   20
    21: 0000000000403ff0     0 SECTION LOCAL  DEFAULT   21
    22: 0000000000404000     0 SECTION LOCAL  DEFAULT   22
    23: 0000000000404020     0 SECTION LOCAL  DEFAULT   23
    24: 0000000000404030     0 SECTION LOCAL  DEFAULT   24
    25: 0000000000000000     0 SECTION LOCAL  DEFAULT   25
    26: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS crtstuff.c
    27: 0000000000401080     0 FUNC    LOCAL  DEFAULT   13 deregister_tm_clones
    28: 00000000004010b0     0 FUNC    LOCAL  DEFAULT   13 register_tm_clones
    29: 00000000004010f0     0 FUNC    LOCAL  DEFAULT   13 __do_global_dtors_aux
    30: 0000000000404030     1 OBJECT  LOCAL  DEFAULT   24 completed.7325
    31: 0000000000403e18     0 OBJECT  LOCAL  DEFAULT   19 __do_global_dtors_aux_fin
    32: 0000000000401120     0 FUNC    LOCAL  DEFAULT   13 frame_dummy
    33: 0000000000403e10     0 OBJECT  LOCAL  DEFAULT   18 __frame_dummy_init_array_
    34: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS hello.c
    35: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS crtstuff.c
    36: 000000000040214c     0 OBJECT  LOCAL  DEFAULT   17 __FRAME_END__
    37: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS
    38: 0000000000403e18     0 NOTYPE  LOCAL  DEFAULT   18 __init_array_end
    39: 0000000000403e20     0 OBJECT  LOCAL  DEFAULT   20 _DYNAMIC
    40: 0000000000403e10     0 NOTYPE  LOCAL  DEFAULT   18 __init_array_start
    41: 0000000000402010     0 NOTYPE  LOCAL  DEFAULT   16 __GNU_EH_FRAME_HDR
    .....

Symbol类与二进制文件的符号相关,ELF文件中的符号表包括局部和全局变量、函数、重定位表达式及对象等,此处只解析函数符号。

// A symbol extracted from the binary's symbol tables.
class Symbol {
public:
    // Kind of symbol; SYM_TYPE_FUNC is the only meaningful value.
    enum SymbolType
    {
        SYM_TYPE_UKN = 0,   // unknown / unclassified
        SYM_TYPE_FUNC = 1,  // function symbol
    };

    // Default state: unknown type, empty name, address 0.
    Symbol() {}

    SymbolType type = SYM_TYPE_UKN;  // symbol type
    std::string name;                // symbol name
    uint64_t addr = 0;               // start address
};

如上是类的实现,SymbolType枚举体代表符号类型,其唯一的一个有效值为SYM_TYPE_FUNC,类中所定义的type是符号类型,name是符号名称,addr是函数起始地址。

Section

Section类围绕二进制文件的节进行实现。例如Linux上的ELF格式文件,由一系列二进制节组织而成。使用readelf都会显示相关的基本信息,包括节头表里的索引、节的名称和类型,除此以外,还可以查看节的虚拟地址、文件偏移及大小、节的标志等等信息。

使用命令 $ readelf --sections --wide ./hello 解析一个最简单的helloworld程序:

输出

$ readelf --sections --wide ./hello
There are 29 section headers, starting at offset 0x38e0:

节头:
  [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            0000000000000000 000000 000000 00      0   0  0
  [ 1] .interp           PROGBITS        00000000004002a8 0002a8 00001c 00   A  0   0  1
  [ 2] .note.ABI-tag     NOTE            00000000004002c4 0002c4 000020 00   A  0   0  4
  [ 3] .note.gnu.build-id NOTE            00000000004002e4 0002e4 000024 00   A  0   0  4
  [ 4] .gnu.hash         GNU_HASH        0000000000400308 000308 00001c 00   A  5   0  8
  [ 5] .dynsym           DYNSYM          0000000000400328 000328 000060 18   A  6   1  8
  [ 6] .dynstr           STRTAB          0000000000400388 000388 00003d 00   A  0   0  1
  [ 7] .gnu.version      VERSYM          00000000004003c6 0003c6 000008 02   A  5   0  2
  [ 8] .gnu.version_r    VERNEED         00000000004003d0 0003d0 000020 00   A  6   1  8
  [ 9] .rela.dyn         RELA            00000000004003f0 0003f0 000030 18   A  5   0  8
  [10] .rela.plt         RELA            0000000000400420 000420 000018 18  AI  5  22  8
  [11] .init             PROGBITS        0000000000401000 001000 000017 00  AX  0   0  4
  [12] .plt              PROGBITS        0000000000401020 001020 000020 10  AX  0   0 16
  [13] .text             PROGBITS        0000000000401040 001040 000161 00  AX  0   0 16
  [14] .fini             PROGBITS        00000000004011a4 0011a4 000009 00  AX  0   0  4
  [15] .rodata           PROGBITS        0000000000402000 002000 000010 00   A  0   0  4
  [16] .eh_frame_hdr     PROGBITS        0000000000402010 002010 00003c 00   A  0   0  4
  [17] .eh_frame         PROGBITS        0000000000402050 002050 000100 00   A  0   0  8
  [18] .init_array       INIT_ARRAY      0000000000403e10 002e10 000008 08  WA  0   0  8
  [19] .fini_array       FINI_ARRAY      0000000000403e18 002e18 000008 08  WA  0   0  8
  [20] .dynamic          DYNAMIC         0000000000403e20 002e20 0001d0 10  WA  6   0  8
  [21] .got              PROGBITS        0000000000403ff0 002ff0 000010 08  WA  0   0  8
  [22] .got.plt          PROGBITS        0000000000404000 003000 000020 08  WA  0   0  8
  [23] .data             PROGBITS        0000000000404020 003020 000010 00  WA  0   0  8
  [24] .bss              NOBITS          0000000000404030 003030 000008 00  WA  0   0  1
  [25] .comment          PROGBITS        0000000000000000 003030 000023 01  MS  0   0  1
  [26] .symtab           SYMTAB          0000000000000000 003058 0005b8 18     27  43  8
  [27] .strtab           STRTAB          0000000000000000 003610 0001c9 00      0   0  1
  [28] .shstrtab         STRTAB          0000000000000000 0037d9 000103 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
  L (link order), O (extra OS processing required), G (group), T (TLS),
  C (compressed), x (unknown), o (OS specific), E (exclude),
  l (large), p (processor specific)

这些节有着自己的含义:

名称

类型

属性

含义

.bss

SHT_NOBITS

SHF_ALLOC SHF_WRITE

包含将出现在程序的内存映像中的未初始化数据。根据定义,当程序开始执行,系统将把这些数据初始化为 0。此节区不占用文件空间。

.comment

SHT_PROGBITS

(无)

包含版本控制信息。

.data 和 .data1

SHT_PROGBITS

SHF_ALLOC SHF_WRITE

这些节区包含初始化了的数据,将出现在程序的内存映像中。

.debug

SHT_PROGBITS

(无)

此节区包含用于符号调试的信息。

.dynamic

SHT_DYNAMIC

此节区包含动态链接信息。节区的属性将包含 SHF_ALLOC 位。是否 SHF_WRITE 位被设置取决于处理器。

.dynstr

SHT_STRTAB

SHF_ALLOC

此节区包含用于动态链接的字符串,大多数情况下这些字符串代表了与符号表项相关的名称。

.dynsym

SHT_DYNSYM

SHF_ALLOC

此节区包含了动态链接符号表。

.fini

SHT_PROGBITS

SHF_ALLOC SHF_EXECINSTR

此节区包含了可执行的指令,是进程终止代码的一部分。程序正常退出时,系统将安排执行这里的代码。

.got

SHT_PROGBITS

此节区包含全局偏移表。

.hash

SHT_HASH

SHF_ALLOC

此节区包含了一个符号哈希表.

.init

SHT_PROGBITS

SHF_ALLOC SHF_EXECINSTR

此节区包含了可执行指令,是进程初始化代码的一部分。当程序开始执行时,系统要在开始调用主程序入口之前(通常指 C 语言的 main 函数)执行这些代码。

.interp

SHT_PROGBITS

此节区包含程序解释器的路径名。如果程序包含一个可加载的段,段中包含此节区,那么节区的属性将包含 SHF_ALLOC 位,否则该位为 0。

.line

SHT_PROGBITS

此节区包含符号调试的行号信息,其中描述了源程序与机器指令之间的对应关系。其内容是未定义的。

.note

SHT_NOTE

此节区中包含注释信息,有独立的格式。

.plt

SHT_PROGBITS

此节区包含过程链接表(procedure linkage table)。

.relname

SHT_REL

这些节区中包含了重定位信息。如果文件中包含可加载的段,段中有重定位内容,节区的属性将包含 SHF_ALLOC 位,否则该位置 0。传统上 name 根据重定位所适用的节区给定。例如 .text 节区的重定位节区名字将是:.rel.text 或者 .rela.text。

.rela name

SHT_RELA

.rodata

SHT_PROGBITS

SHF_ALLOC

这些节区包含只读数据,这些数据通常参与进程映像的不可写段。

.rodata1

SHT_PROGBITS

SHF_ALLOC

.shstrtab

SHT_STRTAB

此节区包含节区名称。

.strtab

SHT_STRTAB

此节区包含字符串,通常是代表与符号表项相关的名称。如果文件拥有一个可加载的段,段中包含符号串表,节区的属性将包含 SHF_ALLOC 位,否则该位为 0。

.symtab

SHT_SYMTAB

此节区包含一个符号表。如果文件中包含一个可加载的段,并且该段中包含符号表,那么节区的属性中包含SHF_ALLOC 位,否则该位置为 0。

.text

SHT_PROGBITS

此节区包含程序的可执行指令。

定义如下:

// One section of a loaded binary: its name, type, address range and raw bytes.
class Section {
public:
    // Classification assigned by the loader.
    enum SectionType {
        SEC_TYPE_NONE = 0,
        SEC_TYPE_CODE = 1,  // code section
        SEC_TYPE_DATA = 2   // data section
    };

    // Initializers are listed in member declaration order.
    Section() : binary(nullptr), type(SEC_TYPE_NONE),
                size(0), vma(0), bytes(nullptr) {}

    // Returns true when addr lies in [vma, vma + size).
    // Written as a subtraction so the check cannot wrap around the way
    // "addr < vma + size" could for sections near the top of the address space.
    bool contains(uint64_t addr) const {
        return (addr >= vma) && (addr - vma < size);
    }

    class Binary *binary;  // Binary this section was loaded into
    std::string name;      // section name
    SectionType type;      // section type
    uint64_t size;         // section size in bytes
    uint64_t vma;          // start address
    uint8_t *bytes;        // raw section bytes (byte-addressable buffer)
};

其中封装了节的主要属性: 节的名称、类型、起始地址(vma成员)、大小(以字节为单位)及节中包含的原始字节,节的类型由枚举体SectionType的值表示,该值表示该段是代码段还是数据段。

Binary

这个类作为整个二进制文件的抽象,其中包含了二进制文件的文件名、类型、平台架构、位宽、入口点地址、节及符号。

// In-memory description of one loaded binary file.
class Binary {
public:
    // Container format of the file.
    enum BinaryType
    {
        BIN_TYPE_AUTO = 0,
        BIN_TYPE_ELF = 1, // ELF file format
        BIN_TYPE_PE = 2   // PE file format
    };
    // Platform architecture.
    enum BinaryArch
    {
        ARCH_NONE = 0,
        ARCH_X86 = 1  // x86 architecture
    };

    Binary()
        : type(BIN_TYPE_AUTO),
          arch(ARCH_NONE),
          bits(0),
          entry(0)
    {}

    // Returns a pointer to the first section named ".text",
    // or NULL when no such section has been loaded.
    Section *get_text_section() {
        for (std::vector<Section>::iterator it = sections.begin();
             it != sections.end(); ++it) {
            if (it->name == ".text") {
                return &*it;
            }
        }
        return NULL;
    }

    std::string filename;           // file name
    BinaryType type;                // binary file format
    std::string type_str;           // format as a string
    BinaryArch arch;                // platform architecture
    std::string arch_str;           // architecture as a string
    unsigned bits;                  // bit width
    uint64_t entry;                 // entry point address
    std::vector<Section> sections;  // loaded sections
    std::vector<Symbol> symbols;    // loaded symbols
};

类中定义了一个get_text_section() 函数,查找.text节并返回指向它的Section指针,找不到则返回NULL。

头文件完整内容

#ifndef LOADER_H
#define LOADER_H

#include <stdint.h>
#include <string>
#include <vector>

class Binary;
class Section;
class Symbol;

// A symbol extracted from the binary's symbol tables.
class Symbol {
public:
    // Kind of symbol.
    enum SymbolType
    {
        SYM_TYPE_UKN = 0,   // unknown / unclassified
        SYM_TYPE_FUNC = 1,  // function symbol
    };

    // Default state: unknown type, empty name, address 0.
    Symbol() {}

    SymbolType type = SYM_TYPE_UKN;  // symbol type
    std::string name;                // symbol name
    uint64_t addr = 0;               // start address
};

// One section of a loaded binary: its name, type, address range and raw bytes.
class Section {
public:
    // Classification assigned by the loader.
    enum SectionType {
        SEC_TYPE_NONE = 0,
        SEC_TYPE_CODE = 1,  // code section
        SEC_TYPE_DATA = 2   // data section
    };

    // Initializers follow member declaration order (size before vma),
    // which is the order the members are actually initialized in.
    Section() : binary(nullptr), type(SEC_TYPE_NONE),
                size(0), vma(0), bytes(nullptr) {}

    // Returns true when addr lies in [vma, vma + size).
    // Written as a subtraction so the check cannot wrap around the way
    // "addr < vma + size" could. Declared const so it can be used on
    // const Section objects and references.
    bool contains(uint64_t addr) const {
        return (addr >= vma) && (addr - vma < size);
    }

    class Binary *binary;  // Binary this section was loaded into
    std::string name;      // section name
    SectionType type;      // section type
    uint64_t size;         // section size in bytes
    uint64_t vma;          // start address
    uint8_t *bytes;        // raw section bytes
};

// In-memory description of one loaded binary file.
class Binary {
public:
    // Container format of the file.
    enum BinaryType
    {
        BIN_TYPE_AUTO = 0,
        BIN_TYPE_ELF = 1,
        BIN_TYPE_PE = 2
    };
    // Platform architecture.
    enum BinaryArch
    {
        ARCH_NONE = 0,
        ARCH_X86 = 1
    };

    Binary()
        : type(BIN_TYPE_AUTO),
          arch(ARCH_NONE),
          bits(0),
          entry(0)
    {}

    // Returns a pointer to the first section named ".text",
    // or NULL when no such section has been loaded.
    Section *get_text_section() {
        for (std::vector<Section>::iterator it = sections.begin();
             it != sections.end(); ++it) {
            if (it->name == ".text") {
                return &*it;
            }
        }
        return NULL;
    }

    std::string filename;           // file name
    BinaryType type;                // binary file format
    std::string type_str;           // format as a string
    BinaryArch arch;                // platform architecture
    std::string arch_str;           // architecture as a string
    unsigned bits;                  // bit width
    uint64_t entry;                 // entry point address
    std::vector<Section> sections;  // loaded sections
    std::vector<Symbol> symbols;    // loaded symbols
};

int load_binary(std::string &fname,Binary *bin,Binary::BinaryType type);
void unload_binary(Binary *bin);

#endif

定义加载器的两个入口函数load_binary和unload_binary函数

// Public entry point of the loader: loads the file named fname into *bin.
// All work is delegated to the libbfd-based implementation, whose status
// code is returned unchanged.
int load_binary(std::string &fname,Binary *bin,Binary::BinaryType type)
{
    const int status = load_binary_bfd(fname, bin, type);
    return status;
}

// Releases the raw byte buffers held by the sections of *bin.
//
// Each Section::bytes pointer is reset after it is freed, so calling
// unload_binary() a second time on the same object cannot free a buffer
// twice. A null bin is ignored.
void unload_binary(Binary *bin)
{
    if (!bin) {
        return;
    }

    for (auto &sec : bin->sections) {
        free(sec.bytes);   // free(NULL) is a no-op, so no separate check is needed
        sec.bytes = NULL;
    }
}

load_binary解析由文件名指定的二进制文件,并将其加载到Binary对象中,其中调用了load_binary_bfd函数,将在后续实现。

unload_binary负责释放资源,实际上就是将Binary中malloc出的内存空间都释放。每个Section对象都要开辟一段空间来保存原始字节,将bytes成员释放掉即可。

如下实现了一个open_bfd函数,使用libbfd通过文件名(fname参数)确定二进制文件的属性,并将其打开,然后返回该二进制文件的句柄。

// Opens the binary named fname through libbfd and validates it.
//
// Returns an open bfd handle (to be released with bfd_close()), or NULL after
// printing a diagnostic to stderr when the file cannot be opened, is not a
// bfd_object, or has an unknown flavour. The handle is closed on every
// failure path so it is not leaked.
static bfd* open_bfd(std::string &fname)
{
    static int bfd_inited = 0;

    // Initialise libbfd's internal state exactly once, on the first call.
    if (!bfd_inited) {
        bfd_init();
        bfd_inited = 1;
    }

    // A NULL target asks libbfd to determine the binary type itself.
    bfd *bfd_h = bfd_openr(fname.c_str(),NULL);
    if (!bfd_h) {
        fprintf(stderr,"failed to open binary '%s' (%s)\n",
            fname.c_str(),bfd_errmsg(bfd_get_error()));
        return NULL;
    }

    // Accept only object files: executables, relocatable objects, shared libraries.
    if (!bfd_check_format(bfd_h,bfd_object)) {
        fprintf(stderr,"file '%s' does not look like an executable (%s)\n",
            fname.c_str(),bfd_errmsg(bfd_get_error()));
        bfd_close(bfd_h);
        return NULL;
    }

    // Reset the error state by hand after the successful format check.
    bfd_set_error(bfd_error_no_error);

    // Reject files whose flavour (file format family) libbfd does not know.
    if (bfd_get_flavour(bfd_h) == bfd_target_unknown_flavour) {
        fprintf(stderr,"unrecognized format for binary '%s' (%s)\n",
            fname.c_str(),bfd_errmsg(bfd_get_error()));
        bfd_close(bfd_h);
        return NULL;
    }
    return bfd_h;
}

事先要使用bfd_init函数来初始化libbfd的内部结构,静态变量bfd_inited标识是否已经初始化,保证只初始化一次。通过调用bfd_openr函数以文件名打开二进制文件,该函数第二个参数指定了文件类型,传入NULL则表示让libbfd自动确定二进制文件类型。

bfd_openr返回一个指向bfd类型的文件句柄指针,这是libbfd的根数据结构,如果打开发生错误,则为NULL。使用bfd_get_error函数得到最近的错误类型,返回bfd_error_type对象,与预定义的错误标识符进行比较。

错误类型定义如下:

/* Error codes of libbfd (excerpt from bfd.h). bfd_get_error() reports the
   most recent one and bfd_errmsg() converts it to a printable string.  */
typedef enum bfd_error
{
  bfd_error_no_error = 0,
  bfd_error_system_call,
  bfd_error_invalid_target,
  bfd_error_wrong_format,
  bfd_error_wrong_object_format,
  bfd_error_invalid_operation,
  bfd_error_no_memory,
  bfd_error_no_symbols,
  bfd_error_no_armap,
  bfd_error_no_more_archived_files,
  bfd_error_malformed_archive,
  bfd_error_missing_dso,
  bfd_error_file_not_recognized,
  bfd_error_file_ambiguously_recognized,
  bfd_error_no_contents,
  bfd_error_nonrepresentable_section,
  bfd_error_no_debug_section,
  bfd_error_bad_value,
  bfd_error_file_truncated,
  bfd_error_file_too_big,
  bfd_error_on_input,
  bfd_error_invalid_error_code
}

bfd_errmsg可以将错误类型转换为字符串打印到屏幕。

获得文件句柄后,可用bfd_check_format函数检查二进制文件格式。该函数传入bfd句柄和bfd_format值。bfd_format定义如下:

/* File kinds that can be passed to bfd_check_format() (excerpt from bfd.h).
   The loader accepts only bfd_object.  */
typedef enum bfd_format
{
  bfd_unknown = 0,    /* File format is unknown.  */
  bfd_object,        /* Linker/assembler/compiler output.  */
  bfd_archive,        /* Object archive file.  */
  bfd_core,        /* Core dump.  */
  bfd_type_end        /* Marks the end; don't use it!  */
}

bfd_object对应了可执行文件、可重定位对象和共享库。确定为bfd_object类型的文件后,使用bfd_set_error函数手动将错误状态重置为bfd_error_no_error,这是因为某些版本的libbfd会在检测格式前设置bfd_error_wrong_format错误。

最后通过bfd_get_flavour函数检查二进制文件是否有已知的flavour,其实表示二进制文件的格式。

/* File format families reported by bfd_get_flavour() (excerpt from bfd.h).
   The loader maps bfd_target_elf_flavour to ELF and bfd_target_coff_flavour
   to PE, and rejects every other value.  */
enum bfd_flavour
{
  /* N.B. Update bfd_flavour_name if you change this.  */
  bfd_target_unknown_flavour,
  bfd_target_aout_flavour,
  bfd_target_coff_flavour,
  bfd_target_ecoff_flavour,
  bfd_target_xcoff_flavour,
  bfd_target_elf_flavour,
  bfd_target_tekhex_flavour,
  bfd_target_srec_flavour,
  bfd_target_verilog_flavour,
  bfd_target_ihex_flavour,
  bfd_target_som_flavour,
  bfd_target_os9k_flavour,
  bfd_target_versados_flavour,
  bfd_target_msdos_flavour,
  bfd_target_ovax_flavour,
  bfd_target_evax_flavour,
  bfd_target_mmo_flavour,
  bfd_target_mach_o_flavour,
  bfd_target_pef_flavour,
  bfd_target_pef_xlib_flavour,
  bfd_target_sym_flavour
};

bfd_target_coff_flavour便是微软的PE文件格式,bfd_target_elf_flavour是ELF文件格式,如果二进制格式未知,就返回bfd_target_unknown_flavour。

通过上述流程,可以打开一个有效的二进制文件

将二进制文件中的一些重要属性加载到Binary中

// 解析基本属性
static int load_binary_bfd(std::string &fname,Binary *bin,Binary::BinaryType type)
{
    int ret;
    bfd *bfd_h;
    const bfd_arch_info_type *bfd_info;

    bfd_h = NULL;
    bfd_h = open_bfd(fname); // 打开二进制文件
    if (!bfd_h) {
        goto fail; // 跳转
    }
    // bin指向Binary对象
    bin->filename = std::string(fname); // 文件名
    bin->entry = bfd_get_start_address(bfd_h); // 入口点地址
    bin->type_str = std::string(bfd_h->xvec->name); // 目标类型名称

    // 判断二进制文件格式
    switch(bfd_h->xvec->flavour) {
        case bfd_target_elf_flavour: // ELF文件格式
            bin->type = Binary::BIN_TYPE_ELF;
            break;
        case bfd_target_coff_flavour: // PE文件格式
            bin->type = Binary::BIN_TYPE_PE;
            break;
        case bfd_target_unknown_flavour: // 未知文件格式
        default:  // 仅支持PE或ELF
            fprintf(stderr,"unsupported binary (%s)\n",bfd_h->xvec->name);
            goto fail;
    }

    bfd_info = bfd_get_arch_info(bfd_h); // 获取平台架构信息
    bin->arch_str = std::string(bfd_info->printable_name);
    switch(bfd_info->mach) {
        case bfd_mach_i386_i386: // x86架构
            bin->arch = Binary::ARCH_X86;
            bin->bits = 32; // 设置位宽为32
            break;
        case bfd_mach_x86_64:    // x86_64架构
            bin->arch = Binary::ARCH_X86;
            bin->bits = 64; // 设置位宽为64
            break;
        default:
            fprintf(stderr,"unsupported architecture (%s)\n",
                bfd_info->printable_name);
            goto fail;
    }

    load_symbols_bfd(bfd_h,bin); // 加载静态符号
    load_dynsym_bfd(bfd_h,bin);  // 加载动态符号

    if (load_sections_bfd(bfd_h,bin)<0)  // 加载节
        goto fail;

    ret = 0;
    goto cleanup;

    fail: // 执行失败
        ret = -1;
    cleanup: // 清理
        if (bfd_h) bfd_close(bfd_h);

    return ret;
}

load_binary_bfd函数中,会首先使用open_bfd函数打开fname参数指定的二进制文件,并获得该二进制文件的bfd句柄。bin是Binary指针,是二进制文件的抽象。获取一些基本信息,对该对象进行赋值。用bfd_get_start_address来获取入口点地址,即返回了bfd对象中start_address字段的值。此处bfd_h中的xvec实际上指向一个bfd_target结构,其中就包含了二进制文件的各种信息:

/* Target descriptor reached through bfd->xvec (excerpt from bfd.h; the
   remaining members are elided). The loader reads only the name and
   flavour members.  */
typedef struct bfd_target
{
  /* Identifies the kind of target, e.g., SunOS4, Ultrix, etc.  */
  char *name;

 /* The "flavour" of a back end is a general indication about
    the contents of a file.  */
  enum bfd_flavour flavour;

  /* The order of bytes within the data area of a file.  */
  enum bfd_endian byteorder;

 /* The order of bytes within the header parts of a file.  */
  enum bfd_endian header_byteorder;

  /* A mask of all the flags which an executable may have set -
     from the set <<BFD_NO_FLAGS>>, <<HAS_RELOC>>, ...<<D_PAGED>>.  */
  flagword object_flags;

 /* A mask of all the flags which a section may have set - from
    the set <<SEC_NO_FLAGS>>, <<SEC_ALLOC>>, ...<<SET_NEVER_LOAD>>.  */
  flagword section_flags;

 /* The character normally found at the front of a symbol.
    (if any), perhaps `_'.  */
  char symbol_leading_char;

 /* The pad character for file names within an archive header.  */
  char ar_pad_char;

  /* The maximum number of characters in an archive header.  */
  unsigned char ar_max_namelen;

  /* How well this target matches, used to select between various
     possible targets when more than one target matches.  */
  unsigned char match_priority;

  /* Entries for byte swapping for data. These are different from the
     other entry points, since they don't take a BFD as the first argument.
     Certain other handlers could do the same.  */
  bfd_uint64_t   (*bfd_getx64) (const void *);
  bfd_int64_t    (*bfd_getx_signed_64) (const void *);
  void           (*bfd_putx64) (bfd_uint64_t, void *);
  bfd_vma        (*bfd_getx32) (const void *);
  bfd_signed_vma (*bfd_getx_signed_32) (const void *);
  void           (*bfd_putx32) (bfd_vma, void *);
  bfd_vma        (*bfd_getx16) (const void *);
  bfd_signed_vma (*bfd_getx_signed_16) (const void *);
  void           (*bfd_putx16) (bfd_vma, void *);

  /* Byte swapping for the headers.  */
  bfd_uint64_t   (*bfd_h_getx64) (const void *);
  bfd_int64_t    (*bfd_h_getx_signed_64) (const void *);
  void           (*bfd_h_putx64) (bfd_uint64_t, void *);
  bfd_vma        (*bfd_h_getx32) (const void *);
  bfd_signed_vma (*bfd_h_getx_signed_32) (const void *);
  void           (*bfd_h_putx32) (bfd_vma, void *);
  bfd_vma        (*bfd_h_getx16) (const void *);
  bfd_signed_vma (*bfd_h_getx_signed_16) (const void *);
  void           (*bfd_h_putx16) (bfd_vma, void *);
  ......
}

使用bfd_get_arch_info获取到平台架构信息,返回一个结构体:

/* Architecture descriptor returned by bfd_get_arch_info() (excerpt from
   bfd.h). The loader reads printable_name for display and mach to tell
   32-bit x86 (bfd_mach_i386_i386) from 64-bit x86 (bfd_mach_x86_64).  */
typedef struct bfd_arch_info
{
  int bits_per_word;
  int bits_per_address;
  int bits_per_byte;
  enum bfd_architecture arch;
  unsigned long mach;
  const char *arch_name;
  const char *printable_name;
  unsigned int section_align_power;
  /* TRUE if this is the default machine for the architecture.
     The default arch should be the first entry for an arch so that
     all the entries for that arch can be accessed via <<next>>.  */
  bfd_boolean the_default;
  const struct bfd_arch_info * (*compatible) (const struct bfd_arch_info *,
                                              const struct bfd_arch_info *);

  bfd_boolean (*scan) (const struct bfd_arch_info *, const char *);

  /* Allocate via bfd_malloc and return a fill buffer of size COUNT.  If
     IS_BIGENDIAN is TRUE, the order of bytes is big endian.  If CODE is
     TRUE, the buffer contains code.  */
  void *(*fill) (bfd_size_type count, bfd_boolean is_bigendian,
                 bfd_boolean code);

  const struct bfd_arch_info *next;
}
bfd_arch_info_type;

该结构中有很多数据,其中的mach字段标识了平台架构,如果该字段为bfd_mach_i386_i386,说明它是一个32位x86架构的二进制文件,则在Binary设置相应的字段,如果mach为bfd_mach_x86_64,说明它是一个64位x86架构下的二进制文件,则在Binary中设置字段。

加载二进制文件的静态符号表

// Loads the function symbols of the static symbol table into bin->symbols.
//
// Returns 0 on success (including when the table is empty) and -1 when the
// table cannot be read or memory runs out; a diagnostic is printed to stderr
// in the failure cases. The temporary pointer array is always released.
static int load_symbols_bfd(bfd *bfd_h, Binary *bin)
{
    // Number of bytes needed for the asymbol* array that libbfd fills in.
    const long n = bfd_get_symtab_upper_bound(bfd_h);
    if (n < 0) {
        fprintf(stderr, "failed to read symtab (%s)\n",
                bfd_errmsg(bfd_get_error()));
        return -1;
    }
    if (n == 0) {
        return 0;
    }

    asymbol **bfd_symtab = (asymbol **)malloc(n);
    if (!bfd_symtab) {
        fprintf(stderr, "out of memory\n");
        return -1;
    }

    // Fill the array; the return value is the number of symbols.
    const long nsyms = bfd_canonicalize_symtab(bfd_h, bfd_symtab);
    if (nsyms < 0) {
        // This is the static table, so the message must not say "dynamic".
        fprintf(stderr, "failed to read symtab (%s)\n",
                bfd_errmsg(bfd_get_error()));
        free(bfd_symtab);
        return -1;
    }

    for (long i = 0; i < nsyms; i++) {
        // Only function symbols are recorded.
        if (!(bfd_symtab[i]->flags & BSF_FUNCTION)) {
            continue;
        }
        bin->symbols.push_back(Symbol());
        Symbol *sym = &bin->symbols.back();
        sym->type = Symbol::SYM_TYPE_FUNC;
        sym->name = std::string(bfd_symtab[i]->name);
        sym->addr = bfd_asymbol_value(bfd_symtab[i]);  // start address
    }

    free(bfd_symtab);
    return 0;
}

在该程序中,解析得到的符号由asymbol结构表示,其定义如下:

/* Symbol record (asymbol) as filled in by bfd_canonicalize_symtab() and
   bfd_canonicalize_dynamic_symtab() (excerpt from bfd.h; some members are
   elided). The loader reads name and flags directly and obtains the address
   through bfd_asymbol_value().  */
typedef struct bfd_symbol
{
  /* A pointer to the BFD which owns the symbol. This information
     is necessary so that a back end can work out what additional
     information (invisible to the application writer) is carried
     with the symbol.

     This field is *almost* redundant, since you can use section->owner
     instead, except that some symbols point to the global sections
     bfd_{abs,com,und}_section.  This could be fixed by making
     these globals be per-bfd (or per-target-flavor).  FIXME.  */
  struct bfd *the_bfd; /* Use bfd_asymbol_bfd(sym) to access this field.  */

  /* The text of the symbol. The name is left alone, and not copied; the
     application may not alter it.  */
  const char *name;

  /* The value of the symbol.  This really should be a union of a
     numeric value with a pointer, since some flags indicate that
     a pointer to another symbol is stored here.  */
  symvalue value;
  .....
  flagword flags;

  /* A pointer to the section to which this symbol is
     relative.  This will always be non NULL, there are special
     sections for undefined and absolute symbols.  */
  struct bfd_section *section;

  /* Back end special data.  */
  union
    {
      void *p;
      bfd_vma i;
    }
  udata;
} asymbol;

符号表是一个由asymbol指针组成的数组,所以其类型是asymbol二级指针(asymbol**)。load_symbols_bfd负责填充这个asymbol指针数组,然后将信息复制到Binary对象中。

load_symbols_bfd的输入参数是bfd句柄和用于存储符号信息的Binary对象,因此要开辟内存空间来存放符号指针。bfd_get_symtab_upper_bound函数会返回要分配的字节数,使用malloc函数来为代表符号表的二级指针分配内存空间,可以用bfd_canonicalize_symtab来填充符号表,将bfd句柄和要填充的符号表(asymbol**)作为参数,返回符号个数,如果返回的是负数则说明发生了错误。

此处只对函数符号感兴趣,通过检查BSF_FUNCTION标志来判断符号是否为一个函数符号,如果是则放入Binary对象中symbols中,并获取其起始地址,赋值给对应的Symbol对象。将数据加载到Symbol对象中后就释放原来为符号表申请的空间。

从动态符号表中加载符号

// Loads the function symbols of the dynamic symbol table into bin->symbols.
//
// Returns 0 on success (including when the table is empty) and -1 when the
// table cannot be read or memory runs out; a diagnostic is printed to stderr
// in the failure cases. The temporary pointer array is always released.
static int load_dynsym_bfd(bfd* bfd_h,Binary *bin)
{
    // Number of bytes needed for the asymbol* array that libbfd fills in.
    const long n = bfd_get_dynamic_symtab_upper_bound(bfd_h);
    if (n < 0) {
        fprintf(stderr,"failed to read dynamic symtab (%s)\n",
            bfd_errmsg(bfd_get_error()));
        return -1;
    }
    if (n == 0) {
        return 0;
    }

    asymbol **bfd_dynsym = (asymbol**)malloc(n);
    if (!bfd_dynsym) {
        fprintf(stderr,"out of memory\n");
        return -1;
    }

    // Fill the array; the return value is the number of symbols.
    const long nsyms = bfd_canonicalize_dynamic_symtab(bfd_h,bfd_dynsym);
    if (nsyms < 0) {
        // Same wording as the upper-bound failure above, so both can be
        // found with one search of the logs.
        fprintf(stderr,"failed to read dynamic symtab (%s)\n",
            bfd_errmsg(bfd_get_error()));
        free(bfd_dynsym);
        return -1;
    }

    for (long i = 0; i < nsyms; i++) {
        // Only function symbols are recorded.
        if (!(bfd_dynsym[i]->flags & BSF_FUNCTION)) {
            continue;
        }
        bin->symbols.push_back(Symbol());
        Symbol *sym = &bin->symbols.back();
        sym->type = Symbol::SYM_TYPE_FUNC;
        sym->name = std::string(bfd_dynsym[i]->name);
        sym->addr = bfd_asymbol_value(bfd_dynsym[i]);  // start address
    }

    free(bfd_dynsym);
    return 0;
}

与加载静态符号的流程相似,首先获取要分配的字节数,然后使用malloc进行内存空间的分配。使用bfd_canonicalize_dynamic_symtab来填充符号表,返回符号数。

在for循环中遍历符号表,判断是否为函数符号,如果是则存入Binary对象中

加载二进制文件的节

// Copies every code and data section of the binary into bin->sections.
//
// Sections that are neither SEC_CODE nor SEC_DATA are skipped. Returns 0 on
// success and -1 (after printing a diagnostic to stderr) when a buffer cannot
// be allocated or a section's contents cannot be read. Sections already
// appended before a failure stay in bin->sections.
//
// NOTE(review): the two-argument accessor forms used below
// (bfd_get_section_flags, bfd_section_vma, ...) depend on the installed
// bfd.h; confirm they match the binutils version being built against.
static int load_sections_bfd(bfd* bfd_h,Binary *bin)
{
    for (asection *bfd_sec = bfd_h->sections; bfd_sec; bfd_sec = bfd_sec->next) {
        const int bfd_flags = bfd_get_section_flags(bfd_h,bfd_sec);

        // Classify the section; anything that is not code or data is ignored.
        Section::SectionType sectype;
        if (bfd_flags & SEC_CODE) {
            sectype = Section::SEC_TYPE_CODE;
        } else if (bfd_flags & SEC_DATA) {
            sectype = Section::SEC_TYPE_DATA;
        } else {
            continue;
        }

        const uint64_t vma = bfd_section_vma(bfd_h,bfd_sec);    // virtual base address
        const uint64_t size = bfd_section_size(bfd_h,bfd_sec);  // size in bytes
        const char *secname = bfd_section_name(bfd_h,bfd_sec);  // section name
        if (!secname) {
            secname = "<unnamed>";
        }

        bin->sections.push_back(Section());
        Section *sec = &bin->sections.back();
        sec->binary = bin;
        sec->name = std::string(secname);
        sec->type = sectype;
        sec->vma = vma;
        sec->size = size;

        // An empty section has nothing to copy. malloc(0) is allowed to
        // return NULL, which must not be reported as an allocation failure,
        // so bytes is simply left at the NULL set by the Section constructor.
        if (size == 0) {
            continue;
        }

        sec->bytes = (uint8_t*)malloc(size);
        if (!sec->bytes) {
            fprintf(stderr,"out of memory\n");
            return -1;
        }

        // Copy the raw bytes of the whole section into the new buffer.
        if (!bfd_get_section_contents(bfd_h,bfd_sec,sec->bytes,0,size)) {
            fprintf(stderr,"failed to read section '%s' (%s)\n",
                secname,bfd_errmsg(bfd_get_error()));
            return -1;
        }
    }
    return 0;
}

libbfd使用一个名为asection的数据结构来保存节信息,也称为bfd_section结构。在内部,libbfd通过asection链表表示所有的节。遍历这个链表,先使用bfd_get_section_flags函数来获取标志位,通过检查这个标志位,来判断节的类型。

接下来要获取节的虚拟地址、大小、名称及其原始字节数。使用bfd_section_vma函数来获取节的虚拟基址,bfd_section_size函数返回节的大小,bfd_section_name函数返回节的名称,如果节没有名称,则返回NULL。Section对象中的bytes存储节的原始字节,使用malloc为Section对象的bytes指针开辟空间。bfd_get_section_contents函数将节的所有原始字节数据复制到Section对象的bytes中,如果成功则返回true,失败则返回false。

作为程序的入口,所具有的功能是接收用户的传参,如果命令行参数小于2,则有误。如果等于2,也就是只指定了文件名,就输出符号表和节信息。同时可以在后续的参数中指定节名称,以16进制显示节的字节数据。

int main(int argc,char* argv[])
{
    size_t i;
    Binary bin;
    Section *sec;
    Symbol *sym;
    std::string fname;

    if (argc < 2) {
        printf("Usage: %s <binary>\n",argv[0]);
        return 1;
    }

    fname.assign(argv[1]);

    if (load_binary(fname,&bin,Binary::BIN_TYPE_AUTO) < 0) {
        return 1;
    }

    if (argc == 2) {
        printf("loaded binary '%s' %s/%s (%u bits) entry@0x%016jx\n",
            bin.filename.c_str(),bin.type_str.c_str(),
            bin.arch_str.c_str(),bin.bits,bin.entry
        );

        for (i=0;i<bin.sections.size();i++) {
            sec = &bin.sections[i];
            printf(" 0x%016jx %-8ju %-20s %s\n",
                sec->vma,sec->size,sec->name.c_str(),
                sec->type == Section::SEC_TYPE_CODE ? "CODE":"DATA"
            );
        }

        if (bin.symbols.size() > 0) {
            printf("scanned symbol tables\n");
            for (i=0;i<bin.symbols.size();i++) {
                sym = &bin.symbols[i];
                printf(" %-40s 0x%016jx %s\n",
                    sym->name.c_str(),sym->addr,
                    (sym->type & Symbol::SYM_TYPE_FUNC) ? "FUNC":""
                );
            }
        }
    } else {
        for (int n=2;n<argc;n++) {
            std::string secname;
            secname.assign(argv[n]);
            for (i=0;i<bin.sections.size();i++) {
                sec = &bin.sections[i];
                if (sec->name == secname) {
                    if (sec->size < 0)
                        break;
                    printf("the content of %s",secname.c_str());
                    for (size_t j=0;j<sec->size;j++) {
                        if (j % 16 == 0)
                            printf("\n");
                        printf("%02jx",sec->bytes[j]);
                        if (j % 2 == 1)
                            printf(" ");
                    }
                    printf("\n");
                }
            }
        }
    }
    unload_binary(&bin);

    return 0;
}

最后调用unload_binary做收尾工作

#include <stdio.h>
#include <stdlib.h>

#include <bfd.h>
#include "loader.h"

// Opens the binary named fname through libbfd and validates it.
//
// Returns an open bfd handle (to be released with bfd_close()), or NULL after
// printing a diagnostic to stderr when the file cannot be opened, is not a
// bfd_object, or has an unknown flavour. The handle is closed on every
// failure path so it is not leaked.
static bfd *open_bfd(std::string &fname)
{
    static int bfd_inited = 0;

    // Initialise libbfd's internal state exactly once, on the first call.
    if (!bfd_inited) {
        bfd_init();
        bfd_inited = 1;
    }

    // A NULL target asks libbfd to determine the binary type itself.
    bfd *bfd_h = bfd_openr(fname.c_str(), NULL);
    if (!bfd_h) {
        fprintf(stderr, "failed to open binary '%s' (%s)\n",
                fname.c_str(), bfd_errmsg(bfd_get_error()));
        return NULL;
    }

    // Accept only object files: executables, relocatable objects, shared libraries.
    if (!bfd_check_format(bfd_h, bfd_object)) {
        fprintf(stderr, "file '%s' does not look like an executable (%s)\n",
                fname.c_str(), bfd_errmsg(bfd_get_error()));
        bfd_close(bfd_h);
        return NULL;
    }

    // Reset the error state by hand after the successful format check.
    bfd_set_error(bfd_error_no_error);

    // Reject files whose flavour (file format family) libbfd does not know.
    if (bfd_get_flavour(bfd_h) == bfd_target_unknown_flavour) {
        fprintf(stderr, "unrecognized format for binary '%s' (%s)\n",
                fname.c_str(), bfd_errmsg(bfd_get_error()));
        bfd_close(bfd_h);
        return NULL;
    }

    return bfd_h;
}

// Loads the function symbols of the static symbol table into bin->symbols.
//
// Returns 0 on success (including when the table is empty) and -1 when the
// table cannot be read or memory runs out; a diagnostic is printed to stderr
// in the failure cases. The temporary pointer array is always released.
static int load_symbols_bfd(bfd *bfd_h, Binary *bin)
{
    // Number of bytes needed for the asymbol* array that libbfd fills in.
    const long n = bfd_get_symtab_upper_bound(bfd_h);
    if (n < 0) {
        fprintf(stderr, "failed to read symtab (%s)\n",
                bfd_errmsg(bfd_get_error()));
        return -1;
    }
    if (n == 0) {
        return 0;
    }

    asymbol **bfd_symtab = (asymbol **)malloc(n);
    if (!bfd_symtab) {
        fprintf(stderr, "out of memory\n");
        return -1;
    }

    // Fill the array; the return value is the number of symbols.
    const long nsyms = bfd_canonicalize_symtab(bfd_h, bfd_symtab);
    if (nsyms < 0) {
        // This is the static table, so the message must not say "dynamic".
        fprintf(stderr, "failed to read symtab (%s)\n",
                bfd_errmsg(bfd_get_error()));
        free(bfd_symtab);
        return -1;
    }

    for (long i = 0; i < nsyms; i++) {
        // Only function symbols are recorded.
        if (!(bfd_symtab[i]->flags & BSF_FUNCTION)) {
            continue;
        }
        bin->symbols.push_back(Symbol());
        Symbol *sym = &bin->symbols.back();
        sym->type = Symbol::SYM_TYPE_FUNC;
        sym->name = std::string(bfd_symtab[i]->name);
        sym->addr = bfd_asymbol_value(bfd_symtab[i]);  // start address
    }

    free(bfd_symtab);
    return 0;
}

// Read the dynamic symbol table of `bfd_h` and append one Symbol to
// bin->symbols for every entry flagged BSF_FUNCTION.
//
// Returns 0 on success (including when the table is empty) and -1 after
// printing a diagnostic to stderr on failure.
static int load_dynsym_bfd(bfd* bfd_h,Binary *bin)
{
    int ret;
    long n,nsyms,i;
    asymbol **bfd_dynsym;
    Symbol *sym;

    bfd_dynsym = NULL;

    // Upper bound, in bytes, of the buffer needed for the symbol pointers.
    n = bfd_get_dynamic_symtab_upper_bound(bfd_h);
    if (n < 0) {
        fprintf(stderr,"failed to read dynamic symtab (%s)\n",
            bfd_errmsg(bfd_get_error()));
        goto fail;
    } else if(n) {
        bfd_dynsym = (asymbol**)malloc(n);
        if (!bfd_dynsym) {
            fprintf(stderr,"out of memory\n");
            goto fail;
        }
        nsyms = bfd_canonicalize_dynamic_symtab(bfd_h,bfd_dynsym);
        if (nsyms < 0) {
            // Spelling fixed ("dynmaic" -> "dynamic") so the message matches
            // the one printed for the upper-bound failure above.
            fprintf(stderr,"failed to read dynamic symtab (%s)\n",
                bfd_errmsg(bfd_get_error()));
            goto fail;
        }
        for (i=0;i<nsyms;i++) {
            // Only function symbols are recorded; everything else is skipped.
            if (bfd_dynsym[i]->flags & BSF_FUNCTION) {
                bin->symbols.push_back(Symbol());
                sym = &bin->symbols.back();
                sym->type = Symbol::SYM_TYPE_FUNC;
                sym->name = std::string(bfd_dynsym[i]->name);
                sym->addr = bfd_asymbol_value(bfd_dynsym[i]);
            }
        }
    }

    ret = 0;
    goto cleanup;

    fail:
        ret = -1;
    cleanup:
        // Only the pointer array allocated here is released.
        if (bfd_dynsym)
            free(bfd_dynsym);

    return ret;
}

// Walk the section list of `bfd_h` and append every section flagged
// SEC_CODE or SEC_DATA to bin->sections, copying its contents into a
// malloc'ed buffer stored in Section::bytes. Other sections are skipped.
//
// Returns 0 on success and -1 after printing a diagnostic to stderr on
// failure. Sections appended before a failure stay in bin->sections.
//
// NOTE(review): the two-argument accessors used here (bfd_get_section_flags,
// bfd_section_vma, ...) look like the older libbfd API; confirm against the
// installed bfd.h before building with a recent binutils.
static int load_sections_bfd(bfd* bfd_h,Binary *bin)
{
    int bfd_flags;
    uint64_t vma,size;
    const char* secname;
    asection *bfd_sec;
    Section *sec;
    Section::SectionType sectype;

    for (bfd_sec = bfd_h->sections;bfd_sec;bfd_sec = bfd_sec->next) {
        bfd_flags = bfd_get_section_flags(bfd_h,bfd_sec);

        // Classify the section; anything that is neither code nor data is
        // not loaded at all.
        sectype = Section::SEC_TYPE_NONE;
        if (bfd_flags & SEC_CODE) {
            sectype = Section::SEC_TYPE_CODE;
        } else if (bfd_flags & SEC_DATA) {
            sectype = Section::SEC_TYPE_DATA;
        } else {
            continue;
        }
        vma = bfd_section_vma(bfd_h,bfd_sec);
        size = bfd_section_size(bfd_h,bfd_sec);
        secname = bfd_section_name(bfd_h,bfd_sec);
        if (!secname)
            secname = "<unnamed>";
        bin->sections.push_back(Section());
        sec = &bin->sections.back();

        sec->binary = bin;
        sec->name = std::string(secname);
        sec->type = sectype;
        sec->vma = vma;
        sec->size = size;
        sec->bytes = NULL;

        // An empty section has nothing to copy. Skipping the allocation
        // avoids treating a NULL result from malloc(0) as out-of-memory.
        if (size == 0)
            continue;

        sec->bytes = (uint8_t*)malloc(size);
        if (!sec->bytes) {
            fprintf(stderr,"out of memory\n");
            return -1;
        }

        if (!bfd_get_section_contents(bfd_h,bfd_sec,sec->bytes,0,size)) {
            fprintf(stderr,"failed to read section '%s' (%s)\n",
            secname,bfd_errmsg(bfd_get_error()));
            // Release the unfilled buffer so the failure path does not leak
            // it and no later code reads it.
            free(sec->bytes);
            sec->bytes = NULL;
            return -1;
        }
    }

    return 0;
}

// 解析基本属性
static int load_binary_bfd(std::string &fname, Binary *bin, Binary::BinaryType type)
{
    int ret;
    bfd *bfd_h;
    const bfd_arch_info_type *bfd_info;

    bfd_h = NULL;
    bfd_h = open_bfd(fname);
    if (!bfd_h)
    {
        goto fail;
    }

    bin->filename = std::string(fname);
    bin->entry = bfd_get_start_address(bfd_h);
    bin->type_str = std::string(bfd_h->xvec->name);

    switch (bfd_h->xvec->flavour)
    {
    case bfd_target_elf_flavour:
        bin->type = Binary::BIN_TYPE_ELF;
        break;
    case bfd_target_coff_flavour:
        bin->type = Binary::BIN_TYPE_PE;
        break;
    case bfd_target_unknown_flavour:
    default:
        fprintf(stderr, "unsupported binary (%s)\n", bfd_h->xvec->name);
        goto fail;
    }

    bfd_info = bfd_get_arch_info(bfd_h);
    bin->arch_str = std::string(bfd_info->printable_name);
    switch (bfd_info->mach)
    {
    case bfd_mach_i386_i386:
        bin->arch = Binary::ARCH_X86;
        bin->bits = 32;
        break;
    case bfd_mach_x86_64:
        bin->arch = Binary::ARCH_X86;
        bin->bits = 64;
        break;
    default:
        fprintf(stderr, "unsupported architecture (%s)\n",
                bfd_info->printable_name);
        goto fail;
    }

    load_symbols_bfd(bfd_h, bin);
    load_dynsym_bfd(bfd_h, bin);

    if (load_sections_bfd(bfd_h, bin) < 0)
        goto fail;

    ret = 0;
    goto cleanup;

    fail:
        ret = -1;
    cleanup:
        if (bfd_h) bfd_close(bfd_h);

    return ret;
}

// Public entry point of the loader: forwards all three arguments unchanged
// to the libbfd-backed implementation and returns its result.
int load_binary(std::string &fname, Binary *bin, Binary::BinaryType type)
{
    const int status = load_binary_bfd(fname, bin, type);
    return status;
}

// Release the content buffer of every section in `bin`.
//
// Each freed pointer is reset to NULL, so calling this function again on the
// same Binary is harmless; the original left the pointers dangling, which
// made a second call a double free. The section and symbol vectors
// themselves are left untouched.
void unload_binary(Binary *bin)
{
    size_t i;
    Section *sec;

    for (i = 0; i < bin->sections.size(); i++)
    {
        sec = &bin->sections[i];
        if (sec->bytes) {
            free(sec->bytes);
            sec->bytes = NULL;
        }
    }
}

// Command-line driver for the loader.
//
//   argv[1]      path of the binary to load
//   argv[2..]    optional section names
//
// With only a path, prints a summary line, the loaded sections and the
// loaded symbols. With section names, prints a hex dump of each named
// section instead. Returns 1 on a usage error or load failure, 0 otherwise.
int main(int argc,char* argv[])
{
    size_t i;
    Binary bin;
    Section *sec;
    Symbol *sym;
    std::string fname;

    if (argc < 2) {
        printf("Usage: %s <binary>\n",argv[0]);
        return 1;
    }

    fname.assign(argv[1]);

    if (load_binary(fname,&bin,Binary::BIN_TYPE_AUTO) < 0) {
        // Sections loaded before the failure still own buffers; free them.
        unload_binary(&bin);
        return 1;
    }

    if (argc == 2) {
        // The `j` length modifier expects uintmax_t, so the 64-bit values
        // are cast explicitly rather than relying on the types coinciding.
        printf("loaded binary '%s' %s/%s (%u bits) entry@0x%016jx\n",
            bin.filename.c_str(),bin.type_str.c_str(),
            bin.arch_str.c_str(),bin.bits,static_cast<uintmax_t>(bin.entry)
        );

        for (i=0;i<bin.sections.size();i++) {
            sec = &bin.sections[i];
            printf(" 0x%016jx %-8ju %-20s %s\n",
                static_cast<uintmax_t>(sec->vma),
                static_cast<uintmax_t>(sec->size),sec->name.c_str(),
                sec->type == Section::SEC_TYPE_CODE ? "CODE":"DATA"
            );
        }

        if (bin.symbols.size() > 0) {
            printf("scanned symbol tables\n");
            for (i=0;i<bin.symbols.size();i++) {
                sym = &bin.symbols[i];
                printf(" %-40s 0x%016jx %s\n",
                    sym->name.c_str(),static_cast<uintmax_t>(sym->addr),
                    (sym->type & Symbol::SYM_TYPE_FUNC) ? "FUNC":""
                );
            }
        }
    } else {
        for (int n=2;n<argc;n++) {
            std::string secname;
            bool found = false;
            secname.assign(argv[n]);
            for (i=0;i<bin.sections.size();i++) {
                sec = &bin.sections[i];
                if (sec->name == secname) {
                    found = true;
                    printf("the content of %s",secname.c_str());
                    // 16 bytes per row, grouped in pairs.
                    for (size_t j=0;j<sec->size;j++) {
                        if (j % 16 == 0)
                            printf("\n");
                        // A byte is promoted to int when passed through
                        // varargs, so it is printed with %x, not %jx.
                        printf("%02x",static_cast<unsigned>(sec->bytes[j]));
                        if (j % 2 == 1)
                            printf(" ");
                    }
                    printf("\n");
                }
            }
            // Report a name that matched nothing instead of staying silent.
            if (!found)
                fprintf(stderr,"section '%s' not found\n",secname.c_str());
        }
    }
    unload_binary(&bin);

    return 0;
}

在当前文件夹中编译:g++ -I ./ loader.cpp -o loader -lbfd

运行:

$ ./loader /bin/ls
loaded binary '/bin/ls' elf64-x86-64/i386:x86-64 (64 bits) entry@0x0000000000006130
 0x00000000000002a8 28       .interp              DATA
 0x00000000000002c4 32       .note.ABI-tag        DATA
 0x00000000000002e4 36       .note.gnu.build-id   DATA
 0x0000000000000308 184      .gnu.hash            DATA
 0x00000000000003c0 3168     .dynsym              DATA
 0x0000000000001020 1484     .dynstr              DATA
 0x00000000000015ec 264      .gnu.version         DATA
 0x00000000000016f8 112      .gnu.version_r       DATA
 0x0000000000001768 4944     .rela.dyn            DATA
 0x0000000000002ab8 2544     .rela.plt            DATA
 0x0000000000004000 23       .init                CODE
 0x0000000000004020 1712     .plt                 CODE
 0x00000000000046d0 24       .plt.got             CODE
 0x00000000000046f0 75086    .text                CODE
 0x0000000000016c40 9        .fini                CODE
 0x0000000000017000 20777    .rodata              DATA
 0x000000000001c12c 2300     .eh_frame_hdr        DATA
 0x000000000001ca28 12008    .eh_frame            DATA
 0x0000000000021390 8        .init_array          DATA
 0x0000000000021398 8        .fini_array          DATA
 0x00000000000213a0 2616     .data.rel.ro         DATA
 0x0000000000021dd8 496      .dynamic             DATA
 0x0000000000021fc8 56       .got                 DATA
 0x0000000000022000 872      .got.plt             DATA
 0x0000000000022380 616      .data                DATA
scanned symbol tables
.....
 _obstack_memory_used                     0x00000000000163a0 FUNC
 _obstack_begin                           0x00000000000161b0 FUNC
 _obstack_free                            0x0000000000016330 FUNC
 _obstack_allocated_p                     0x00000000000162f0 FUNC
 _obstack_begin_1                         0x00000000000161d0 FUNC
 __cxa_finalize                           0x0000000000000000 FUNC
 _obstack_newchunk                        0x00000000000161f0 FUNC
 malloc                                   0x0000000000000000 FUNC


$ ./loader /bin/sh .rodata .data
the content of .rodata
0100 0200 756e 616c 6961 7300 2573 3a20
2573 206e 6f74 2066 6f75 6e64 0a00 2573
3d25 730a 0000 0000 6172 6974 686d 6574
6963 2065 7870 7265 7373 696f 6e3a 2025
733a 2022 2573 2200 6469 7669 7369 6f6e
2062 7920 7a65 726f 0065 7870 6563 7469
6e67 2027 3a27 0065 7870 6563 7469 6e67
2027 2927 0065 7870 6563 7469 6e67 2070
7269 6d61 7279 0065 7870 6563 7469 6e67
2045 4f46 0000 0000 0000 0000 0000 0000
b0ec feff c0ec feff d0ec feff e0ec feff
f0ec feff 60ed feff 00ed feff 08ed feff
.....

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器

你可能感兴趣的文章