为什么gdb通过0地址显示偏移会提示地址错误
阅读原文时间:2023年07月08日阅读:2

现象

在gdb中,如果想看一个struct的某个field的偏移量,可以和C语言一样,把0地址强制转换为该struct的指针,再对其成员取地址,得到的地址值就是该成员的偏移量。更神奇的地方在于,和C语言一样,这里也不会触发内存访问异常。

另外还有一个奇怪的现象:对x字段取地址的时候没有问题,但是对c字段取地址之后会有一个莫名其妙的错误提示"<error: Cannot access memory at address 0x8>"。

tsecer@harry: cat -n main.cpp
     1  struct tsecer
     2  {
     3      double x;
     4      char c;
     5      double d;
     6  };
     7
     8  int main()
     9  {
    10      tsecer x;
    11      return x.c;
    12  }
tsecer@harry: gcc -g main.cpp
tsecer@harry: gdb -quiet ./a.out
Reading symbols from ./a.out...
(gdb) p &((tsecer*)0)->x
$1 = (double *) 0x0
(gdb) p &((tsecer*)0)->c
$2 = 0x8 <error: Cannot access memory at address 0x8>
(gdb)

延迟求值(lazy evaluation)

在取一个struct的特定域(field)时,gdb其实只是计算了它的相对地址(offset)而没有真正取这个field的绝对地址。这也意味着,一个变量是由两部分(基地址+offset)而不是一部分(绝对地址)组成。

在前面的例子中,x的地址其实是通过0作为基地址加上0作为offset两部分表示。

下面是计算一个结构field值(value)的函数。可以看到,它主要是计算了offset偏移量而不是计算了绝对地址。

/// @file: gdb-10.1\gdb\value.c
/* Given a value ARG1 (offset by OFFSET bytes)
   of a struct or union type ARG_TYPE,
   extract and return the value of one of its (non-static) fields.
   FIELDNO says which field.

   NOTE: this is an abridged excerpt; the beginning of the body
   (including the declarations of V, TYPE and UNIT_SIZE) is elided.  */

struct value *
value_primitive_field (struct value *arg1, LONGEST offset,
               int fieldno, struct type *arg_type)
{
///....
  else
    {
      /* Plain old data member */
      /* Advance OFFSET by the field's bit position divided by the number
	 of bits per unit.  Only an offset is accumulated here; no
	 absolute address is formed.  */
      offset += (TYPE_FIELD_BITPOS (arg_type, fieldno)
             / (HOST_CHAR_BIT * unit_size));

      /* Lazy register values with offsets are not supported.  */
      if (VALUE_LVAL (arg1) == lval_register && value_lazy (arg1))
    value_fetch_lazy (arg1);

      /* If ARG1 is still lazy, the field value is allocated lazy as well
	 and no contents are copied in this branch.  Otherwise the field's
	 contents are copied out of ARG1's contents.  */
      if (value_lazy (arg1))
    v = allocate_value_lazy (type);
      else
    {
      v = allocate_value (type);
      value_contents_copy_raw (v, value_embedded_offset (v),
                   arg1, value_embedded_offset (arg1) + offset,
                   type_length_units (type));
    }
      /* The field's offset is ARG1's own offset, plus the field offset
	 computed above, plus ARG1's embedded offset.  */
      v->offset = (value_offset (arg1) + offset
           + value_embedded_offset (arg1));
    }
  /* Give V its location based on ARG1 before returning it.  */
  set_value_component_location (v, arg1);
  return v;
}

这也意味着一个value的内存存储结构相对于单独存储一个绝对地址会稍微复杂一些。下面的value结构中存储了基地址address和偏移量offset两个字段。

/// @file: gdb-10.1\gdb\value.c
/* Note that the fields in this structure are arranged to save a bit
   of memory.  */

struct value
{
///...
  /* Location of value (if lval).  */
  union
  {
    /* If lval == lval_memory, this is the address in the inferior  */
    CORE_ADDR address;

    /*If lval == lval_register, the value is from a register.  */
    struct
    {
      /* Register number.  */
      int regnum;
      /* Frame ID of "next" frame to which a register value is relative.
     If the register value is found relative to frame F, then the
     frame id of F->next will be stored in next_frame_id.  */
      struct frame_id next_frame_id;
    } reg;

    /* Pointer to internal variable.  */
    struct internalvar *internalvar;

    /* Pointer to xmethod worker.  */
    struct xmethod_worker *xm_worker;

    /* If lval == lval_computed, this is a set of function pointers
       to use to access and describe the value, and a closure pointer
       for them to use.  */
    struct
    {
      /* Functions to call.  */
      const struct lval_funcs *funcs;

      /* Closure for those functions to use.  */
      void *closure;
    } computed;
  } location {};

  /* Describes offset of a value within lval of a structure in target
     addressable memory units.  Note also the member embedded_offset
     below.  */
  LONGEST offset = 0;
///...

取地址

对于&这种操作,通过value_address取到值的内存地址即可,没必要真正访问内容。再强调一遍,只是计算内存地址。

下面的代码通过“基地址+offset”计算并返回地址。

/* Return the address of VALUE.  The result is 0 when VALUE is not an
   lval_memory value; otherwise it is formed from a base address and
   VALUE's offset.  No call in this function fetches the value's
   contents.  */
CORE_ADDR
value_address (const struct value *value)
{
  /* Only lval_memory values have an address to report.  */
  if (value->lval != lval_memory)
    return 0;
  /* With a parent, the base is the parent's address (computed
     recursively), to which this value's offset is added.  */
  if (value->parent != NULL)
    return value_address (value->parent.get ()) + value->offset;
  /* The type has a data location of its own: it is asserted to be a
     constant property and returned as is, without adding the offset.  */
  if (NULL != TYPE_DATA_LOCATION (value_type (value)))
    {
      gdb_assert (PROP_CONST == TYPE_DATA_LOCATION_KIND (value_type (value)));
      return TYPE_DATA_LOCATION_ADDR (value_type (value));
    }

  /* Otherwise: the base address stored in the value plus its offset.  */
  return value->location.address + value->offset;
}

为什么有错误提示

正如注释

/* For a pointer to a textual type, also print the string

pointed to, unless pointer is null. */

所说,对于一个指向文本(字符)结构的指针,除了输出地址之外,还会输出指向的字符串内容(除非指针为0),此时就会真正访问内存地址。

而对于通常的double指针就不会输出指向的内容,所以对double字段取地址就不会报错。

/// @file: gdb-10.1\gdb\c-valprint.c
/* Print a pointer based on the type of its target.

   Arguments to this functions are roughly the same as those in c_val_print.
   A difference is that ADDRESS is the address to print, with embedded_offset
   already added.  UNRESOLVED_ELTTYPE and ELTTYPE represent the pointed type,
   respectively before and after check_typedef.

   Output goes to STREAM.  Function pointers are handled first and return
   early; otherwise the address is printed according to OPTIONS, followed
   by either the pointed-to string (textual element types, non-null
   ADDRESS) or vtable details (when TYPE is a vtbl member).  */

static void
print_unpacked_pointer (struct type *type, struct type *elttype,
            struct type *unresolved_elttype,
            const gdb_byte *valaddr, int embedded_offset,
            CORE_ADDR address, struct ui_file *stream, int recurse,
            const struct value_print_options *options)
{
  /* Non-zero once something has been printed that needs a separating
     space before any further output.  */
  int want_space = 0;
  struct gdbarch *gdbarch = get_type_arch (type);

  if (elttype->code () == TYPE_CODE_FUNC)
    {
      /* Try to print what function it points to.  */
      print_function_pointer_address (options, gdbarch, address, stream);
      return;
    }

  /* Print the address itself: through print_address_demangle when
     symbol_print is set, else as a plain address when addressprint is
     set.  With neither option set nothing is printed here.  */
  if (options->symbol_print)
    want_space = print_address_demangle (options, gdbarch, address, stream,
                     demangle);
  else if (options->addressprint)
    {
      fputs_filtered (paddress (gdbarch, address), stream);
      want_space = 1;
    }

  /* For a pointer to a textual type, also print the string
     pointed to, unless pointer is null.  */

  if (c_textual_element_type (unresolved_elttype, options->format)
      && address != 0)
    {
      if (want_space)
    fputs_filtered (" ", stream);
      /* ADDRESS is handed over with a length of -1.
	 NOTE(review): presumably val_print_string reads the string from
	 target memory at ADDRESS, which is where an
	 "<error: Cannot access memory ...>" marker would come from for an
	 unreadable address -- confirm against val_print_string.  */
      val_print_string (unresolved_elttype, NULL, address, -1, stream, options);
    }
  else if (cp_is_vtbl_member (type))
    {
      /* Print vtbl's nicely.  */
      /* Unpack the pointer stored at VALADDR + EMBEDDED_OFFSET and look
	 up a minimal symbol for that address.  */
      CORE_ADDR vt_address = unpack_pointer (type, valaddr + embedded_offset);
      struct bound_minimal_symbol msymbol =
    lookup_minimal_symbol_by_pc (vt_address);

      /* If 'symbol_print' is set, we did the work above.  */
      /* Otherwise print " <name>" when a minimal symbol was found whose
	 address is exactly VT_ADDRESS.  */
      if (!options->symbol_print
      && (msymbol.minsym != NULL)
      && (vt_address == BMSYMBOL_VALUE_ADDRESS (msymbol)))
    {
      if (want_space)
        fputs_filtered (" ", stream);
      fputs_filtered (" <", stream);
      fputs_filtered (msymbol.minsym->print_name (), stream);
      fputs_filtered (">", stream);
      want_space = 1;
    }

      /* With vtblprint set and a non-zero vtable address, also print
	 the value found at VT_ADDRESS.  */
      if (vt_address && options->vtblprint)
    {
      struct value *vt_val;
      struct symbol *wsym = NULL;
      struct type *wtype;

      if (want_space)
        fputs_filtered (" ", stream);

      /* Look for a full symbol matching the minimal symbol's search
	 name.  */
      if (msymbol.minsym != NULL)
        {
          const char *search_name = msymbol.minsym->search_name ();
          wsym = lookup_symbol_search_name (search_name, NULL,
                        VAR_DOMAIN).symbol;
        }

      /* Use that symbol's type when one was found; otherwise fall back
	 to the unresolved element type.  */
      if (wsym)
        {
          wtype = SYMBOL_TYPE (wsym);
        }
      else
        {
          wtype = unresolved_elttype;
        }
      vt_val = value_at (wtype, vt_address);
      common_val_print (vt_val, stream, recurse + 1, options,
                current_language);
      /* In pretty format, end the line and indent by 2 + 2 * RECURSE
	 spaces.  */
      if (options->prettyformat)
        {
          fprintf_filtered (stream, "\n");
          print_spaces_filtered (2 + 2 * recurse, stream);
        }
    }
    }
}

什么时候真正从地址取值

例如,在最后print变量的时候必然是需要取内存中内容的;反之,对于&这种只取地址的操作就没有必要了。

/* See valprint.h.

   Record VAL in the value history, then print "$N = ", the formatted
   value and a newline, where N is the history index returned by
   record_latest_value.  The output is bracketed by the
   annotate_value_history_* calls.  */

void
print_value (value *val, const value_print_options &opts)
{
  /* Recording comes first: the index it returns is needed for the
     "$N = " prefix.  */
  int histindex = record_latest_value (val);

  annotate_value_history_begin (histindex, value_type (val));

  printf_filtered ("$%d = ", histindex);

  annotate_value_history_value ();

  /* Format and print the value itself to gdb_stdout.  */
  print_formatted (val, 0, &opts, gdb_stdout);
  printf_filtered ("\n");

  annotate_value_history_end ();
}

/* Access to the value history.  */

/* Record a new value in the value history.
   Returns the absolute history index of the entry.

   A lazy VAL is fetched before it is stored, and VAL is marked as not
   modifiable.  */

int
record_latest_value (struct value *val)
{
  /* We don't want this value to have anything to do with the inferior anymore.
     In particular, "set $1 = 50" should not affect the variable from which
     the value was taken, and fast watchpoints should be able to assume that
     a value on the value history never changes.  */
  /* So a value that is still lazy is fetched here, before being
     stored.  */
  if (value_lazy (val))
    value_fetch_lazy (val);
  /* We preserve VALUE_LVAL so that the user can find out where it was fetched
     from.  This is a bit dubious, because then *&$1 does not just return $1
     but the current contents of that location.  c'est la vie...  */
  val->modifiable = 0;

  value_history.push_back (release_value (val));

  /* The new entry's index is the history size after the push.  */
  return value_history.size ();
}

更直观的例子

如果把char* 类型强转为int,因为print看到的是int类型而不是char * 类型,所以gdb不会尝试输出字符串的内容,进而不会有这个报错了。

(gdb) p (char*)1
$5 = 0x1 <error: Cannot access memory at address 0x1>
(gdb) p (int)(char*)1
$6 = 1
(gdb)