drgn/libdrgn/language_c.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

3625 lines
96 KiB
C
Raw Normal View History

// Copyright (c) Meta Platforms, Inc. and affiliates.
// SPDX-License-Identifier: LGPL-2.1-or-later
#include <assert.h>
#include <ctype.h>
#include <float.h>
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "array.h"
#include "bitops.h"
#include "error.h"
#include "language.h" // IWYU pragma: associated
#include "lexer.h"
#include "minmax.h"
#include "object.h"
#include "program.h"
#include "string_builder.h"
#include "symbol.h"
#include "type.h"
#include "util.h"
#include "vector.h"
static struct drgn_error *
c_declare_variable(struct drgn_qualified_type qualified_type,
struct string_callback *name, size_t indent,
bool define_anonymous_type, struct string_builder *sb);
static struct drgn_error *
c_define_type(struct drgn_qualified_type qualified_type, size_t indent,
struct string_builder *sb);
static bool append_tabs(int n, struct string_builder *sb)
{
while (n-- > 0) {
if (!string_builder_appendc(sb, '\t'))
return false;
}
return true;
}
static struct drgn_error *c_variable_name(struct string_callback *name,
void *arg, struct string_builder *sb)
{
if (!string_builder_append(sb, arg))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *c_append_qualifiers(enum drgn_qualifiers qualifiers,
struct string_builder *sb)
{
static const char *qualifier_names[] = {
"const", "volatile", "restrict", "_Atomic",
};
bool first = true;
unsigned int i;
static_assert((1 << array_size(qualifier_names)) - 1 ==
DRGN_ALL_QUALIFIERS, "missing C qualifier name");
for (i = 0; (1U << i) & DRGN_ALL_QUALIFIERS; i++) {
if (!(qualifiers & (1U << i)))
continue;
if (!first) {
if (!string_builder_appendc(sb, ' '))
return &drgn_enomem;
}
if (!string_builder_append(sb, qualifier_names[i]))
return &drgn_enomem;
first = false;
}
return NULL;
}
static struct drgn_error *
c_declare_basic(struct drgn_qualified_type qualified_type,
struct string_callback *name, size_t indent,
struct string_builder *sb)
{
struct drgn_error *err;
if (!append_tabs(indent, sb))
return &drgn_enomem;
if (qualified_type.qualifiers) {
err = c_append_qualifiers(qualified_type.qualifiers, sb);
if (err)
return err;
if (!string_builder_appendc(sb, ' '))
return &drgn_enomem;
}
if (!string_builder_append(sb,
drgn_type_kind(qualified_type.type) == DRGN_TYPE_VOID ?
"void" : drgn_type_name(qualified_type.type)))
return &drgn_enomem;
if (name) {
if (!string_builder_appendc(sb, ' '))
return &drgn_enomem;
err = string_callback_call(name, sb);
if (err)
return err;
}
return NULL;
}
static struct drgn_error *
c_append_tagged_name(struct drgn_qualified_type qualified_type, size_t indent,
struct string_builder *sb)
{
struct drgn_error *err;
const char *keyword, *tag;
switch (drgn_type_kind(qualified_type.type)) {
case DRGN_TYPE_STRUCT:
keyword = "struct";
break;
case DRGN_TYPE_UNION:
keyword = "union";
break;
case DRGN_TYPE_CLASS:
keyword = "class";
break;
case DRGN_TYPE_ENUM:
keyword = "enum";
break;
default:
UNREACHABLE();
}
if (!append_tabs(indent, sb))
return &drgn_enomem;
if (qualified_type.qualifiers) {
err = c_append_qualifiers(qualified_type.qualifiers, sb);
if (err)
return err;
if (!string_builder_appendc(sb, ' '))
return &drgn_enomem;
}
if (!string_builder_append(sb, keyword))
return &drgn_enomem;
tag = drgn_type_tag(qualified_type.type);
if (tag) {
if (!string_builder_appendc(sb, ' ') ||
!string_builder_append(sb, tag))
return &drgn_enomem;
}
return NULL;
}
static struct drgn_error *
c_declare_tagged(struct drgn_qualified_type qualified_type,
struct string_callback *name, size_t indent,
bool define_anonymous_type, struct string_builder *sb)
{
struct drgn_error *err;
bool anonymous = drgn_type_is_anonymous(qualified_type.type);
if (anonymous && define_anonymous_type)
err = c_define_type(qualified_type, indent, sb);
else
err = c_append_tagged_name(qualified_type, indent, sb);
if (err)
return err;
if (anonymous && !define_anonymous_type &&
!string_builder_append(sb, " <anonymous>"))
return &drgn_enomem;
if (name) {
if (!string_builder_appendc(sb, ' '))
return &drgn_enomem;
err = string_callback_call(name, sb);
if (err)
return err;
}
return NULL;
}
static struct drgn_error *c_pointer_name(struct string_callback *name,
void *arg, struct string_builder *sb)
{
struct drgn_error *err;
struct drgn_qualified_type *qualified_type = arg;
struct drgn_qualified_type referenced_type;
enum drgn_type_kind referenced_kind;
bool parenthesize;
referenced_type = drgn_type_type(qualified_type->type);
referenced_kind = drgn_type_kind(referenced_type.type);
parenthesize = (referenced_kind == DRGN_TYPE_ARRAY ||
referenced_kind == DRGN_TYPE_FUNCTION);
if (parenthesize && !string_builder_appendc(sb, '('))
return &drgn_enomem;
if (!string_builder_appendc(sb, '*'))
return &drgn_enomem;
if (qualified_type->qualifiers) {
if (!string_builder_appendc(sb, ' '))
return &drgn_enomem;
err = c_append_qualifiers(qualified_type->qualifiers, sb);
if (err)
return err;
if (name) {
if (!string_builder_appendc(sb, ' '))
return &drgn_enomem;
}
}
err = string_callback_call(name, sb);
if (err)
return err;
if (parenthesize && !string_builder_appendc(sb, ')'))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_declare_pointer(struct drgn_qualified_type qualified_type,
struct string_callback *name, size_t indent,
struct string_builder *sb)
{
struct string_callback pointer_name = {
.fn = c_pointer_name,
.str = name,
.arg = &qualified_type,
};
struct drgn_qualified_type referenced_type;
referenced_type = drgn_type_type(qualified_type.type);
return c_declare_variable(referenced_type, &pointer_name, indent, false,
sb);
}
static struct drgn_error *c_array_name(struct string_callback *name, void *arg,
struct string_builder *sb)
{
struct drgn_error *err;
struct drgn_qualified_type *qualified_type = arg;
err = string_callback_call(name, sb);
if (err)
return err;
if (drgn_type_is_complete(qualified_type->type)) {
uint64_t length = drgn_type_length(qualified_type->type);
if (!string_builder_appendf(sb, "[%" PRIu64 "]", length))
return &drgn_enomem;
} else {
if (!string_builder_append(sb, "[]"))
return &drgn_enomem;
}
return NULL;
}
static struct drgn_error *
c_declare_array(struct drgn_qualified_type qualified_type,
struct string_callback *name, size_t indent,
struct string_builder *sb)
{
struct string_callback array_name = {
.fn = c_array_name,
.str = name,
.arg = &qualified_type,
};
struct drgn_qualified_type element_type;
element_type = drgn_type_type(qualified_type.type);
return c_declare_variable(element_type, &array_name, indent, false, sb);
}
static struct drgn_error *
c_declare_function(struct drgn_qualified_type qualified_type,
struct string_callback *name, size_t indent,
struct string_builder *sb)
{
struct drgn_error *err;
struct drgn_type_parameter *parameters;
size_t num_parameters, i;
struct drgn_qualified_type return_type;
if (!name) {
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
"function must have name");
}
parameters = drgn_type_parameters(qualified_type.type);
num_parameters = drgn_type_num_parameters(qualified_type.type);
return_type = drgn_type_type(qualified_type.type);
err = c_declare_variable(return_type, name, indent, false, sb);
if (err)
return err;
if (!string_builder_appendc(sb, '('))
return &drgn_enomem;
for (i = 0; i < num_parameters; i++) {
const char *parameter_name = parameters[i].name;
struct drgn_qualified_type parameter_type;
struct string_callback name_cb = {
.fn = c_variable_name,
.arg = (void *)parameter_name,
};
err = drgn_parameter_type(&parameters[i], &parameter_type);
if (err)
return err;
if (i > 0) {
if (!string_builder_append(sb, ", "))
return &drgn_enomem;
}
err = c_declare_variable(parameter_type,
parameter_name && parameter_name[0] ?
&name_cb : NULL, 0, false, sb);
if (err)
return err;
}
if (num_parameters && drgn_type_is_variadic(qualified_type.type)) {
if (!string_builder_append(sb, ", ..."))
return &drgn_enomem;
} else if (!num_parameters &&
!drgn_type_is_variadic(qualified_type.type)) {
if (!string_builder_append(sb, "void"))
return &drgn_enomem;
}
if (!string_builder_appendc(sb, ')'))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_declare_variable(struct drgn_qualified_type qualified_type,
struct string_callback *name, size_t indent,
bool define_anonymous_type, struct string_builder *sb)
{
SWITCH_ENUM(drgn_type_kind(qualified_type.type),
case DRGN_TYPE_VOID:
case DRGN_TYPE_INT:
case DRGN_TYPE_BOOL:
case DRGN_TYPE_FLOAT:
case DRGN_TYPE_TYPEDEF:
return c_declare_basic(qualified_type, name, indent, sb);
case DRGN_TYPE_STRUCT:
case DRGN_TYPE_UNION:
case DRGN_TYPE_CLASS:
case DRGN_TYPE_ENUM:
return c_declare_tagged(qualified_type, name, indent,
define_anonymous_type, sb);
case DRGN_TYPE_POINTER:
return c_declare_pointer(qualified_type, name, indent, sb);
case DRGN_TYPE_ARRAY:
return c_declare_array(qualified_type, name, indent, sb);
case DRGN_TYPE_FUNCTION:
return c_declare_function(qualified_type, name, indent, sb);
)
}
static struct drgn_error *
c_define_compound(struct drgn_qualified_type qualified_type, size_t indent,
struct string_builder *sb)
{
struct drgn_error *err;
struct drgn_type_member *members;
size_t num_members, i;
if (!drgn_type_is_complete(qualified_type.type)) {
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
"cannot get definition of incomplete compound type");
}
members = drgn_type_members(qualified_type.type);
num_members = drgn_type_num_members(qualified_type.type);
err = c_append_tagged_name(qualified_type, indent, sb);
if (err)
return err;
if (!string_builder_append(sb, " {\n"))
return &drgn_enomem;
for (i = 0; i < num_members; i++) {
struct drgn_qualified_type member_type;
uint64_t member_bit_field_size;
err = drgn_member_type(&members[i], &member_type,
&member_bit_field_size);
if (err)
return err;
const char *member_name = members[i].name;
struct string_callback name_cb = {
.fn = c_variable_name,
.arg = (void *)member_name,
};
err = c_declare_variable(member_type,
member_name && member_name[0] ?
&name_cb : NULL, indent + 1, true, sb);
if (err)
return err;
if (member_bit_field_size &&
!string_builder_appendf(sb, " : %" PRIu64,
member_bit_field_size))
return &drgn_enomem;
if (!string_builder_append(sb, ";\n"))
return &drgn_enomem;
}
if (!append_tabs(indent, sb) || !string_builder_appendc(sb, '}'))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_define_enum(struct drgn_qualified_type qualified_type, size_t indent,
struct string_builder *sb)
{
struct drgn_error *err;
const struct drgn_type_enumerator *enumerators;
size_t num_enumerators, i;
bool is_signed;
if (!drgn_type_is_complete(qualified_type.type)) {
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
"cannot get definition of incomplete enum type");
}
enumerators = drgn_type_enumerators(qualified_type.type);
num_enumerators = drgn_type_num_enumerators(qualified_type.type);
err = c_append_tagged_name(qualified_type, indent, sb);
if (err)
return err;
if (!string_builder_append(sb, " {\n"))
return &drgn_enomem;
is_signed = drgn_enum_type_is_signed(qualified_type.type);
for (i = 0; i < num_enumerators; i++) {
if (!append_tabs(indent + 1, sb) ||
!string_builder_append(sb, enumerators[i].name) ||
!string_builder_append(sb, " = "))
return &drgn_enomem;
if (is_signed) {
if (!string_builder_appendf(sb, "%" PRId64 ",\n",
enumerators[i].svalue))
return &drgn_enomem;
} else {
if (!string_builder_appendf(sb, "%" PRIu64 ",\n",
enumerators[i].uvalue))
return &drgn_enomem;
}
}
if (!append_tabs(indent, sb) || !string_builder_appendc(sb, '}'))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_define_typedef(struct drgn_qualified_type qualified_type, size_t indent,
struct string_builder *sb)
{
struct string_callback typedef_name = {
.fn = c_variable_name,
.arg = (char *)drgn_type_name(qualified_type.type),
};
struct drgn_qualified_type aliased_type;
struct drgn_error *err;
if (!append_tabs(indent, sb))
return &drgn_enomem;
if (qualified_type.qualifiers) {
err = c_append_qualifiers(qualified_type.qualifiers, sb);
if (err)
return err;
if (!string_builder_appendc(sb, ' '))
return &drgn_enomem;
}
if (!string_builder_append(sb, "typedef "))
return &drgn_enomem;
aliased_type = drgn_type_type(qualified_type.type);
return c_declare_variable(aliased_type, &typedef_name, 0, true, sb);
}
static struct drgn_error *
c_define_type(struct drgn_qualified_type qualified_type, size_t indent,
struct string_builder *sb)
{
SWITCH_ENUM(drgn_type_kind(qualified_type.type),
case DRGN_TYPE_VOID:
case DRGN_TYPE_INT:
case DRGN_TYPE_BOOL:
case DRGN_TYPE_FLOAT:
return c_declare_basic(qualified_type, NULL, indent, sb);
case DRGN_TYPE_STRUCT:
case DRGN_TYPE_UNION:
case DRGN_TYPE_CLASS:
return c_define_compound(qualified_type, indent, sb);
case DRGN_TYPE_ENUM:
return c_define_enum(qualified_type, indent, sb);
case DRGN_TYPE_TYPEDEF:
return c_define_typedef(qualified_type, indent, sb);
case DRGN_TYPE_POINTER:
return c_declare_pointer(qualified_type, NULL, indent, sb);
case DRGN_TYPE_ARRAY:
return c_declare_array(qualified_type, NULL, indent, sb);
case DRGN_TYPE_FUNCTION:
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
"function type cannot be formatted");
)
}
static struct drgn_error *
c_format_type_name_impl(struct drgn_qualified_type qualified_type,
struct string_builder *sb)
{
if (drgn_type_kind(qualified_type.type) == DRGN_TYPE_FUNCTION) {
struct string_callback name_cb = {
.fn = c_variable_name,
.arg = (void *)"",
};
return c_declare_function(qualified_type, &name_cb, 0, sb);
} else {
return c_declare_variable(qualified_type, NULL, 0, false, sb);
}
}
static struct drgn_error *
c_format_type_name(struct drgn_qualified_type qualified_type, char **ret)
{
struct drgn_error *err;
struct string_builder sb = STRING_BUILDER_INIT;
err = c_format_type_name_impl(qualified_type, &sb);
if (err) {
free(sb.str);
return err;
}
if (!(*ret = string_builder_null_terminate(&sb)))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_format_type(struct drgn_qualified_type qualified_type, char **ret)
{
struct drgn_error *err;
struct string_builder sb = STRING_BUILDER_INIT;
if (drgn_type_is_complete(qualified_type.type))
err = c_define_type(qualified_type, 0, &sb);
else
err = c_format_type_name_impl(qualified_type, &sb);
if (err) {
free(sb.str);
return err;
}
if (!(*ret = string_builder_null_terminate(&sb)))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_format_object_impl(const struct drgn_object *obj, size_t indent,
size_t one_line_columns, size_t multi_line_columns,
enum drgn_format_object_flags flags,
struct string_builder *sb);
2019-12-06 19:17:43 +00:00
static bool is_character_type(struct drgn_type *type)
{
switch (drgn_type_primitive(type)) {
case DRGN_C_TYPE_CHAR:
case DRGN_C_TYPE_SIGNED_CHAR:
case DRGN_C_TYPE_UNSIGNED_CHAR:
return true;
default:
return false;
}
}
static struct drgn_error *
c_format_character(unsigned char c, bool escape_single_quote,
bool escape_double_quote, struct string_builder *sb)
{
bool ret;
switch (c) {
case '\0':
ret = string_builder_append(sb, "\\0");
break;
case '\a':
ret = string_builder_append(sb, "\\a");
break;
case '\b':
ret = string_builder_append(sb, "\\b");
break;
case '\t':
ret = string_builder_append(sb, "\\t");
break;
case '\n':
ret = string_builder_append(sb, "\\n");
break;
case '\v':
ret = string_builder_append(sb, "\\v");
break;
case '\f':
ret = string_builder_append(sb, "\\f");
break;
case '\r':
ret = string_builder_append(sb, "\\r");
break;
case '"':
if (!escape_double_quote)
goto no_escape;
ret = string_builder_append(sb, "\\\"");
break;
case '\'':
if (!escape_single_quote)
goto no_escape;
ret = string_builder_append(sb, "\\'");
break;
case '\\':
ret = string_builder_append(sb, "\\\\");
break;
default:
if (c <= '\x1f' || c >= '\x7f') {
ret = string_builder_appendf(sb, "\\x%02x", c);
} else {
no_escape:
ret = string_builder_appendc(sb, c);
}
break;
}
return ret ? NULL : &drgn_enomem;
}
static struct drgn_error *
c_format_string(struct drgn_program *prog, uint64_t address, uint64_t length,
struct string_builder *sb)
2019-12-06 19:17:43 +00:00
{
struct drgn_error *err;
if (!string_builder_appendc(sb, '"'))
return &drgn_enomem;
while (length) {
unsigned char c;
err = drgn_program_read_memory(prog, &c, address++, 1, false);
2019-12-06 19:17:43 +00:00
if (err)
return err;
if (c == '\0') {
break;
} else {
err = c_format_character(c, false, true, sb);
if (err)
return err;
}
length--;
}
if (!string_builder_appendc(sb, '"'))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
2019-12-06 19:17:43 +00:00
c_format_int_object(const struct drgn_object *obj,
enum drgn_format_object_flags flags,
struct string_builder *sb)
{
struct drgn_error *err;
2019-12-06 19:17:43 +00:00
if ((flags & DRGN_FORMAT_OBJECT_CHAR) && is_character_type(obj->type)) {
union drgn_value value;
if (!string_builder_appendc(sb, '\''))
return &drgn_enomem;
err = drgn_object_read_integer(obj, &value);
if (err)
return err;
err = c_format_character(value.uvalue, true, false, sb);
if (err)
return err;
if (!string_builder_appendc(sb, '\''))
return &drgn_enomem;
return NULL;
}
libdrgn: support value objects with >64-bit integer types The Linux kernel's struct task_struct on AArch64 contains an array of __uint128_t: >>> task = find_task(prog, 1) >>> task.type_ struct task_struct * >>> task.thread.type_ struct thread_struct { struct cpu_context cpu_context; struct { unsigned long tp_value; unsigned long tp2_value; struct user_fpsimd_state fpsimd_state; } uw; enum fp_type fp_type; unsigned int fpsimd_cpu; void *sve_state; void *sme_state; unsigned int vl[2]; unsigned int vl_onexec[2]; unsigned long fault_address; unsigned long fault_code; struct debug_info debug; struct ptrauth_keys_user keys_user; struct ptrauth_keys_kernel keys_kernel; u64 mte_ctrl; u64 sctlr_user; u64 svcr; u64 tpidr2_el0; } >>> task.thread.uw.fpsimd_state.type_ struct user_fpsimd_state { __int128 unsigned vregs[32]; __u32 fpsr; __u32 fpcr; __u32 __reserved[2]; } As a result, printing a task_struct fails: >>> task Traceback (most recent call last): File "<console>", line 1, in <module> File "/host/home/osandov/repos/drgn3/drgn/cli.py", line 140, in _displayhook text = value.format_(columns=shutil.get_terminal_size((0, 0)).columns) NotImplementedError: integer values larger than 64 bits are not yet supported PR #311 suggested treating >64-bit integers as byte arrays for now; I tried an alternate hack of handling >64-bit integers only in the pretty-printing code. Both of these had issues, though. Instead, let's push >64-bit integer support a little further and allow storing "big integer" value objects. We still don't support any operations on them, so this still doesn't complete #170. We store the raw bytes of the value for now, but we'll probably change this if we add support for operations (e.g., to store the value as an mp_limb_t array for GMP). We also print >64-bit integer types in hexadecimal for simplicity. This is inconsistent with the existing behavior of printing in decimal, but more readable. In the future, we might want to add heuristics to decide when to print in decimal vs hexadecimal for all sizes. Closes #311. Signed-off-by: Omar Sandoval <osandov@osandov.com>
2023-08-02 21:12:19 +01:00
union drgn_value value_mem;
const union drgn_value *value;
err = drgn_object_read_value(obj, &value_mem, &value);
if (err)
return err;
switch (obj->encoding) {
libdrgn: support value objects with >64-bit integer types The Linux kernel's struct task_struct on AArch64 contains an array of __uint128_t: >>> task = find_task(prog, 1) >>> task.type_ struct task_struct * >>> task.thread.type_ struct thread_struct { struct cpu_context cpu_context; struct { unsigned long tp_value; unsigned long tp2_value; struct user_fpsimd_state fpsimd_state; } uw; enum fp_type fp_type; unsigned int fpsimd_cpu; void *sve_state; void *sme_state; unsigned int vl[2]; unsigned int vl_onexec[2]; unsigned long fault_address; unsigned long fault_code; struct debug_info debug; struct ptrauth_keys_user keys_user; struct ptrauth_keys_kernel keys_kernel; u64 mte_ctrl; u64 sctlr_user; u64 svcr; u64 tpidr2_el0; } >>> task.thread.uw.fpsimd_state.type_ struct user_fpsimd_state { __int128 unsigned vregs[32]; __u32 fpsr; __u32 fpcr; __u32 __reserved[2]; } As a result, printing a task_struct fails: >>> task Traceback (most recent call last): File "<console>", line 1, in <module> File "/host/home/osandov/repos/drgn3/drgn/cli.py", line 140, in _displayhook text = value.format_(columns=shutil.get_terminal_size((0, 0)).columns) NotImplementedError: integer values larger than 64 bits are not yet supported PR #311 suggested treating >64-bit integers as byte arrays for now; I tried an alternate hack of handling >64-bit integers only in the pretty-printing code. Both of these had issues, though. Instead, let's push >64-bit integer support a little further and allow storing "big integer" value objects. We still don't support any operations on them, so this still doesn't complete #170. We store the raw bytes of the value for now, but we'll probably change this if we add support for operations (e.g., to store the value as an mp_limb_t array for GMP). We also print >64-bit integer types in hexadecimal for simplicity. This is inconsistent with the existing behavior of printing in decimal, but more readable. In the future, we might want to add heuristics to decide when to print in decimal vs hexadecimal for all sizes. Closes #311. Signed-off-by: Omar Sandoval <osandov@osandov.com>
2023-08-02 21:12:19 +01:00
case DRGN_OBJECT_ENCODING_SIGNED:
if (!string_builder_appendf(sb, "%" PRId64, value->svalue)) {
err = &drgn_enomem;
goto out;
}
break;
case DRGN_OBJECT_ENCODING_UNSIGNED:
if (!string_builder_appendf(sb, "%" PRIu64, value->uvalue)) {
err = &drgn_enomem;
goto out;
}
break;
case DRGN_OBJECT_ENCODING_SIGNED_BIG:
case DRGN_OBJECT_ENCODING_UNSIGNED_BIG: {
if (!string_builder_append(sb, "0x")) {
err = &drgn_enomem;
goto out;
}
const uint8_t *buf = (uint8_t *)value->bufp;
size_t bytes = drgn_object_size(obj);
if (obj->little_endian) {
size_t i = bytes - 1;
while (i > 0 && buf[i] == 0)
i--;
if (!string_builder_appendf(sb, "%" PRIx8, buf[i])) {
err = &drgn_enomem;
goto out;
}
while (i-- > 0) {
if (!string_builder_appendf(sb, "%02" PRIx8, buf[i])) {
err = &drgn_enomem;
goto out;
}
}
} else {
size_t i = 0;
while (i < bytes - 1 && buf[i] == 0)
i++;
if (!string_builder_appendf(sb, "%" PRIx8, buf[i])) {
err = &drgn_enomem;
goto out;
}
while (++i < bytes) {
if (!string_builder_appendf(sb, "%02" PRIx8, buf[i])) {
err = &drgn_enomem;
goto out;
}
}
}
break;
}
default:
UNREACHABLE();
}
libdrgn: support value objects with >64-bit integer types The Linux kernel's struct task_struct on AArch64 contains an array of __uint128_t: >>> task = find_task(prog, 1) >>> task.type_ struct task_struct * >>> task.thread.type_ struct thread_struct { struct cpu_context cpu_context; struct { unsigned long tp_value; unsigned long tp2_value; struct user_fpsimd_state fpsimd_state; } uw; enum fp_type fp_type; unsigned int fpsimd_cpu; void *sve_state; void *sme_state; unsigned int vl[2]; unsigned int vl_onexec[2]; unsigned long fault_address; unsigned long fault_code; struct debug_info debug; struct ptrauth_keys_user keys_user; struct ptrauth_keys_kernel keys_kernel; u64 mte_ctrl; u64 sctlr_user; u64 svcr; u64 tpidr2_el0; } >>> task.thread.uw.fpsimd_state.type_ struct user_fpsimd_state { __int128 unsigned vregs[32]; __u32 fpsr; __u32 fpcr; __u32 __reserved[2]; } As a result, printing a task_struct fails: >>> task Traceback (most recent call last): File "<console>", line 1, in <module> File "/host/home/osandov/repos/drgn3/drgn/cli.py", line 140, in _displayhook text = value.format_(columns=shutil.get_terminal_size((0, 0)).columns) NotImplementedError: integer values larger than 64 bits are not yet supported PR #311 suggested treating >64-bit integers as byte arrays for now; I tried an alternate hack of handling >64-bit integers only in the pretty-printing code. Both of these had issues, though. Instead, let's push >64-bit integer support a little further and allow storing "big integer" value objects. We still don't support any operations on them, so this still doesn't complete #170. We store the raw bytes of the value for now, but we'll probably change this if we add support for operations (e.g., to store the value as an mp_limb_t array for GMP). We also print >64-bit integer types in hexadecimal for simplicity. This is inconsistent with the existing behavior of printing in decimal, but more readable. In the future, we might want to add heuristics to decide when to print in decimal vs hexadecimal for all sizes. Closes #311. Signed-off-by: Omar Sandoval <osandov@osandov.com>
2023-08-02 21:12:19 +01:00
err = NULL;
out:
drgn_object_deinit_value(obj, value);
return err;
}
static struct drgn_error *
c_format_float_object(const struct drgn_object *obj, struct string_builder *sb)
{
struct drgn_error *err;
double fvalue;
err = drgn_object_read_float(obj, &fvalue);
if (err)
return err;
if (rint(fvalue) == fvalue) {
if (!string_builder_appendf(sb, "%.1f", fvalue))
return &drgn_enomem;
} else {
if (!string_builder_appendf(sb, "%.*g", DBL_DECIMAL_DIG,
fvalue))
return &drgn_enomem;
}
return NULL;
}
static struct drgn_error drgn_line_wrap = {
.code = DRGN_ERROR_STOP,
.message = "needs line wrap",
};
struct initializer_iter {
struct drgn_error *(*next)(struct initializer_iter *,
struct drgn_object *,
enum drgn_format_object_flags *);
void (*reset)(struct initializer_iter *);
struct drgn_error *(*append_designation)(struct initializer_iter *,
struct string_builder *);
};
static struct drgn_error *c_format_initializer(struct drgn_program *prog,
struct initializer_iter *iter,
size_t indent,
size_t one_line_columns,
size_t multi_line_columns,
bool same_line,
struct string_builder *sb)
{
struct drgn_error *err;
struct drgn_object obj;
enum drgn_format_object_flags initializer_flags;
size_t brace, remaining_columns, start_columns;
drgn_object_init(&obj, prog);
/* First, try to fit everything on one line. */
brace = sb->len;
if (!string_builder_appendc(sb, '{')) {
err = &drgn_enomem;
goto out;
}
if (__builtin_sub_overflow(one_line_columns, 1, &remaining_columns))
remaining_columns = 0;
for (;;) {
size_t initializer_start;
err = iter->next(iter, &obj, &initializer_flags);
if (err == &drgn_stop)
break;
else if (err)
goto out;
if (!same_line) {
err = &drgn_line_wrap;
break;
} else if (sb->len == brace + 1) {
if (remaining_columns < 3) {
/*
* The preceding space and closing space and
* brace don't fit.
*/
err = &drgn_line_wrap;
break;
}
if (!string_builder_appendc(sb, ' ')) {
err = &drgn_enomem;
goto out;
}
remaining_columns--;
} else {
if (remaining_columns < 4) {
/*
* The preceding comma and space and closing
* space and brace don't fit.
*/
err = &drgn_line_wrap;
break;
}
if (!string_builder_append(sb, ", ")) {
err = &drgn_enomem;
goto out;
}
remaining_columns -= 2;
}
if (iter->append_designation) {
size_t designation_start = sb->len;
err = iter->append_designation(iter, sb);
if (err)
goto out;
if (__builtin_sub_overflow(remaining_columns,
sb->len - designation_start,
&remaining_columns)) {
err = &drgn_line_wrap;
break;
}
}
initializer_start = sb->len;
err = c_format_object_impl(&obj, indent + 1,
remaining_columns - 2, 0,
initializer_flags, sb);
if (err == &drgn_line_wrap)
break;
else if (err)
goto out;
if (__builtin_sub_overflow(remaining_columns,
sb->len - initializer_start,
&remaining_columns)) {
err = &drgn_line_wrap;
break;
}
}
if (err != &drgn_line_wrap) {
/* All of the initializers fit. */
if (sb->len == brace + 1) {
/* There were no initializers. */
if (string_builder_appendc(sb, '}'))
err = NULL;
else
err = &drgn_enomem;
goto out;
} else if (remaining_columns >= 2) {
if (string_builder_append(sb, " }"))
err = NULL;
else
err = &drgn_enomem;
goto out;
}
/* The final space and closing brace didn't fit. */
}
/* It didn't fit on one line. Try multiple lines. */
if (multi_line_columns == 0) {
/* We were asked to stay on one line. */
err = &drgn_line_wrap;
goto out;
}
sb->len = brace + 1;
if (__builtin_sub_overflow(multi_line_columns, 8 * (indent + 1),
&start_columns))
start_columns = 0;
remaining_columns = 0;
iter->reset(iter);
for (;;) {
size_t newline, designation_start, line_columns;
err = iter->next(iter, &obj, &initializer_flags);
if (err == &drgn_stop)
break;
else if (err)
goto out;
newline = sb->len;
if (!string_builder_appendc(sb, '\n') ||
!append_tabs(indent + 1, sb)) {
err = &drgn_enomem;
goto out;
}
designation_start = sb->len;
line_columns = start_columns;
if (iter->append_designation) {
err = iter->append_designation(iter, sb);
if (err)
goto out;
if (__builtin_sub_overflow(line_columns,
sb->len - designation_start,
&line_columns))
line_columns = 0;
}
if (line_columns > 1) {
size_t initializer_start = sb->len;
err = c_format_object_impl(&obj, 0, line_columns - 1,
0, initializer_flags, sb);
if (!err) {
size_t len = sb->len - designation_start;
if (same_line && len + 2 <= remaining_columns) {
/*
* It would've fit on the previous line.
* Move it over.
*/
sb->str[newline] = ' ';
memmove(&sb->str[newline + 1],
&sb->str[designation_start],
len);
sb->len = newline + 1 + len;
if (!string_builder_appendc(sb, ',')) {
err = &drgn_enomem;
goto out;
}
remaining_columns -= len + 2;
continue;
}
if (len < start_columns) {
/* It fit on the new line. */
if (!string_builder_appendc(sb, ',')) {
err = &drgn_enomem;
goto out;
}
remaining_columns =
start_columns - len - 1;
continue;
}
} else if (err != &drgn_line_wrap) {
goto out;
}
/* It didn't fit. */
sb->len = initializer_start;
}
err = c_format_object_impl(&obj, indent + 1, 0,
multi_line_columns,
initializer_flags, sb);
if (err)
goto out;
if (!string_builder_appendc(sb, ',')) {
err = &drgn_enomem;
goto out;
}
remaining_columns = 0;
}
if (!string_builder_appendc(sb, '\n') || !append_tabs(indent, sb) ||
!string_builder_appendc(sb, '}')) {
err = &drgn_enomem;
goto out;
}
err = NULL;
out:
drgn_object_deinit(&obj);
return err;
}
struct compound_initializer_state {
struct drgn_type_member *member, *end;
uint64_t bit_offset;
};
DEFINE_VECTOR(compound_initializer_stack, struct compound_initializer_state);
struct compound_initializer_iter {
struct initializer_iter iter;
const struct drgn_object *obj;
struct compound_initializer_stack stack;
enum drgn_format_object_flags flags, member_flags;
};
static struct drgn_error *
compound_initializer_iter_next(struct initializer_iter *iter_,
struct drgn_object *obj_ret,
enum drgn_format_object_flags *flags_ret)
{
struct drgn_error *err;
struct compound_initializer_iter *iter =
container_of(iter_, struct compound_initializer_iter, iter);
for (;;) {
if (!iter->stack.size)
return &drgn_stop;
struct compound_initializer_state *top =
&iter->stack.data[iter->stack.size - 1];
if (top->member == top->end) {
iter->stack.size--;
continue;
}
uint64_t bit_offset = top->bit_offset;
struct drgn_type_member *member = top->member++;
struct drgn_qualified_type member_type;
uint64_t member_bit_field_size;
err = drgn_member_type(member, &member_type,
&member_bit_field_size);
if (err)
return err;
/*
* If the member is named or we are not including names, return
* it. Otherwise, if it has members, descend into it. If it
* doesn't, then this isn't valid C, but let's return it
* anyways.
*/
if (member->name ||
!(iter->flags & DRGN_FORMAT_OBJECT_MEMBER_NAMES) ||
!drgn_type_has_members(member_type.type)) {
err = drgn_object_slice(obj_ret, iter->obj, member_type,
bit_offset + member->bit_offset,
member_bit_field_size);
if (err)
return err;
/* If we're including names, we can skip zeroes. */
if ((iter->flags & (DRGN_FORMAT_OBJECT_MEMBER_NAMES |
DRGN_FORMAT_OBJECT_IMPLICIT_MEMBERS)) ==
DRGN_FORMAT_OBJECT_MEMBER_NAMES) {
bool zero;
err = drgn_object_is_zero(obj_ret, &zero);
if (err)
return err;
if (zero)
continue;
}
break;
}
struct compound_initializer_state *new =
compound_initializer_stack_append_entry(&iter->stack);
if (!new)
return &drgn_enomem;
new->member = drgn_type_members(member_type.type);
new->end = new->member + drgn_type_num_members(member_type.type);
new->bit_offset = bit_offset + member->bit_offset;
}
*flags_ret = iter->member_flags;
return NULL;
}
static void compound_initializer_iter_reset(struct initializer_iter *iter_)
{
struct compound_initializer_iter *iter =
container_of(iter_, struct compound_initializer_iter, iter);
struct drgn_type *underlying_type =
drgn_underlying_type(iter->obj->type);
iter->stack.size = 1;
iter->stack.data[0].member = drgn_type_members(underlying_type);
}
static struct drgn_error *
compound_initializer_append_designation(struct initializer_iter *iter_,
struct string_builder *sb)
{
struct compound_initializer_iter *iter =
container_of(iter_, struct compound_initializer_iter, iter);
struct compound_initializer_state *top =
&iter->stack.data[iter->stack.size - 1];
const char *name = top->member[-1].name;
if (name && !string_builder_appendf(sb, ".%s = ", name))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_format_compound_object(const struct drgn_object *obj,
struct drgn_type *underlying_type, size_t indent,
size_t one_line_columns, size_t multi_line_columns,
enum drgn_format_object_flags flags,
struct string_builder *sb)
{
struct drgn_error *err;
if (!drgn_type_is_complete(underlying_type)) {
const char *keyword;
switch (drgn_type_kind(underlying_type)) {
case DRGN_TYPE_STRUCT:
keyword = "struct";
break;
case DRGN_TYPE_UNION:
keyword = "union";
break;
case DRGN_TYPE_CLASS:
keyword = "class";
break;
default:
UNREACHABLE();
}
return drgn_error_format(DRGN_ERROR_TYPE,
"cannot format incomplete %s object",
keyword);
}
struct compound_initializer_iter iter = {
.iter = {
.next = compound_initializer_iter_next,
.reset = compound_initializer_iter_reset,
.append_designation =
flags & DRGN_FORMAT_OBJECT_MEMBER_NAMES ?
compound_initializer_append_designation : NULL,
},
.obj = obj,
.stack = VECTOR_INIT,
.flags = flags,
.member_flags = drgn_member_format_object_flags(flags),
};
struct compound_initializer_state *new =
compound_initializer_stack_append_entry(&iter.stack);
if (!new) {
err = &drgn_enomem;
goto out;
}
new->member = drgn_type_members(underlying_type);
new->end = add_to_possibly_null_pointer(new->member,
drgn_type_num_members(underlying_type));
new->bit_offset = 0;
/*
* If we don't want zero members, ignore any at the end. If we're
* including member names, then we'll skip past zero members as we
* iterate, so we don't need to do this.
*/
if (!(flags & (DRGN_FORMAT_OBJECT_MEMBER_NAMES |
DRGN_FORMAT_OBJECT_IMPLICIT_MEMBERS)) &&
new->member < new->end) {
struct drgn_object member;
drgn_object_init(&member, drgn_object_program(obj));
do {
struct drgn_qualified_type member_type;
uint64_t member_bit_field_size;
err = drgn_member_type(&new->end[-1], &member_type,
&member_bit_field_size);
if (err)
break;
err = drgn_object_slice(&member, obj, member_type,
new->end[-1].bit_offset,
member_bit_field_size);
if (err)
break;
bool zero;
err = drgn_object_is_zero(&member, &zero);
if (err)
break;
if (zero)
new->end--;
else
break;
} while (new->member < new->end);
drgn_object_deinit(&member);
if (err)
goto out;
}
err = c_format_initializer(drgn_object_program(obj), &iter.iter, indent,
one_line_columns, multi_line_columns,
flags & DRGN_FORMAT_OBJECT_MEMBERS_SAME_LINE,
sb);
out:
compound_initializer_stack_deinit(&iter.stack);
return err;
}
static struct drgn_error *
c_format_enum_object(const struct drgn_object *obj,
struct drgn_type *underlying_type,
struct string_builder *sb)
{
struct drgn_error *err;
struct drgn_type_enumerator *enumerators;
size_t num_enumerators, i;
if (!drgn_type_is_complete(underlying_type)) {
return drgn_error_create(DRGN_ERROR_TYPE,
"cannot format incomplete enum object");
}
enumerators = drgn_type_enumerators(underlying_type);
num_enumerators = drgn_type_num_enumerators(underlying_type);
if (drgn_enum_type_is_signed(underlying_type)) {
int64_t svalue;
err = drgn_object_read_signed(obj, &svalue);
if (err)
return err;
for (i = 0; i < num_enumerators; i++) {
if (enumerators[i].svalue == svalue) {
if (!string_builder_append(sb,
enumerators[i].name))
return &drgn_enomem;
return NULL;
}
}
if (!string_builder_appendf(sb, "%" PRId64, svalue))
return &drgn_enomem;
return NULL;
} else {
uint64_t uvalue;
err = drgn_object_read_unsigned(obj, &uvalue);
if (err)
return err;
for (i = 0; i < num_enumerators; i++) {
if (enumerators[i].uvalue == uvalue) {
if (!string_builder_append(sb,
enumerators[i].name))
return &drgn_enomem;
return NULL;
}
}
if (!string_builder_appendf(sb, "%" PRIu64, uvalue))
return &drgn_enomem;
return NULL;
}
}
static struct drgn_error *
c_format_pointer_object(const struct drgn_object *obj,
struct drgn_type *underlying_type,
size_t indent, size_t one_line_columns,
size_t multi_line_columns,
enum drgn_format_object_flags flags,
struct string_builder *sb)
{
struct drgn_error *err;
enum drgn_format_object_flags passthrough_flags =
drgn_passthrough_format_object_flags(flags);
bool dereference = flags & DRGN_FORMAT_OBJECT_DEREFERENCE;
2019-12-06 18:59:31 +00:00
bool c_string =
((flags & DRGN_FORMAT_OBJECT_STRING) &&
is_character_type(drgn_type_type(underlying_type).type));
bool have_symbol;
uint64_t uvalue;
struct drgn_symbol sym;
size_t start, type_start, type_end, value_start, value_end;
start = sb->len;
2019-12-06 18:59:31 +00:00
if (dereference && !c_string && !string_builder_appendc(sb, '*'))
return &drgn_enomem;
type_start = sb->len;
if (flags & DRGN_FORMAT_OBJECT_TYPE_NAME) {
if (!string_builder_appendc(sb, '('))
return &drgn_enomem;
err = c_format_type_name_impl(drgn_object_qualified_type(obj),
sb);
if (err)
return err;
if (!string_builder_appendc(sb, ')'))
return &drgn_enomem;
}
type_end = sb->len;
err = drgn_object_read_unsigned(obj, &uvalue);
if (err)
return err;
have_symbol = ((flags & DRGN_FORMAT_OBJECT_SYMBOLIZE) &&
drgn_program_find_symbol_by_address_internal(drgn_object_program(obj),
uvalue,
NULL,
&sym));
2019-12-06 18:59:31 +00:00
if (have_symbol && dereference && !c_string &&
!string_builder_appendc(sb, '('))
return &drgn_enomem;
value_start = sb->len;
if (have_symbol &&
!string_builder_appendf(sb, "%s+0x%" PRIx64 " = ", sym.name,
uvalue - sym.address))
return &drgn_enomem;
if (!string_builder_appendf(sb, "0x%" PRIx64, uvalue))
return &drgn_enomem;
2019-12-06 18:59:31 +00:00
if (!dereference && !c_string)
return NULL;
value_end = sb->len;
2019-12-06 18:59:31 +00:00
if ((have_symbol && dereference && !c_string &&
!string_builder_appendc(sb, ')')) ||
!string_builder_append(sb, " = "))
return &drgn_enomem;
2019-12-06 18:59:31 +00:00
if (c_string) {
err = c_format_string(drgn_object_program(obj), uvalue,
UINT64_MAX, sb);
} else {
struct drgn_object dereferenced;
drgn_object_init(&dereferenced, drgn_object_program(obj));
err = drgn_object_dereference(&dereferenced, obj);
if (err) {
drgn_object_deinit(&dereferenced);
if (err->code == DRGN_ERROR_TYPE)
goto no_dereference;
return err;
}
if (__builtin_sub_overflow(one_line_columns, sb->len - start,
&one_line_columns))
one_line_columns = 0;
err = c_format_object_impl(&dereferenced, indent,
one_line_columns, multi_line_columns,
passthrough_flags, sb);
drgn_object_deinit(&dereferenced);
}
if (!err || (err->code != DRGN_ERROR_FAULT && err->code != DRGN_ERROR_OUT_OF_BOUNDS)) {
/* We either succeeded or hit a fatal error. */
return err;
}
no_dereference:
/*
* We hit a non-fatal error. Delete the asterisk and symbol parentheses
* and truncate everything after the address.
*/
drgn_error_destroy(err);
if (type_start != start) {
memmove(&sb->str[start], &sb->str[type_start],
type_end - type_start);
}
if (start + type_end - type_start != value_start) {
memmove(&sb->str[start + type_end - type_start],
&sb->str[value_start], value_end - value_start);
}
sb->len = start + type_end - type_start + value_end - value_start;
return NULL;
}
struct array_initializer_iter {
struct initializer_iter iter;
const struct drgn_object *obj;
struct drgn_qualified_type element_type;
uint64_t element_bit_size;
uint64_t length, i;
enum drgn_format_object_flags flags, element_flags;
};
static struct drgn_error *
array_initializer_iter_next(struct initializer_iter *iter_,
struct drgn_object *obj_ret,
enum drgn_format_object_flags *flags_ret)
{
struct drgn_error *err;
struct array_initializer_iter *iter =
container_of(iter_, struct array_initializer_iter, iter);
for (;;) {
bool zero;
if (iter->i >= iter->length)
return &drgn_stop;
err = drgn_object_slice(obj_ret, iter->obj, iter->element_type,
iter->i * iter->element_bit_size, 0);
if (err)
return err;
iter->i++;
/* If we're including indices, we can skip zeroes. */
if ((iter->flags & (DRGN_FORMAT_OBJECT_ELEMENT_INDICES |
DRGN_FORMAT_OBJECT_IMPLICIT_ELEMENTS)) !=
DRGN_FORMAT_OBJECT_ELEMENT_INDICES)
break;
err = drgn_object_is_zero(obj_ret, &zero);
if (err)
return err;
if (!zero)
break;
}
*flags_ret = iter->element_flags;
return NULL;
}
static void array_initializer_iter_reset(struct initializer_iter *iter_)
{
struct array_initializer_iter *iter =
container_of(iter_, struct array_initializer_iter, iter);
iter->i = 0;
}
static struct drgn_error *
array_initializer_append_designation(struct initializer_iter *iter_,
struct string_builder *sb)
{
struct array_initializer_iter *iter =
container_of(iter_, struct array_initializer_iter, iter);
if (!string_builder_appendf(sb, "[%" PRIu64 "] = ", iter->i - 1))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_format_array_object(const struct drgn_object *obj,
struct drgn_type *underlying_type, size_t indent,
size_t one_line_columns, size_t multi_line_columns,
enum drgn_format_object_flags flags,
struct string_builder *sb)
{
struct drgn_error *err;
struct array_initializer_iter iter = {
.iter = {
.next = array_initializer_iter_next,
.reset = array_initializer_iter_reset,
.append_designation =
flags & DRGN_FORMAT_OBJECT_ELEMENT_INDICES ?
array_initializer_append_designation : NULL,
},
.obj = obj,
.element_type = drgn_type_type(underlying_type),
.length = drgn_type_length(underlying_type),
.flags = flags,
.element_flags = drgn_element_format_object_flags(flags),
};
if ((flags & DRGN_FORMAT_OBJECT_STRING) && iter.length &&
is_character_type(iter.element_type.type)) {
SWITCH_ENUM(obj->kind,
case DRGN_OBJECT_VALUE: {
const unsigned char *buf;
uint64_t size, i;
if (!string_builder_appendc(sb, '"'))
return &drgn_enomem;
buf = (const unsigned char *)drgn_object_buffer(obj);
size = drgn_object_size(obj);
for (i = 0; i < size; i++) {
if (buf[i] == '\0')
break;
2019-12-06 19:17:43 +00:00
err = c_format_character(buf[i], false, true,
sb);
if (err)
return err;
}
if (!string_builder_appendc(sb, '"'))
return &drgn_enomem;
return NULL;
}
case DRGN_OBJECT_REFERENCE:
return c_format_string(drgn_object_program(obj),
obj->address, iter.length, sb);
case DRGN_OBJECT_ABSENT:
)
}
err = drgn_type_bit_size(iter.element_type.type,
&iter.element_bit_size);
if (err)
return err;
/*
* If we don't want zero elements, ignore any at the end. If we're
* including indices, then we'll skip past zeroes as we iterate, so we
* don't need to do this.
*/
if (!(flags & (DRGN_FORMAT_OBJECT_ELEMENT_INDICES |
DRGN_FORMAT_OBJECT_IMPLICIT_ELEMENTS)) &&
iter.length) {
struct drgn_object element;
drgn_object_init(&element, drgn_object_program(obj));
do {
bool zero;
err = drgn_object_slice(&element, obj,
iter.element_type,
(iter.length - 1) *
iter.element_bit_size,
0);
if (err)
break;
err = drgn_object_is_zero(&element, &zero);
if (err)
break;
if (zero)
iter.length--;
else
break;
} while (iter.length);
drgn_object_deinit(&element);
if (err)
return err;
}
return c_format_initializer(drgn_object_program(obj), &iter.iter,
indent, one_line_columns,
multi_line_columns,
flags & DRGN_FORMAT_OBJECT_ELEMENTS_SAME_LINE,
sb);
}
static struct drgn_error *
c_format_function_object(const struct drgn_object *obj,
struct string_builder *sb)
{
assert(obj->kind == DRGN_OBJECT_REFERENCE);
if (!string_builder_appendf(sb, "0x%" PRIx64, obj->address))
return &drgn_enomem;
return NULL;
}
static struct drgn_error *
c_format_object_impl(const struct drgn_object *obj, size_t indent,
size_t one_line_columns, size_t multi_line_columns,
enum drgn_format_object_flags flags,
struct string_builder *sb)
{
struct drgn_error *err;
struct drgn_type *underlying_type = drgn_underlying_type(obj->type);
if (drgn_type_kind(underlying_type) == DRGN_TYPE_VOID) {
return drgn_error_create(DRGN_ERROR_TYPE,
"cannot format void object");
}
/*
* Pointers are special because they can have an asterisk prefix if
* we're dereferencing them.
*/
if (drgn_type_kind(underlying_type) == DRGN_TYPE_POINTER &&
obj->kind != DRGN_OBJECT_ABSENT) {
return c_format_pointer_object(obj, underlying_type, indent,
one_line_columns,
multi_line_columns, flags, sb);
}
if (flags & DRGN_FORMAT_OBJECT_TYPE_NAME) {
size_t old_len = sb->len;
if (!string_builder_appendc(sb, '('))
return &drgn_enomem;
err = c_format_type_name_impl(drgn_object_qualified_type(obj),
sb);
if (err)
return err;
if (!string_builder_appendc(sb, ')'))
return &drgn_enomem;
if (__builtin_sub_overflow(one_line_columns, sb->len - old_len,
&one_line_columns))
one_line_columns = 0;
}
if (obj->kind == DRGN_OBJECT_ABSENT) {
if (!string_builder_append(sb, "<absent>"))
return &drgn_enomem;
return NULL;
}
SWITCH_ENUM(drgn_type_kind(underlying_type),
case DRGN_TYPE_INT:
case DRGN_TYPE_BOOL:
2019-12-06 19:17:43 +00:00
return c_format_int_object(obj, flags, sb);
case DRGN_TYPE_FLOAT:
return c_format_float_object(obj, sb);
case DRGN_TYPE_STRUCT:
case DRGN_TYPE_UNION:
case DRGN_TYPE_CLASS:
return c_format_compound_object(obj, underlying_type, indent,
one_line_columns,
multi_line_columns, flags, sb);
case DRGN_TYPE_ENUM:
return c_format_enum_object(obj, underlying_type, sb);
case DRGN_TYPE_ARRAY:
return c_format_array_object(obj, underlying_type, indent,
one_line_columns,
multi_line_columns, flags, sb);
case DRGN_TYPE_FUNCTION:
return c_format_function_object(obj, sb);
case DRGN_TYPE_VOID:
case DRGN_TYPE_TYPEDEF:
case DRGN_TYPE_POINTER:
)
}
static struct drgn_error *c_format_object(const struct drgn_object *obj,
size_t columns,
enum drgn_format_object_flags flags,
char **ret)
{
struct drgn_error *err;
struct string_builder sb = STRING_BUILDER_INIT;
err = c_format_object_impl(obj, 0, columns, max(columns, (size_t)1),
flags, &sb);
if (err) {
free(sb.str);
return err;
}
if (!(*ret = string_builder_null_terminate(&sb)))
return &drgn_enomem;
return NULL;
}
/* This obviously incomplete since we only handle the tokens we care about. */
enum {
C_TOKEN_EOF = -1,
MIN_KEYWORD_TOKEN,
MIN_SPECIFIER_TOKEN = MIN_KEYWORD_TOKEN,
C_TOKEN_VOID = MIN_SPECIFIER_TOKEN,
C_TOKEN_CHAR,
C_TOKEN_SHORT,
C_TOKEN_INT,
C_TOKEN_LONG,
C_TOKEN_SIGNED,
C_TOKEN_UNSIGNED,
C_TOKEN_BOOL,
C_TOKEN_FLOAT,
C_TOKEN_DOUBLE,
C_TOKEN_COMPLEX,
MAX_SPECIFIER_TOKEN = C_TOKEN_COMPLEX,
MIN_QUALIFIER_TOKEN,
C_TOKEN_CONST = MIN_QUALIFIER_TOKEN,
C_TOKEN_RESTRICT,
C_TOKEN_VOLATILE,
C_TOKEN_ATOMIC,
MAX_QUALIFIER_TOKEN = C_TOKEN_ATOMIC,
C_TOKEN_STRUCT,
C_TOKEN_UNION,
C_TOKEN_CLASS,
C_TOKEN_ENUM,
MAX_KEYWORD_TOKEN = C_TOKEN_ENUM,
C_TOKEN_LPAREN,
C_TOKEN_RPAREN,
C_TOKEN_LBRACKET,
C_TOKEN_RBRACKET,
C_TOKEN_ASTERISK,
C_TOKEN_DOT,
C_TOKEN_NUMBER,
C_TOKEN_IDENTIFIER,
C_TOKEN_TEMPLATE_ARGUMENTS,
C_TOKEN_COLON,
};
#include "c_keywords.inc"
struct drgn_c_family_lexer {
struct drgn_lexer lexer;
bool cpp;
};
struct drgn_error *drgn_c_family_lexer_func(struct drgn_lexer *lexer,
struct drgn_token *token) {
const char *p = lexer->p;
bool cpp = ((struct drgn_c_family_lexer *)lexer)->cpp;
while (isspace(*p))
p++;
token->value = p;
switch (*p) {
case '\0':
token->kind = C_TOKEN_EOF;
break;
case '(':
token->kind = C_TOKEN_LPAREN;
p++;
break;
case ')':
token->kind = C_TOKEN_RPAREN;
p++;
break;
case '[':
token->kind = C_TOKEN_LBRACKET;
p++;
break;
case ']':
token->kind = C_TOKEN_RBRACKET;
p++;
break;
case '*':
token->kind = C_TOKEN_ASTERISK;
p++;
break;
case '.':
token->kind = C_TOKEN_DOT;
p++;
break;
case ':':
token->kind = C_TOKEN_COLON;
p++;
break;
case '<':
// This is a hack for cpp_append_to_identifier(). We don't want
// to deal with actually parsing template arguments, and we
// don't care about "<" otherwise, so this scans a token from
// the "<" to its matching ">".
if (cpp) {
token->kind = C_TOKEN_TEMPLATE_ARGUMENTS;
p++;
size_t less_thans = 1;
bool in_single_quotes = false;
do {
switch (*p++) {
case '<':
if (!in_single_quotes)
less_thans++;
break;
case '>':
if (!in_single_quotes)
less_thans--;
break;
case '\'':
// Handling the edge-case of an escaped single-quote
if (!(in_single_quotes && *(p - 2) == '\\'))
in_single_quotes = !in_single_quotes;
break;
case '\0':
return drgn_error_create(DRGN_ERROR_SYNTAX,
"invalid template arguments");
}
} while (less_thans > 0);
break;
}
fallthrough;
default:
if (isalpha(*p) || *p == '_') {
do {
p++;
} while (isalnum(*p) || *p == '_');
token->kind = identifier_token_kind(token->value,
p - token->value,
cpp);
} else if ('0' <= *p && *p <= '9') {
token->kind = C_TOKEN_NUMBER;
if (*p++ == '0' && *p == 'x') {
p++;
while (('0' <= *p && *p <= '9') ||
('a' <= *p && *p <= 'f') ||
('A' <= *p && *p <= 'F')) {
p++;
}
if (p - token->value <= 2) {
return drgn_error_create(DRGN_ERROR_SYNTAX,
"invalid number");
}
} else {
while ('0' <= *p && *p <= '9')
p++;
}
if (isalpha(*p) || *p == '_') {
return drgn_error_create(DRGN_ERROR_SYNTAX,
"invalid number");
}
} else {
return drgn_error_format(DRGN_ERROR_SYNTAX,
"invalid character \\x%02x", (unsigned char)*p);
}
break;
}
token->len = p - token->value;
lexer->p = p;
return NULL;
}
static struct drgn_error *c_token_to_u64(const struct drgn_token *token,
uint64_t *ret)
{
uint64_t x = 0;
size_t i;
assert(token->kind == C_TOKEN_NUMBER);
if (token->len > 2 && token->value[0] == '0' &&
token->value[1] == 'x') {
for (i = 2; i < token->len; i++) {
char c = token->value[i];
int digit;
if ('0' <= c && c <= '9')
digit = c - '0';
else if ('a' <= c && c <= 'f')
digit = c - 'a';
else /* ('A' <= c && c <= 'F') */
digit = c - 'A';
if (x > UINT64_MAX / 16)
goto overflow;
x *= 16;
if (x > UINT64_MAX - digit)
goto overflow;
x += digit;
}
} else if (token->value[0] == '0') {
for (i = 1; i < token->len; i++) {
int digit;
digit = token->value[i] - '0';
if (x > UINT64_MAX / 8)
goto overflow;
x *= 8;
if (x > UINT64_MAX - digit)
goto overflow;
x += digit;
}
} else {
for (i = 0; i < token->len; i++) {
int digit;
digit = token->value[i] - '0';
if (x > UINT64_MAX / 10)
goto overflow;
x *= 10;
if (x > UINT64_MAX - digit)
goto overflow;
x += digit;
}
}
*ret = x;
return NULL;
overflow:
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
"number is too large");
}
enum c_type_specifier {
SPECIFIER_ERROR,
SPECIFIER_VOID,
SPECIFIER_CHAR,
SPECIFIER_SIGNED_CHAR,
SPECIFIER_UNSIGNED_CHAR,
SPECIFIER_SHORT,
SPECIFIER_SHORT_INT,
SPECIFIER_SIGNED_SHORT_INT,
SPECIFIER_UNSIGNED_SHORT_INT,
SPECIFIER_SIGNED_SHORT,
SPECIFIER_UNSIGNED_SHORT,
SPECIFIER_INT,
SPECIFIER_SIGNED_INT,
SPECIFIER_UNSIGNED_INT,
SPECIFIER_LONG,
SPECIFIER_LONG_INT,
SPECIFIER_SIGNED_LONG,
SPECIFIER_UNSIGNED_LONG,
SPECIFIER_SIGNED_LONG_INT,
SPECIFIER_UNSIGNED_LONG_INT,
SPECIFIER_LONG_LONG,
SPECIFIER_LONG_LONG_INT,
SPECIFIER_SIGNED_LONG_LONG_INT,
SPECIFIER_UNSIGNED_LONG_LONG_INT,
SPECIFIER_SIGNED_LONG_LONG,
SPECIFIER_UNSIGNED_LONG_LONG,
SPECIFIER_SIGNED,
SPECIFIER_UNSIGNED,
SPECIFIER_BOOL,
SPECIFIER_FLOAT,
SPECIFIER_DOUBLE,
SPECIFIER_LONG_DOUBLE,
SPECIFIER_NONE,
NUM_SPECIFIER_STATES,
};
static const char *specifier_spelling[NUM_SPECIFIER_STATES] = {
[SPECIFIER_VOID] = "void",
[SPECIFIER_CHAR] = "char",
[SPECIFIER_SIGNED_CHAR] = "signed char",
[SPECIFIER_UNSIGNED_CHAR] = "unsigned char",
[SPECIFIER_SHORT] = "short",
[SPECIFIER_SHORT_INT] = "short int",
[SPECIFIER_SIGNED_SHORT_INT] = "signed short int",
[SPECIFIER_UNSIGNED_SHORT_INT] = "unsigned short int",
[SPECIFIER_SIGNED_SHORT] = "signed short",
[SPECIFIER_UNSIGNED_SHORT] = "unsigned short",
[SPECIFIER_INT] = "int",
[SPECIFIER_SIGNED_INT] = "signed int",
[SPECIFIER_UNSIGNED_INT] = "unsigned int",
[SPECIFIER_LONG] = "long",
[SPECIFIER_LONG_INT] = "long int",
[SPECIFIER_SIGNED_LONG] = "signed long",
[SPECIFIER_UNSIGNED_LONG] = "unsigned long",
[SPECIFIER_SIGNED_LONG_INT] = "signed long int",
[SPECIFIER_UNSIGNED_LONG_INT] = "unsigned long int",
[SPECIFIER_LONG_LONG] = "long long",
[SPECIFIER_LONG_LONG_INT] = "long long int",
[SPECIFIER_SIGNED_LONG_LONG_INT] = "signed long long int",
[SPECIFIER_UNSIGNED_LONG_LONG_INT] = "unsigned long long int",
[SPECIFIER_SIGNED_LONG_LONG] = "signed long long",
[SPECIFIER_UNSIGNED_LONG_LONG] = "unsigned long long",
[SPECIFIER_SIGNED] = "signed",
[SPECIFIER_UNSIGNED] = "unsigned",
[SPECIFIER_BOOL] = "_Bool",
[SPECIFIER_FLOAT] = "float",
[SPECIFIER_DOUBLE] = "double",
[SPECIFIER_LONG_DOUBLE] = "long double",
};
static const enum drgn_qualifiers qualifier_from_token[MAX_QUALIFIER_TOKEN + 1] = {
[C_TOKEN_CONST] = DRGN_QUALIFIER_CONST,
[C_TOKEN_RESTRICT] = DRGN_QUALIFIER_RESTRICT,
[C_TOKEN_VOLATILE] = DRGN_QUALIFIER_VOLATILE,
[C_TOKEN_ATOMIC] = DRGN_QUALIFIER_ATOMIC,
};
static const enum c_type_specifier
specifier_transition[NUM_SPECIFIER_STATES][MAX_SPECIFIER_TOKEN + 1] = {
[SPECIFIER_NONE] = {
[C_TOKEN_VOID] = SPECIFIER_VOID,
[C_TOKEN_CHAR] = SPECIFIER_CHAR,
[C_TOKEN_SHORT] = SPECIFIER_SHORT,
[C_TOKEN_INT] = SPECIFIER_INT,
[C_TOKEN_LONG] = SPECIFIER_LONG,
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED,
[C_TOKEN_BOOL] = SPECIFIER_BOOL,
[C_TOKEN_FLOAT] = SPECIFIER_FLOAT,
[C_TOKEN_DOUBLE] = SPECIFIER_DOUBLE,
},
[SPECIFIER_VOID] = {},
[SPECIFIER_CHAR] = {
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED_CHAR,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED_CHAR,
},
[SPECIFIER_SIGNED_CHAR] = {},
[SPECIFIER_UNSIGNED_CHAR] = {},
[SPECIFIER_SHORT] = {
[C_TOKEN_INT] = SPECIFIER_SHORT_INT,
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED_SHORT,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED_SHORT,
},
[SPECIFIER_SHORT_INT] = {
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED_SHORT_INT,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED_SHORT_INT,
},
[SPECIFIER_SIGNED_SHORT_INT] = {},
[SPECIFIER_UNSIGNED_SHORT_INT] = {},
[SPECIFIER_SIGNED_SHORT] = {
[C_TOKEN_INT] = SPECIFIER_SIGNED_SHORT_INT,
},
[SPECIFIER_UNSIGNED_SHORT] = {
[C_TOKEN_INT] = SPECIFIER_UNSIGNED_SHORT_INT,
},
[SPECIFIER_INT] = {
[C_TOKEN_SHORT] = SPECIFIER_SHORT_INT,
[C_TOKEN_LONG] = SPECIFIER_LONG_INT,
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED_INT,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED_INT,
},
[SPECIFIER_SIGNED_INT] = {
[C_TOKEN_SHORT] = SPECIFIER_SIGNED_SHORT_INT,
[C_TOKEN_LONG] = SPECIFIER_SIGNED_LONG_INT,
},
[SPECIFIER_UNSIGNED_INT] = {
[C_TOKEN_SHORT] = SPECIFIER_UNSIGNED_SHORT_INT,
[C_TOKEN_LONG] = SPECIFIER_UNSIGNED_LONG_INT,
},
[SPECIFIER_LONG] = {
[C_TOKEN_INT] = SPECIFIER_LONG_INT,
[C_TOKEN_LONG] = SPECIFIER_LONG_LONG,
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED_LONG,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED_LONG,
[C_TOKEN_DOUBLE] = SPECIFIER_LONG_DOUBLE,
},
[SPECIFIER_LONG_INT] = {
[C_TOKEN_LONG] = SPECIFIER_LONG_LONG_INT,
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED_LONG_INT,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED_LONG_INT,
},
[SPECIFIER_SIGNED_LONG] = {
[C_TOKEN_LONG] = SPECIFIER_SIGNED_LONG_LONG,
[C_TOKEN_INT] = SPECIFIER_SIGNED_LONG_INT,
},
[SPECIFIER_UNSIGNED_LONG] = {
[C_TOKEN_LONG] = SPECIFIER_UNSIGNED_LONG_LONG,
[C_TOKEN_INT] = SPECIFIER_UNSIGNED_LONG_INT,
},
[SPECIFIER_SIGNED_LONG_INT] = {
[C_TOKEN_LONG] = SPECIFIER_SIGNED_LONG_LONG_INT,
},
[SPECIFIER_UNSIGNED_LONG_INT] = {
[C_TOKEN_LONG] = SPECIFIER_UNSIGNED_LONG_LONG_INT,
},
[SPECIFIER_LONG_LONG] = {
[C_TOKEN_INT] = SPECIFIER_LONG_LONG_INT,
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED_LONG_LONG,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED_LONG_LONG,
},
[SPECIFIER_LONG_LONG_INT] = {
[C_TOKEN_SIGNED] = SPECIFIER_SIGNED_LONG_LONG_INT,
[C_TOKEN_UNSIGNED] = SPECIFIER_UNSIGNED_LONG_LONG_INT,
},
[SPECIFIER_SIGNED_LONG_LONG_INT] = {},
[SPECIFIER_UNSIGNED_LONG_LONG_INT] = {},
[SPECIFIER_SIGNED_LONG_LONG] = {
[C_TOKEN_INT] = SPECIFIER_SIGNED_LONG_LONG_INT,
},
[SPECIFIER_UNSIGNED_LONG_LONG] = {
[C_TOKEN_INT] = SPECIFIER_UNSIGNED_LONG_LONG_INT,
},
[SPECIFIER_SIGNED] = {
[C_TOKEN_CHAR] = SPECIFIER_SIGNED_CHAR,
[C_TOKEN_SHORT] = SPECIFIER_SIGNED_SHORT,
[C_TOKEN_INT] = SPECIFIER_SIGNED_INT,
[C_TOKEN_LONG] = SPECIFIER_SIGNED_LONG,
},
[SPECIFIER_UNSIGNED] = {
[C_TOKEN_CHAR] = SPECIFIER_UNSIGNED_CHAR,
[C_TOKEN_SHORT] = SPECIFIER_UNSIGNED_SHORT,
[C_TOKEN_INT] = SPECIFIER_UNSIGNED_INT,
[C_TOKEN_LONG] = SPECIFIER_UNSIGNED_LONG,
},
[SPECIFIER_BOOL] = {},
[SPECIFIER_FLOAT] = {},
[SPECIFIER_DOUBLE] = {
[C_TOKEN_LONG] = SPECIFIER_LONG_DOUBLE,
},
[SPECIFIER_LONG_DOUBLE] = {},
};
static const enum drgn_primitive_type specifier_kind[NUM_SPECIFIER_STATES] = {
[SPECIFIER_VOID] = DRGN_C_TYPE_VOID,
[SPECIFIER_CHAR] = DRGN_C_TYPE_CHAR,
[SPECIFIER_SIGNED_CHAR] = DRGN_C_TYPE_SIGNED_CHAR,
[SPECIFIER_UNSIGNED_CHAR] = DRGN_C_TYPE_UNSIGNED_CHAR,
[SPECIFIER_SHORT] = DRGN_C_TYPE_SHORT,
[SPECIFIER_SHORT_INT] = DRGN_C_TYPE_SHORT,
[SPECIFIER_SIGNED_SHORT_INT] = DRGN_C_TYPE_SHORT,
[SPECIFIER_UNSIGNED_SHORT_INT] = DRGN_C_TYPE_UNSIGNED_SHORT,
[SPECIFIER_SIGNED_SHORT] = DRGN_C_TYPE_SHORT,
[SPECIFIER_UNSIGNED_SHORT] = DRGN_C_TYPE_UNSIGNED_SHORT,
[SPECIFIER_INT] = DRGN_C_TYPE_INT,
[SPECIFIER_SIGNED_INT] = DRGN_C_TYPE_INT,
[SPECIFIER_UNSIGNED_INT] = DRGN_C_TYPE_UNSIGNED_INT,
[SPECIFIER_LONG] = DRGN_C_TYPE_LONG,
[SPECIFIER_LONG_INT] = DRGN_C_TYPE_LONG,
[SPECIFIER_SIGNED_LONG] = DRGN_C_TYPE_LONG,
[SPECIFIER_UNSIGNED_LONG] = DRGN_C_TYPE_UNSIGNED_LONG,
[SPECIFIER_SIGNED_LONG_INT] = DRGN_C_TYPE_LONG,
[SPECIFIER_UNSIGNED_LONG_INT] = DRGN_C_TYPE_UNSIGNED_LONG,
[SPECIFIER_LONG_LONG] = DRGN_C_TYPE_LONG_LONG,
[SPECIFIER_LONG_LONG_INT] = DRGN_C_TYPE_LONG_LONG,
[SPECIFIER_SIGNED_LONG_LONG_INT] = DRGN_C_TYPE_LONG_LONG,
[SPECIFIER_UNSIGNED_LONG_LONG_INT] = DRGN_C_TYPE_UNSIGNED_LONG_LONG,
[SPECIFIER_SIGNED_LONG_LONG] = DRGN_C_TYPE_LONG_LONG,
[SPECIFIER_UNSIGNED_LONG_LONG] = DRGN_C_TYPE_UNSIGNED_LONG_LONG,
[SPECIFIER_SIGNED] = DRGN_C_TYPE_INT,
[SPECIFIER_UNSIGNED] = DRGN_C_TYPE_UNSIGNED_INT,
[SPECIFIER_BOOL] = DRGN_C_TYPE_BOOL,
[SPECIFIER_FLOAT] = DRGN_C_TYPE_FLOAT,
[SPECIFIER_DOUBLE] = DRGN_C_TYPE_DOUBLE,
[SPECIFIER_LONG_DOUBLE] = DRGN_C_TYPE_LONG_DOUBLE,
};
enum drgn_primitive_type c_parse_specifier_list(const char *s)
{
struct drgn_error *err;
struct drgn_c_family_lexer c_family_lexer;
struct drgn_lexer *lexer = &c_family_lexer.lexer;
enum c_type_specifier specifier = SPECIFIER_NONE;
enum drgn_primitive_type primitive = DRGN_NOT_PRIMITIVE_TYPE;
c_family_lexer.cpp = false;
drgn_lexer_init(lexer, drgn_c_family_lexer_func, s);
for (;;) {
struct drgn_token token;
err = drgn_lexer_pop(lexer, &token);
if (err) {
drgn_error_destroy(err);
goto out;
}
if (MIN_SPECIFIER_TOKEN <= token.kind &&
token.kind <= MAX_SPECIFIER_TOKEN)
specifier = specifier_transition[specifier][token.kind];
else if (token.kind == C_TOKEN_EOF)
break;
else
specifier = SPECIFIER_ERROR;
if (specifier == SPECIFIER_ERROR)
goto out;
}
primitive = specifier_kind[specifier];
out:
drgn_lexer_deinit(lexer);
return primitive;
}
// The DWARF index currently includes template arguments in indexed names. So,
// to be able to find a type with template arguments, we have to look it up with
// the template arguments included. This looks for a C_TOKEN_TEMPLATE_ARGUMENTS
// token after the identifier and returns the length from the beginning of the
// identifier to the end of the template arguments.
//
// Note that this requires that the user formats the template arguments exactly
// as they appear in DWARF (which can vary between compilers). In the future, it
// might be better to properly parse and either normalize the template arguments
// or look them up as an AST somehow.
static struct drgn_error *cpp_append_to_identifier(
struct drgn_lexer *lexer, const char *identifier, size_t *len_ret)
{
struct drgn_error *err;
// Only for C++.
if (!((struct drgn_c_family_lexer *)lexer)->cpp)
return NULL;
struct drgn_token token;
do {
err = drgn_lexer_pop(lexer, &token);
} while (!err && (token.kind == C_TOKEN_IDENTIFIER ||
token.kind == C_TOKEN_COLON));
if (err)
return err;
if (token.kind != C_TOKEN_TEMPLATE_ARGUMENTS) {
err = drgn_lexer_push(lexer, &token);
if (err)
return err;
}
*len_ret = token.value + token.len - identifier;
return NULL;
}
static struct drgn_error *
c_parse_specifier_qualifier_list(struct drgn_program *prog,
struct drgn_lexer *lexer, const char *filename,
struct drgn_qualified_type *ret)
{
struct drgn_error *err;
enum c_type_specifier specifier = SPECIFIER_NONE;
enum drgn_qualifiers qualifiers = 0;
const char *identifier = NULL;
size_t identifier_len = 0;
int tag_token = C_TOKEN_EOF;
for (;;) {
struct drgn_token token;
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
/* type-qualifier */
if (MIN_QUALIFIER_TOKEN <= token.kind &&
token.kind <= MAX_QUALIFIER_TOKEN) {
qualifiers |= qualifier_from_token[token.kind];
/* type-specifier */
} else if (MIN_SPECIFIER_TOKEN <= token.kind &&
token.kind <= MAX_SPECIFIER_TOKEN) {
enum c_type_specifier prev_specifier;
if (tag_token != C_TOKEN_EOF) {
return drgn_error_format(DRGN_ERROR_SYNTAX,
"cannot combine '%s' with '%s'",
keyword_spelling[token.kind],
keyword_spelling[tag_token]);
}
if (identifier) {
return drgn_error_format(DRGN_ERROR_SYNTAX,
"cannot combine '%s' with identifier",
keyword_spelling[token.kind]);
}
prev_specifier = specifier;
specifier = specifier_transition[specifier][token.kind];
if (specifier == SPECIFIER_ERROR) {
return drgn_error_format(DRGN_ERROR_SYNTAX,
"cannot combine '%s' with '%s'",
keyword_spelling[token.kind],
specifier_spelling[prev_specifier]);
}
} else if ((token.kind == C_TOKEN_IDENTIFIER ||
token.kind == C_TOKEN_COLON) &&
specifier == SPECIFIER_NONE && !identifier) {
identifier = token.value;
identifier_len = token.len;
err = cpp_append_to_identifier(lexer, identifier,
&identifier_len);
if (err)
return err;
} else if (token.kind == C_TOKEN_STRUCT ||
token.kind == C_TOKEN_UNION ||
token.kind == C_TOKEN_CLASS ||
token.kind == C_TOKEN_ENUM) {
if (identifier) {
return drgn_error_format(DRGN_ERROR_SYNTAX,
"cannot combine '%s' with identifier",
keyword_spelling[token.kind]);
}
if (specifier != SPECIFIER_NONE) {
return drgn_error_format(DRGN_ERROR_SYNTAX,
"cannot combine '%s' with '%s'",
keyword_spelling[token.kind],
specifier_spelling[specifier]);
}
tag_token = token.kind;
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
if (!(token.kind == C_TOKEN_IDENTIFIER ||
token.kind == C_TOKEN_COLON)) {
return drgn_error_format(DRGN_ERROR_SYNTAX,
"expected identifier after '%s'",
keyword_spelling[tag_token]);
}
identifier = token.value;
identifier_len = token.len;
err = cpp_append_to_identifier(lexer, identifier,
&identifier_len);
if (err)
return err;
} else {
err = drgn_lexer_push(lexer, &token);
if (err)
return err;
break;
}
}
if (specifier == SPECIFIER_NONE) {
enum drgn_type_kind kind;
if (tag_token == C_TOKEN_STRUCT) {
kind = DRGN_TYPE_STRUCT;
} else if (tag_token == C_TOKEN_UNION) {
kind = DRGN_TYPE_UNION;
} else if (tag_token == C_TOKEN_CLASS) {
kind = DRGN_TYPE_CLASS;
} else if (tag_token == C_TOKEN_ENUM) {
kind = DRGN_TYPE_ENUM;
} else if (identifier) {
if (identifier_len == sizeof("size_t") - 1 &&
memcmp(identifier, "size_t",
sizeof("size_t") - 1) == 0) {
err = drgn_program_find_primitive_type(prog,
DRGN_C_TYPE_SIZE_T,
&ret->type);
if (err)
return err;
ret->qualifiers = 0;
goto out;
} else if (identifier_len == sizeof("ptrdiff_t") - 1 &&
memcmp(identifier, "ptrdiff_t",
sizeof("ptrdiff_t") - 1) == 0) {
err = drgn_program_find_primitive_type(prog,
DRGN_C_TYPE_PTRDIFF_T,
&ret->type);
if (err)
return err;
ret->qualifiers = 0;
goto out;
} else {
kind = DRGN_TYPE_TYPEDEF;
}
} else {
return drgn_error_create(DRGN_ERROR_SYNTAX,
"expected type specifier");
}
err = drgn_program_find_type_impl(prog, kind, identifier,
identifier_len, filename,
ret);
if (err)
return err;
} else {
err = drgn_program_find_primitive_type(prog,
specifier_kind[specifier],
&ret->type);
if (err)
return err;
ret->qualifiers = 0;
}
out:
ret->qualifiers |= qualifiers;
return NULL;
}
struct c_declarator {
/* C_TOKEN_ASTERISK or C_TOKEN_LBRACKET. */
int kind;
enum drgn_qualifiers qualifiers;
/* Only for C_TOKEN_LBRACKET. */
bool is_complete;
uint64_t length;
struct c_declarator *next;
};
/* These functions don't free the declarator list on error. */
static struct drgn_error *
c_parse_abstract_declarator(struct drgn_program *prog,
struct drgn_lexer *lexer,
struct c_declarator **outer,
struct c_declarator **inner);
static struct drgn_error *
c_parse_optional_type_qualifier_list(struct drgn_lexer *lexer,
enum drgn_qualifiers *qualifiers)
{
struct drgn_error *err;
struct drgn_token token;
*qualifiers = 0;
for (;;) {
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
if (token.kind < MIN_QUALIFIER_TOKEN ||
token.kind > MAX_QUALIFIER_TOKEN) {
err = drgn_lexer_push(lexer, &token);
if (err)
return err;
return NULL;
}
*qualifiers |= qualifier_from_token[token.kind];
}
}
static struct drgn_error *
c_parse_pointer(struct drgn_program *prog, struct drgn_lexer *lexer,
struct c_declarator **outer, struct c_declarator **inner)
{
struct drgn_error *err;
struct drgn_token token;
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
if (token.kind != C_TOKEN_ASTERISK)
return drgn_error_create(DRGN_ERROR_SYNTAX, "expected '*'");
*inner = NULL;
for (;;) {
struct c_declarator *tmp;
tmp = malloc(sizeof(*tmp));
if (!tmp)
return &drgn_enomem;
tmp->kind = C_TOKEN_ASTERISK;
tmp->next = *outer;
*outer = tmp;
err = c_parse_optional_type_qualifier_list(lexer,
&(*outer)->qualifiers);
if (err)
return err;
if (!*inner)
*inner = *outer;
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
if (token.kind != C_TOKEN_ASTERISK)
return drgn_lexer_push(lexer, &token);
}
}
static struct drgn_error *
c_parse_direct_abstract_declarator(struct drgn_program *prog,
struct drgn_lexer *lexer,
struct c_declarator **outer,
struct c_declarator **inner)
{
struct drgn_error *err;
struct drgn_token token;
*inner = NULL;
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
if (token.kind == C_TOKEN_LPAREN) {
struct drgn_token token2;
err = drgn_lexer_peek(lexer, &token2);
if (err)
return err;
if (token2.kind == C_TOKEN_ASTERISK ||
token2.kind == C_TOKEN_LPAREN ||
token2.kind == C_TOKEN_LBRACKET) {
err = c_parse_abstract_declarator(prog, lexer, outer,
inner);
if (err)
return err;
err = drgn_lexer_pop(lexer, &token2);
if (err)
return err;
if (token2.kind != C_TOKEN_RPAREN) {
return drgn_error_create(DRGN_ERROR_SYNTAX,
"expected ')'");
}
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
}
}
for (;;) {
if (token.kind == C_TOKEN_LBRACKET) {
struct c_declarator *tmp;
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
tmp = malloc(sizeof(*tmp));
if (!tmp)
return &drgn_enomem;
tmp->kind = C_TOKEN_LBRACKET;
tmp->qualifiers = 0;
if (token.kind == C_TOKEN_NUMBER) {
tmp->is_complete = true;
err = c_token_to_u64(&token, &tmp->length);
if (err) {
free(tmp);
return err;
}
err = drgn_lexer_pop(lexer, &token);
if (err) {
free(tmp);
return err;
}
} else {
tmp->is_complete = false;
}
if (*inner) {
tmp->next = (*inner)->next;
*inner = (*inner)->next = tmp;
} else {
tmp->next = *outer;
*outer = *inner = tmp;
}
if (token.kind != C_TOKEN_RBRACKET) {
return drgn_error_create(DRGN_ERROR_SYNTAX,
"expected ']'");
}
} else if (token.kind == C_TOKEN_LPAREN) {
return drgn_error_create(DRGN_ERROR_SYNTAX,
"function pointer types are not implemented");
} else {
err = drgn_lexer_push(lexer, &token);
if (err)
return err;
if (!*inner) {
return drgn_error_create(DRGN_ERROR_SYNTAX,
"expected abstract declarator");
}
return NULL;
}
err = drgn_lexer_pop(lexer, &token);
if (err)
return err;
}
}
static struct drgn_error *
c_parse_abstract_declarator(struct drgn_program *prog,
struct drgn_lexer *lexer,
struct c_declarator **outer,
struct c_declarator **inner)
{
struct drgn_error *err;
struct drgn_token token;
err = drgn_lexer_peek(lexer, &token);
if (err)
return err;
if (token.kind == C_TOKEN_ASTERISK) {
err = c_parse_pointer(prog, lexer, outer, inner);
if (err)
return err;
err = drgn_lexer_peek(lexer, &token);
if (err)
return err;
if (token.kind == C_TOKEN_LPAREN ||
token.kind == C_TOKEN_LBRACKET) {
struct c_declarator *tmp;
err = c_parse_direct_abstract_declarator(prog, lexer,
outer, &tmp);
if (err)
return err;
}
return NULL;
} else {
return c_parse_direct_abstract_declarator(prog, lexer, outer,
inner);
}
}
/* This always frees the declarator list regardless of success or failure. */
static struct drgn_error *
c_type_from_declarator(struct drgn_program *prog,
struct c_declarator *declarator,
struct drgn_qualified_type *ret)
{
struct drgn_error *err;
if (!declarator)
return NULL;
err = c_type_from_declarator(prog, declarator->next, ret);
if (err) {
free(declarator);
return err;
}
if (declarator->kind == C_TOKEN_ASTERISK) {
uint8_t address_size;
err = drgn_program_address_size(prog, &address_size);
2020-07-16 00:34:56 +01:00
if (!err) {
err = drgn_pointer_type_create(prog, *ret, address_size,
DRGN_PROGRAM_ENDIAN,
2020-07-16 00:34:56 +01:00
drgn_type_language(ret->type),
&ret->type);
}
} else if (declarator->is_complete) {
2020-07-16 00:34:56 +01:00
err = drgn_array_type_create(prog, *ret, declarator->length,
drgn_type_language(ret->type),
&ret->type);
} else {
2020-07-16 00:34:56 +01:00
err = drgn_incomplete_array_type_create(prog, *ret,
drgn_type_language(ret->type),
&ret->type);
}
if (!err)
ret->qualifiers = declarator->qualifiers;
free(declarator);
return err;
}
static struct drgn_error *c_family_find_type(const struct drgn_language *lang,
struct drgn_program *prog,
const char *name,
const char *filename,
struct drgn_qualified_type *ret)
{
struct drgn_error *err;
struct drgn_c_family_lexer c_family_lexer;
struct drgn_lexer *lexer = &c_family_lexer.lexer;
struct drgn_token token;
c_family_lexer.cpp = lang == &drgn_language_cpp;
drgn_lexer_init(lexer, drgn_c_family_lexer_func, name);
err = c_parse_specifier_qualifier_list(prog, lexer, filename, ret);
if (err)
goto out;
err = drgn_lexer_pop(lexer, &token);
if (err)
goto out;
if (token.kind != C_TOKEN_EOF) {
struct c_declarator *outer = NULL, *inner;
err = drgn_lexer_push(lexer, &token);
if (err)
return err;
err = c_parse_abstract_declarator(prog, lexer, &outer, &inner);
if (err) {
while (outer) {
struct c_declarator *next;
next = outer->next;
free(outer);
outer = next;
}
goto out;
}
err = c_type_from_declarator(prog, outer, ret);
if (err)
goto out;
err = drgn_lexer_pop(lexer, &token);
if (err)
goto out;
if (token.kind != C_TOKEN_EOF) {
err = drgn_error_create(DRGN_ERROR_SYNTAX,
"extra tokens after type name");
goto out;
}
}
err = NULL;
out:
drgn_lexer_deinit(lexer);
return err;
}
static struct drgn_error *c_family_bit_offset(struct drgn_program *prog,
struct drgn_type *type,
const char *member_designator,
uint64_t *ret)
{
struct drgn_error *err;
struct drgn_c_family_lexer c_family_lexer;
struct drgn_lexer *lexer = &c_family_lexer.lexer;
int state = INT_MIN;
uint64_t bit_offset = 0;
c_family_lexer.cpp = prog->lang == &drgn_language_cpp;
drgn_lexer_init(lexer, drgn_c_family_lexer_func, member_designator);
for (;;) {
struct drgn_token token;
err = drgn_lexer_pop(lexer, &token);
if (err)
goto out;
switch (state) {
case INT_MIN:
case C_TOKEN_DOT:
if (token.kind == C_TOKEN_IDENTIFIER) {
struct drgn_type_member *member;
uint64_t member_bit_offset;
err = drgn_type_find_member_len(type,
token.value,
token.len,
&member,
&member_bit_offset);
if (err)
goto out;
if (__builtin_add_overflow(bit_offset,
member_bit_offset,
&bit_offset)) {
err = drgn_error_create(DRGN_ERROR_OVERFLOW,
"offset is too large");
goto out;
}
struct drgn_qualified_type member_type;
err = drgn_member_type(member, &member_type,
NULL);
if (err)
goto out;
type = member_type.type;
} else if (state == C_TOKEN_DOT) {
err = drgn_error_create(DRGN_ERROR_SYNTAX,
"expected identifier after '.'");
goto out;
} else {
err = drgn_error_create(DRGN_ERROR_SYNTAX,
"expected identifier");
goto out;
}
break;
case C_TOKEN_IDENTIFIER:
case C_TOKEN_RBRACKET:
switch (token.kind) {
case C_TOKEN_EOF:
*ret = bit_offset;
err = NULL;
goto out;
case C_TOKEN_DOT:
case C_TOKEN_LBRACKET:
break;
default:
if (state == C_TOKEN_IDENTIFIER) {
err = drgn_error_create(DRGN_ERROR_SYNTAX,
"expected '.' or '[' after identifier");
goto out;
} else {
err = drgn_error_create(DRGN_ERROR_SYNTAX,
"expected '.' or '[' after ']'");
goto out;
}
}
break;
case C_TOKEN_LBRACKET:
if (token.kind == C_TOKEN_NUMBER) {
struct drgn_type *underlying_type;
struct drgn_type *element_type;
uint64_t index, bit_size, element_offset;
err = c_token_to_u64(&token, &index);
if (err)
goto out;
underlying_type = drgn_underlying_type(type);
if (drgn_type_kind(underlying_type) != DRGN_TYPE_ARRAY) {
err = drgn_type_error("'%s' is not an array",
type);
goto out;
}
element_type =
drgn_type_type(underlying_type).type;
err = drgn_type_bit_size(element_type,
&bit_size);
if (err)
goto out;
if (__builtin_mul_overflow(index, bit_size,
&element_offset) ||
__builtin_add_overflow(bit_offset,
element_offset,
&bit_offset)) {
err = drgn_error_create(DRGN_ERROR_OVERFLOW,
"offset is too large");
goto out;
}
type = element_type;
} else {
err = drgn_error_create(DRGN_ERROR_SYNTAX,
"expected number after '['");
goto out;
}
break;
case C_TOKEN_NUMBER:
if (token.kind != C_TOKEN_RBRACKET) {
err = drgn_error_create(DRGN_ERROR_SYNTAX,
"expected ']' after number");
goto out;
}
break;
default:
UNREACHABLE();
}
state = token.kind;
}
out:
drgn_lexer_deinit(lexer);
return err;
}
static struct drgn_error *c_integer_literal(struct drgn_object *res,
uint64_t uvalue)
{
static const enum drgn_primitive_type types[] = {
DRGN_C_TYPE_INT,
DRGN_C_TYPE_LONG,
DRGN_C_TYPE_LONG_LONG,
DRGN_C_TYPE_UNSIGNED_LONG_LONG,
};
struct drgn_error *err;
unsigned int bits = fls(uvalue);
struct drgn_qualified_type qualified_type;
qualified_type.qualifiers = 0;
array_for_each(type, types) {
err = drgn_program_find_primitive_type(drgn_object_program(res),
*type,
&qualified_type.type);
if (err)
return err;
if (drgn_type_is_signed(qualified_type.type) &&
bits < 8 * drgn_type_size(qualified_type.type)) {
return drgn_object_set_signed(res,
qualified_type,
uvalue, 0);
} else if (!drgn_type_is_signed(qualified_type.type) &&
bits <= 8 * drgn_type_size(qualified_type.type)) {
return drgn_object_set_unsigned(res, qualified_type,
uvalue, 0);
}
}
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
"integer literal is too large");
}
static struct drgn_error *c_bool_literal(struct drgn_object *res, bool bvalue)
{
struct drgn_error *err;
struct drgn_qualified_type qualified_type;
err = drgn_program_find_primitive_type(drgn_object_program(res),
DRGN_C_TYPE_INT,
&qualified_type.type);
if (err)
return err;
qualified_type.qualifiers = 0;
return drgn_object_set_signed(res, qualified_type, bvalue, 0);
}
static struct drgn_error *c_float_literal(struct drgn_object *res,
double fvalue)
{
struct drgn_error *err;
struct drgn_qualified_type qualified_type;
err = drgn_program_find_primitive_type(drgn_object_program(res),
DRGN_C_TYPE_DOUBLE,
&qualified_type.type);
if (err)
return err;
qualified_type.qualifiers = 0;
return drgn_object_set_float(res, qualified_type, fvalue);
}
static const int c_integer_conversion_rank[] = {
[DRGN_C_TYPE_BOOL] = 0,
[DRGN_C_TYPE_CHAR] = 1,
[DRGN_C_TYPE_SIGNED_CHAR] = 1,
[DRGN_C_TYPE_UNSIGNED_CHAR] = 1,
[DRGN_C_TYPE_SHORT] = 2,
[DRGN_C_TYPE_UNSIGNED_SHORT] = 2,
[DRGN_C_TYPE_INT] = 3,
[DRGN_C_TYPE_UNSIGNED_INT] = 3,
[DRGN_C_TYPE_LONG] = 4,
[DRGN_C_TYPE_UNSIGNED_LONG] = 4,
[DRGN_C_TYPE_LONG_LONG] = 5,
[DRGN_C_TYPE_UNSIGNED_LONG_LONG] = 5,
};
static bool c_can_represent_all_values(struct drgn_type *type1,
uint64_t bit_field_size1,
struct drgn_type *type2,
uint64_t bit_field_size2)
{
uint64_t width1, width2;
bool is_signed1, is_signed2;
if (drgn_type_kind(type1) == DRGN_TYPE_BOOL) {
width1 = 1;
is_signed1 = false;
} else {
width1 = (bit_field_size1 ? bit_field_size1 :
8 * drgn_type_size(type1));
is_signed1 = drgn_type_is_signed(type1);
}
if (drgn_type_kind(type2) == DRGN_TYPE_BOOL) {
width2 = 1;
is_signed2 = false;
} else {
width2 = (bit_field_size2 ? bit_field_size2 :
8 * drgn_type_size(type2));
is_signed2 = drgn_type_is_signed(type2);
}
if (is_signed1 == is_signed2)
return width1 >= width2;
else if (is_signed1 && !is_signed2)
return width1 > width2;
else
return false;
}
static struct drgn_error *c_integer_promotions(struct drgn_program *prog,
struct drgn_operand_type *type)
{
struct drgn_error *err;
enum drgn_primitive_type primitive;
struct drgn_type *int_type;
switch (drgn_type_kind(type->underlying_type)) {
case DRGN_TYPE_ENUM:
/* Convert the enum to its compatible type. */
type->type = type->underlying_type =
drgn_type_type(type->underlying_type).type;
if (!type->type) {
return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT,
"operand cannot have incomplete enum type");
}
break;
case DRGN_TYPE_INT:
case DRGN_TYPE_BOOL:
break;
default:
return NULL;
}
primitive = drgn_type_primitive(type->underlying_type);
/*
* Integer promotions are performed on types whose integer conversion
* rank is less than or equal to the rank of int and unsigned int.
*
* If this isn't a standard integer type, then we don't know the rank,
* so we may need to promote it. According to the C standard, "the rank
* of a signed integer type shall be greater than the rank of any signed
* integer type with less precision", and "the rank of any standard
* integer type shall be greater than the rank of any extended integer
* type with the same width". If an extended signed integer type has
* less precision than int, or the same width as int, then all of its
* values can be represented by int (and likewise for an extended
* unsigned integer type and unsigned int). Therefore, an extended
* integer type should be promoted iff all of its values can be
* represented by int or unsigned int.
*
* Integer promotions are also performed on bit fields. The C standard
* only requires that bit fields of type _Bool, int, or unsigned int are
* supported, so it does not define how integer promotions should affect
* a bit field which cannot be represented by int or unsigned int. Clang
* promotes it to the full width, but GCC does not. We implement the GCC
* behavior of preserving the width.
*/
if (primitive >= array_size(c_integer_conversion_rank) ||
type->bit_field_size) {
err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_INT,
&int_type);
if (err)
return err;
if (c_can_represent_all_values(int_type, 0,
type->underlying_type,
type->bit_field_size)) {
type->type = type->underlying_type = int_type;
type->bit_field_size = 0;
return NULL;
}
err = drgn_program_find_primitive_type(prog,
DRGN_C_TYPE_UNSIGNED_INT,
&int_type);
if (err)
return err;
if (c_can_represent_all_values(int_type, 0,
type->underlying_type,
type->bit_field_size)) {
type->type = type->underlying_type = int_type;
type->bit_field_size = 0;
}
return NULL;
}
if (primitive == DRGN_C_TYPE_INT ||
primitive == DRGN_C_TYPE_UNSIGNED_INT ||
c_integer_conversion_rank[primitive] >
c_integer_conversion_rank[DRGN_C_TYPE_INT])
return NULL;
/*
* If int can represent all values of the original type, then the result
* is int. Otherwise, the result is unsigned int.
*/
err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_INT,
&int_type);
if (err)
return err;
if (c_can_represent_all_values(int_type, 0, type->underlying_type, 0)) {
type->type = int_type;
} else {
err = drgn_program_find_primitive_type(prog,
DRGN_C_TYPE_UNSIGNED_INT,
&type->type);
if (err)
return err;
}
type->underlying_type = type->type;
return NULL;
}
static struct drgn_error *
c_corresponding_unsigned_type(struct drgn_program *prog,
enum drgn_primitive_type type,
struct drgn_type **ret)
{
switch (type) {
/*
* char, signed char, and short are promoted to int, so we don't need to
* handle them here.
*/
case DRGN_C_TYPE_INT:
return drgn_program_find_primitive_type(prog,
DRGN_C_TYPE_UNSIGNED_INT,
ret);
case DRGN_C_TYPE_LONG:
return drgn_program_find_primitive_type(prog,
DRGN_C_TYPE_UNSIGNED_LONG,
ret);
case DRGN_C_TYPE_LONG_LONG:
return drgn_program_find_primitive_type(prog,
DRGN_C_TYPE_UNSIGNED_LONG_LONG,
ret);
default:
UNREACHABLE();
}
}
static struct drgn_error *c_common_real_type(struct drgn_program *prog,
struct drgn_operand_type *type1,
struct drgn_operand_type *type2,
struct drgn_operand_type *ret)
{
struct drgn_error *err;
enum drgn_primitive_type primitive1, primitive2;
bool is_float1, is_float2;
bool is_signed1, is_signed2;
int rank_cmp;
ret->qualifiers = 0;
/*
* Strictly, the rules are:
*
* If either operand is long double, then the result is long double.
* Otherwise, if either operand is double, then the result is double.
* Otherwise, if either operand is float, then the result is float.
*
* However, we also have to handle other floating types not in the
* standard. Thus, the result is always the larger type, with ties
* broken in the order unknown > long double > double > float.
*/
is_float1 = drgn_type_kind(type1->underlying_type) == DRGN_TYPE_FLOAT;
is_float2 = drgn_type_kind(type2->underlying_type) == DRGN_TYPE_FLOAT;
if (is_float1 && is_float2) {
uint64_t size1, size2;
size1 = drgn_type_size(type1->underlying_type);
size2 = drgn_type_size(type2->underlying_type);
if (size1 > size2)
goto ret1;
else if (size2 > size1)
goto ret2;
else if (drgn_type_primitive(type1->underlying_type) >
drgn_type_primitive(type2->underlying_type))
goto ret1;
else
goto ret2;
} else if (is_float1) {
goto ret1;
} else if (is_float2) {
goto ret2;
}
/*
* Otherwise, the integer promotions are performed before applying the
* following rules.
*/
err = c_integer_promotions(prog, type1);
if (err)
return err;
err = c_integer_promotions(prog, type2);
if (err)
return err;
is_signed1 = drgn_type_is_signed(type1->underlying_type);
is_signed2 = drgn_type_is_signed(type2->underlying_type);
/*
* The C standard only requires that bit fields of type _Bool, int, or
* unsigned int are supported, which are always promoted to int or
* unsigned int, so it does not define how to find the common real type
* when one or both of the operands are bit fields. GCC seems to use the
* wider operand, or the unsigned operand if they have equal width. As
* usual, we pick type2 if the two types are equivalent.
*/
if (type1->bit_field_size || type2->bit_field_size) {
uint64_t width1, width2;
width1 = (type1->bit_field_size ? type1->bit_field_size :
8 * drgn_type_size(type1->type));
width2 = (type2->bit_field_size ? type2->bit_field_size :
8 * drgn_type_size(type2->type));
if (width1 < width2 ||
(width1 == width2 && (!is_signed2 || is_signed1)))
goto ret2;
else
goto ret1;
}
primitive1 = drgn_type_primitive(type1->underlying_type);
primitive2 = drgn_type_primitive(type2->underlying_type);
if (primitive1 != DRGN_NOT_PRIMITIVE_TYPE &&
primitive2 != DRGN_NOT_PRIMITIVE_TYPE) {
/*
* If both operands have the same type, then no further
* conversions are needed.
*
* We can return either type1 or type2 here; it only makes a
* difference for typedefs. Arbitrarily pick type2 because
* that's what GCC seems to do (Clang always throws away the
* typedef).
*/
if (primitive1 == primitive2)
goto ret2;
/* Ranks are small, so this won't overflow. */
rank_cmp = (c_integer_conversion_rank[primitive1] -
c_integer_conversion_rank[primitive2]);
} else {
/*
* We don't know the rank of non-standard integer types.
* However, we can usually compare their ranks, because
* according to the C standard, "the rank of a signed integer
* type shall be greater than the rank of any signed integer
* type with less precision", "the rank of any unsigned integer
* type shall equal the rank of the corresponding signed integer
* type", and "the rank of any standard integer type shall be
* greater than the rank of any extended integer type with the
* same width". The only case where we can't is if both types
* are non-standard and have the same size; we treat them as
* having equal rank in this case.
*/
uint64_t size1, size2;
size1 = drgn_type_size(type1->underlying_type);
size2 = drgn_type_size(type2->underlying_type);
if (size1 == size2 && primitive1 == DRGN_NOT_PRIMITIVE_TYPE &&
primitive2 == DRGN_NOT_PRIMITIVE_TYPE)
rank_cmp = 0;
else if ((size1 == size2 && primitive2 != DRGN_NOT_PRIMITIVE_TYPE) ||
size1 < size2)
rank_cmp = -1;
else
rank_cmp = 1;
}
/*
* Otherwise, if both operands have signed integer types or both have
* unsigned integer types, then the result is the type of the operand
* with the greater rank.
*/
if (is_signed1 == is_signed2) {
if (rank_cmp > 0)
goto ret1;
else
goto ret2;
}
/*
* Otherwise, if the operand that has unsigned integer type has rank
* greater or equal to the rank of the type of the other operand, then
* the result is the unsigned integer type.
*/
if (!is_signed1 && rank_cmp >= 0)
goto ret1;
else if (!is_signed2 && rank_cmp <= 0)
goto ret2;
/*
* Otherwise, if the type of the operand with signed integer type can
* represent all of the values of the type of the operand with unsigned
* integer type, then the result is the signed integer type.
*/
if (is_signed1 && c_can_represent_all_values(type1->underlying_type, 0,
type2->underlying_type, 0))
goto ret1;
if (is_signed2 && c_can_represent_all_values(type2->underlying_type, 0,
type1->underlying_type, 0))
goto ret2;
/*
* Otherwise, the result is the unsigned integer type corresponding to
* the type of the operand with signed integer type.
*
* Note that this case is not reached for non-standard types: if the
* types have different signs and the signed integer type has greater
* rank, then it must have greater size and thus be able to represent
* all values of the unsigned integer type.
*/
err = c_corresponding_unsigned_type(prog,
is_signed1 ? primitive1 : primitive2,
&ret->type);
if (err)
return err;
ret->underlying_type = ret->type;
ret->bit_field_size = 0;
return NULL;
ret1:
*ret = *type1;
return NULL;
ret2:
*ret = *type2;
return NULL;
}
static struct drgn_error *c_operand_type(const struct drgn_object *obj,
struct drgn_operand_type *type_ret,
bool *is_pointer_ret,
uint64_t *referenced_size_ret)
{
struct drgn_error *err;
*type_ret = drgn_object_operand_type(obj);
switch (drgn_type_kind(type_ret->underlying_type)) {
2020-07-16 00:34:56 +01:00
case DRGN_TYPE_ARRAY: {
uint8_t address_size;
err = drgn_program_address_size(drgn_object_program(obj),
&address_size);
2020-07-16 00:34:56 +01:00
if (err)
return err;
err = drgn_pointer_type_create(drgn_object_program(obj),
2020-07-16 00:34:56 +01:00
drgn_type_type(type_ret->underlying_type),
address_size,
DRGN_PROGRAM_ENDIAN,
2020-07-16 00:34:56 +01:00
drgn_type_language(type_ret->underlying_type),
&type_ret->type);
if (err)
return err;
type_ret->underlying_type = type_ret->type;
break;
2020-07-16 00:34:56 +01:00
}
case DRGN_TYPE_FUNCTION: {
struct drgn_qualified_type function_type = {
.type = type_ret->underlying_type,
.qualifiers = type_ret->qualifiers,
};
uint8_t address_size;
err = drgn_program_address_size(drgn_object_program(obj),
&address_size);
2020-07-16 00:34:56 +01:00
if (err)
return err;
err = drgn_pointer_type_create(drgn_object_program(obj),
function_type, address_size,
DRGN_PROGRAM_ENDIAN,
2020-07-16 00:34:56 +01:00
drgn_type_language(type_ret->underlying_type),
&type_ret->type);
if (err)
return err;
type_ret->underlying_type = type_ret->type;
break;
}
default:
err = drgn_type_with_byte_order(&type_ret->type,
&type_ret->underlying_type,
DRGN_PROGRAM_ENDIAN);
if (err)
return err;
break;
}
type_ret->qualifiers = 0;
if (is_pointer_ret) {
struct drgn_type *type = type_ret->underlying_type;
*is_pointer_ret = drgn_type_kind(type) == DRGN_TYPE_POINTER;
if (*is_pointer_ret && referenced_size_ret) {
struct drgn_type *referenced_type =
drgn_underlying_type(drgn_type_type(type).type);
if (drgn_type_kind(referenced_type) == DRGN_TYPE_VOID) {
*referenced_size_ret = 1;
} else {
err = drgn_type_sizeof(referenced_type,
referenced_size_ret);
if (err)
return err;
}
}
}
return NULL;
}
static struct drgn_error *c_op_cast(struct drgn_object *res,
struct drgn_qualified_type qualified_type,
const struct drgn_object *obj)
{
struct drgn_error *err;
struct drgn_operand_type type;
err = c_operand_type(obj, &type, NULL, NULL);
if (err)
return err;
return drgn_op_cast(res, qualified_type, obj, &type);
}
/*
* It's too expensive to check that two pointer types are compatible, so we just
* check that they refer to the same kind of type with equal size.
*/
static bool c_pointers_similar(const struct drgn_operand_type *lhs_type,
const struct drgn_operand_type *rhs_type,
uint64_t lhs_size, uint64_t rhs_size)
{
struct drgn_type *lhs_referenced_type, *rhs_referenced_type;
lhs_referenced_type = drgn_type_type(lhs_type->underlying_type).type;
rhs_referenced_type = drgn_type_type(rhs_type->underlying_type).type;
return (drgn_type_kind(lhs_referenced_type) ==
drgn_type_kind(rhs_referenced_type) && lhs_size == rhs_size);
}
static struct drgn_error *c_op_bool(const struct drgn_object *obj, bool *ret)
{
struct drgn_error *err;
struct drgn_type *underlying_type;
underlying_type = drgn_underlying_type(obj->type);
if (drgn_type_kind(underlying_type) == DRGN_TYPE_ARRAY) {
*ret = true;
return NULL;
}
if (!drgn_type_is_scalar(underlying_type)) {
return drgn_qualified_type_error("cannot convert '%s' to bool",
drgn_object_qualified_type(obj));
}
err = drgn_object_is_zero(obj, ret);
if (err)
return err;
*ret = !*ret;
return NULL;
}
static struct drgn_error *c_op_cmp(const struct drgn_object *lhs,
const struct drgn_object *rhs, int *ret)
{
struct drgn_error *err;
struct drgn_operand_type lhs_type, rhs_type;
bool lhs_pointer, rhs_pointer;
err = c_operand_type(lhs, &lhs_type, &lhs_pointer, NULL);
if (err)
return err;
err = c_operand_type(rhs, &rhs_type, &rhs_pointer, NULL);
if (err)
return err;
if (lhs_pointer && rhs_pointer) {
return drgn_op_cmp_pointers(lhs, rhs, ret);
} else if (lhs_pointer || rhs_pointer) {
goto type_error;
} else {
struct drgn_operand_type type;
if (!drgn_type_is_arithmetic(lhs_type.underlying_type) ||
!drgn_type_is_arithmetic(rhs_type.underlying_type))
goto type_error;
err = c_common_real_type(drgn_object_program(lhs), &lhs_type,
&rhs_type, &type);
if (err)
return err;
return drgn_op_cmp_impl(lhs, rhs, &type, ret);
}
type_error:
return drgn_error_binary_op("comparison", &lhs_type, &rhs_type);
}
static struct drgn_error *c_op_add(struct drgn_object *res,
const struct drgn_object *lhs,
const struct drgn_object *rhs)
{
struct drgn_error *err;
struct drgn_operand_type lhs_type, rhs_type;
bool lhs_pointer, rhs_pointer;
uint64_t lhs_size, rhs_size;
err = c_operand_type(lhs, &lhs_type, &lhs_pointer, &lhs_size);
if (err)
return err;
err = c_operand_type(rhs, &rhs_type, &rhs_pointer, &rhs_size);
if (err)
return err;
if (lhs_pointer) {
if (!drgn_type_is_integer(rhs_type.underlying_type))
goto type_error;
return drgn_op_add_to_pointer(res, &lhs_type, lhs_size, false, lhs, rhs);
} else if (rhs_pointer) {
if (!drgn_type_is_integer(lhs_type.underlying_type))
goto type_error;
return drgn_op_add_to_pointer(res, &rhs_type, rhs_size, false, rhs, lhs);
} else {
struct drgn_operand_type type;
if (!drgn_type_is_arithmetic(lhs_type.underlying_type) ||
!drgn_type_is_arithmetic(rhs_type.underlying_type))
goto type_error;
err = c_common_real_type(drgn_object_program(lhs), &lhs_type,
&rhs_type, &type);
if (err)
return err;
return drgn_op_add_impl(res, &type, lhs, rhs);
}
type_error:
return drgn_error_binary_op("binary +", &lhs_type, &rhs_type);
}
static struct drgn_error *c_op_sub(struct drgn_object *res,
const struct drgn_object *lhs,
const struct drgn_object *rhs)
{
struct drgn_error *err;
struct drgn_operand_type lhs_type, rhs_type;
bool lhs_pointer, rhs_pointer;
uint64_t lhs_size, rhs_size;
err = c_operand_type(lhs, &lhs_type, &lhs_pointer, &lhs_size);
if (err)
return err;
err = c_operand_type(rhs, &rhs_type, &rhs_pointer, &rhs_size);
if (err)
return err;
if (lhs_pointer && rhs_pointer) {
struct drgn_operand_type type = {};
err = drgn_program_find_primitive_type(drgn_object_program(lhs),
DRGN_C_TYPE_PTRDIFF_T,
&type.type);
if (err)
return err;
type.underlying_type = drgn_underlying_type(type.type);
if (!c_pointers_similar(&lhs_type, &rhs_type, lhs_size,
rhs_size))
goto type_error;
return drgn_op_sub_pointers(res, &type, lhs_size, lhs, rhs);
} else if (lhs_pointer) {
if (!drgn_type_is_integer(rhs_type.underlying_type))
goto type_error;
return drgn_op_add_to_pointer(res, &lhs_type, lhs_size, true,
lhs, rhs);
} else {
struct drgn_operand_type type;
if (!drgn_type_is_arithmetic(lhs_type.underlying_type) ||
!drgn_type_is_arithmetic(rhs_type.underlying_type))
goto type_error;
err = c_common_real_type(drgn_object_program(lhs), &lhs_type,
&rhs_type, &type);
if (err)
return err;
return drgn_op_sub_impl(res, &type, lhs, rhs);
}
type_error:
return drgn_error_binary_op("binary -", &lhs_type, &rhs_type);
}
#define BINARY_OP(op_name, op, check) \
static struct drgn_error *c_op_##op_name(struct drgn_object *res, \
const struct drgn_object *lhs, \
const struct drgn_object *rhs) \
{ \
struct drgn_error *err; \
\
struct drgn_operand_type lhs_type, rhs_type, type; \
err = c_operand_type(lhs, &lhs_type, NULL, NULL); \
if (err) \
return err; \
err = c_operand_type(rhs, &rhs_type, NULL, NULL); \
if (err) \
return err; \
if (!drgn_type_is_##check(lhs_type.underlying_type) || \
!drgn_type_is_##check(rhs_type.underlying_type)) \
return drgn_error_binary_op("binary "#op, &lhs_type, \
&rhs_type); \
\
err = c_common_real_type(drgn_object_program(lhs), &lhs_type, \
&rhs_type, &type); \
if (err) \
return err; \
\
return drgn_op_##op_name##_impl(res, &type, lhs, rhs); \
}
BINARY_OP(mul, *, arithmetic)
BINARY_OP(div, /, arithmetic)
BINARY_OP(mod, %, integer)
BINARY_OP(and, &, integer)
BINARY_OP(or, |, integer)
BINARY_OP(xor, ^, integer)
#undef BINARY_OP
#define SHIFT_OP(op_name, op) \
static struct drgn_error *c_op_##op_name(struct drgn_object *res, \
const struct drgn_object *lhs, \
const struct drgn_object *rhs) \
{ \
struct drgn_error *err; \
\
struct drgn_operand_type lhs_type, rhs_type; \
err = c_operand_type(lhs, &lhs_type, NULL, NULL); \
if (err) \
return err; \
err = c_operand_type(rhs, &rhs_type, NULL, NULL); \
if (err) \
return err; \
if (!drgn_type_is_integer(lhs_type.underlying_type) || \
!drgn_type_is_integer(rhs_type.underlying_type)) \
return drgn_error_binary_op("binary " #op, &lhs_type, \
&rhs_type); \
\
err = c_integer_promotions(drgn_object_program(lhs), &lhs_type); \
if (err) \
return err; \
err = c_integer_promotions(drgn_object_program(lhs), &rhs_type); \
if (err) \
return err; \
\
return drgn_op_##op_name##_impl(res, lhs, &lhs_type, rhs, &rhs_type); \
}
SHIFT_OP(lshift, <<)
SHIFT_OP(rshift, >>)
#undef SHIFT_OP
#define UNARY_OP(op_name, op, check) \
static struct drgn_error *c_op_##op_name(struct drgn_object *res, \
const struct drgn_object *obj) \
{ \
struct drgn_error *err; \
\
struct drgn_operand_type type; \
err = c_operand_type(obj, &type, NULL, NULL); \
if (err) \
return err; \
if (!drgn_type_is_##check(type.underlying_type)) \
return drgn_error_unary_op("unary " #op, &type); \
\
err = c_integer_promotions(drgn_object_program(obj), &type); \
if (err) \
return err; \
\
return drgn_op_##op_name##_impl(res, &type, obj); \
}
UNARY_OP(pos, +, arithmetic)
UNARY_OP(neg, -, arithmetic)
UNARY_OP(not, ~, integer)
#undef UNARY_OP
LIBDRGN_PUBLIC const struct drgn_language drgn_language_c = {
.name = "C",
.number = DRGN_LANGUAGE_C,
.has_namespaces = false,
.format_type_name = c_format_type_name,
.format_type = c_format_type,
.format_object = c_format_object,
.find_type = c_family_find_type,
.bit_offset = c_family_bit_offset,
.integer_literal = c_integer_literal,
.bool_literal = c_bool_literal,
.float_literal = c_float_literal,
.op_cast = c_op_cast,
.op_bool = c_op_bool,
.op_cmp = c_op_cmp,
.op_add = c_op_add,
.op_sub = c_op_sub,
.op_mul = c_op_mul,
.op_div = c_op_div,
.op_mod = c_op_mod,
.op_lshift = c_op_lshift,
.op_rshift = c_op_rshift,
.op_and = c_op_and,
.op_or = c_op_or,
.op_xor = c_op_xor,
.op_pos = c_op_pos,
.op_neg = c_op_neg,
.op_not = c_op_not,
};
LIBDRGN_PUBLIC const struct drgn_language drgn_language_cpp = {
.name = "C++",
.number = DRGN_LANGUAGE_CPP,
.has_namespaces = true,
.format_type_name = c_format_type_name,
.format_type = c_format_type,
.format_object = c_format_object,
.find_type = c_family_find_type,
.bit_offset = c_family_bit_offset,
.integer_literal = c_integer_literal,
.bool_literal = c_bool_literal,
.float_literal = c_float_literal,
.op_cast = c_op_cast,
.op_bool = c_op_bool,
.op_cmp = c_op_cmp,
.op_add = c_op_add,
.op_sub = c_op_sub,
.op_mul = c_op_mul,
.op_div = c_op_div,
.op_mod = c_op_mod,
.op_lshift = c_op_lshift,
.op_rshift = c_op_rshift,
.op_and = c_op_and,
.op_or = c_op_or,
.op_xor = c_op_xor,
.op_pos = c_op_pos,
.op_neg = c_op_neg,
.op_not = c_op_not,
};