From ab58a5bff0851b7c439ae4348c185beb1cd79451 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 10 May 2019 15:10:04 -0700 Subject: [PATCH] libdrgn: determine default size_t and ptrdiff_t more intelligently Currently, size_t and ptrdiff_t default to typedefs of the default unsigned long and long, respectively, regardless of what the program actually defines unsigned long or long as. Instead, make them refer to whatever integer type (long, long long, or int) is the same size as the word size. --- libdrgn/type_index.c | 127 +++++++++++++++++++++++------------------- libdrgn/type_index.h | 2 + tests/test_program.py | 46 +++++++++++++-- 3 files changed, 113 insertions(+), 62 deletions(-) diff --git a/libdrgn/type_index.c b/libdrgn/type_index.c index 647c1dbf..2f365244 100644 --- a/libdrgn/type_index.c +++ b/libdrgn/type_index.c @@ -171,15 +171,15 @@ void drgn_type_index_remove_finder(struct drgn_type_index *tindex) tindex->finders = finder; } -/* Default long, unsigned long, size_t, and ptrdiff_t are 64 bits. */ +/* Default long and unsigned long are 64 bits. */ static struct drgn_type default_primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; -/* 32-bit version of long, unsigned long, size_t, and ptrdiff_t. */ -static struct drgn_type default_primitive_types_32bit[4]; +/* 32-bit versions of long and unsigned long. 
*/ +static struct drgn_type default_long_32bit; +static struct drgn_type default_unsigned_long_32bit; __attribute__((constructor(200))) static void default_primitive_types_init(void) { - struct drgn_qualified_type qualified_type; size_t i; drgn_int_type_init(&default_primitive_types[DRGN_C_TYPE_CHAR], @@ -227,43 +227,24 @@ static void default_primitive_types_init(void) drgn_float_type_init(&default_primitive_types[DRGN_C_TYPE_LONG_DOUBLE], drgn_primitive_type_spellings[DRGN_C_TYPE_LONG_DOUBLE][0], 16); - qualified_type.type = &default_primitive_types[DRGN_C_TYPE_UNSIGNED_LONG]; - qualified_type.qualifiers = 0; - drgn_typedef_type_init(&default_primitive_types[DRGN_C_TYPE_SIZE_T], - drgn_primitive_type_spellings[DRGN_C_TYPE_SIZE_T][0], - qualified_type); - qualified_type.type = &default_primitive_types[DRGN_C_TYPE_LONG]; - drgn_typedef_type_init(&default_primitive_types[DRGN_C_TYPE_PTRDIFF_T], - drgn_primitive_type_spellings[DRGN_C_TYPE_PTRDIFF_T][0], - qualified_type); for (i = 0; i < ARRAY_SIZE(default_primitive_types); i++) { - if (drgn_primitive_type_kind[i] == DRGN_TYPE_VOID) + if (drgn_primitive_type_kind[i] == DRGN_TYPE_VOID || + i == DRGN_C_TYPE_SIZE_T || i == DRGN_C_TYPE_PTRDIFF_T) continue; assert(drgn_type_primitive(&default_primitive_types[i]) == i); } - drgn_int_type_init(&default_primitive_types_32bit[0], + drgn_int_type_init(&default_long_32bit, drgn_primitive_type_spellings[DRGN_C_TYPE_LONG][0], 4, true); - drgn_int_type_init(&default_primitive_types_32bit[1], + assert(drgn_type_primitive(&default_long_32bit) == + DRGN_C_TYPE_LONG); + + drgn_int_type_init(&default_unsigned_long_32bit, drgn_primitive_type_spellings[DRGN_C_TYPE_UNSIGNED_LONG][0], 4, false); - qualified_type.type = &default_primitive_types_32bit[1]; - drgn_typedef_type_init(&default_primitive_types_32bit[2], - drgn_primitive_type_spellings[DRGN_C_TYPE_SIZE_T][0], - qualified_type); - qualified_type.type = &default_primitive_types_32bit[0]; - 
drgn_typedef_type_init(&default_primitive_types_32bit[3], - drgn_primitive_type_spellings[DRGN_C_TYPE_PTRDIFF_T][0], - qualified_type); - assert(drgn_type_primitive(&default_primitive_types_32bit[0]) == - DRGN_C_TYPE_LONG); - assert(drgn_type_primitive(&default_primitive_types_32bit[1]) == + assert(drgn_type_primitive(&default_unsigned_long_32bit) == DRGN_C_TYPE_UNSIGNED_LONG); - assert(drgn_type_primitive(&default_primitive_types_32bit[2]) == - DRGN_C_TYPE_SIZE_T); - assert(drgn_type_primitive(&default_primitive_types_32bit[3]) == - DRGN_C_TYPE_PTRDIFF_T); } /* @@ -337,33 +318,67 @@ drgn_type_index_find_primitive(struct drgn_type_index *tindex, } } - switch (type) { - case DRGN_C_TYPE_LONG: - i = 0; - break; - case DRGN_C_TYPE_UNSIGNED_LONG: - i = 1; - break; - case DRGN_C_TYPE_SIZE_T: - i = 2; - break; - case DRGN_C_TYPE_PTRDIFF_T: - i = 3; - break; - default: - *ret = &default_primitive_types[type]; - goto out; + /* long and unsigned long default to the word size. */ + if (type == DRGN_C_TYPE_LONG || type == DRGN_C_TYPE_UNSIGNED_LONG) { + if (!tindex->word_size) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "word size has not been set"); + } + if (tindex->word_size == 4) { + *ret = (type == DRGN_C_TYPE_LONG ? + &default_long_32bit : + &default_unsigned_long_32bit); + goto out; + } + } + /* + * size_t and ptrdiff_t default to typedefs of whatever integer type + * matches the word size. 
+ */ + if (type == DRGN_C_TYPE_SIZE_T || type == DRGN_C_TYPE_PTRDIFF_T) { + static enum drgn_primitive_type integer_types[2][3] = { + { + DRGN_C_TYPE_UNSIGNED_LONG, + DRGN_C_TYPE_UNSIGNED_LONG_LONG, + DRGN_C_TYPE_UNSIGNED_INT, + }, + { + DRGN_C_TYPE_LONG, + DRGN_C_TYPE_LONG_LONG, + DRGN_C_TYPE_INT, + }, + }; + + if (!tindex->word_size) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "word size has not been set"); + } + for (i = 0; i < 3; i++) { + enum drgn_primitive_type integer_type; + + integer_type = integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; + err = drgn_type_index_find_primitive(tindex, + integer_type, + &qualified_type.type); + if (err) + return err; + if (drgn_type_size(qualified_type.type) == + tindex->word_size) { + qualified_type.qualifiers = 0; + *ret = (type == DRGN_C_TYPE_SIZE_T ? + &tindex->default_size_t : + &tindex->default_ptrdiff_t); + drgn_typedef_type_init(*ret, spellings[0], + qualified_type); + goto out; + } + } + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "no suitable integer type for %s", + spellings[0]); } - if (!tindex->word_size) { - return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, - "word size has not been set"); - } - - if (tindex->word_size == 4) - *ret = &default_primitive_types_32bit[i]; - else - *ret = &default_primitive_types[type]; + *ret = &default_primitive_types[type]; out: tindex->primitive_types[type] = *ret; diff --git a/libdrgn/type_index.h b/libdrgn/type_index.h index aa42563f..b84bc287 100644 --- a/libdrgn/type_index.h +++ b/libdrgn/type_index.h @@ -94,6 +94,8 @@ struct drgn_type_index { struct drgn_type_finder *finders; /** Cache of primitive types. */ struct drgn_type *primitive_types[DRGN_PRIMITIVE_TYPE_NUM]; + struct drgn_type default_size_t; + struct drgn_type default_ptrdiff_t; /** Cache of created pointer types. */ struct drgn_pointer_type_set pointer_types; /** Cache of created array types. 
*/ diff --git a/tests/test_program.py b/tests/test_program.py index 9623d1c5..e1fd990e 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -30,6 +30,7 @@ from tests.elf import ET, PT from tests.elfwriter import ElfSection, create_elf_file +MOCK_32BIT_ARCH = Architecture.IS_LITTLE_ENDIAN MOCK_ARCH = Architecture.IS_64_BIT | Architecture.IS_LITTLE_ENDIAN @@ -43,7 +44,7 @@ def mock_memory_read(data, address, count, physical, offset): return data[offset:offset + count] -def mock_program(*, arch=MOCK_ARCH, segments=None, types=None, symbols=None): +def mock_program(arch=MOCK_ARCH, *, segments=None, types=None, symbols=None): def mock_find_type(kind, name, filename): if filename: return None @@ -227,11 +228,7 @@ class TestTypes(unittest.TestCase): yield ' '.join(perm) for word_size in [8, 4]: - if word_size == 8: - arch = MOCK_ARCH | Architecture.IS_64_BIT - else: - arch = MOCK_ARCH & ~Architecture.IS_64_BIT - prog = mock_program(arch=arch) + prog = mock_program(MOCK_ARCH if word_size == 8 else MOCK_32BIT_ARCH) self.assertEqual(prog.type('_Bool'), bool_type('_Bool', 1)) self.assertEqual(prog.type('char'), int_type('char', 1, True)) for spelling in spellings(['signed', 'char']): @@ -289,6 +286,43 @@ class TestTypes(unittest.TestCase): self.assertEqual(prog.type('unsigned long'), int_type('unsigned long', 8, False)) + def test_size_t_and_ptrdiff_t(self): + # 64-bit architecture with 4-byte long/unsigned long. + prog = mock_program(types=[ + int_type('long', 4, True), + int_type('unsigned long', 4, False), + ]) + self.assertEqual(prog.type('size_t'), + typedef_type('size_t', prog.type('unsigned long long'))) + self.assertEqual(prog.type('ptrdiff_t'), + typedef_type('ptrdiff_t', prog.type('long long'))) + + # 32-bit architecture with 8-byte long/unsigned long. 
+ prog = mock_program(MOCK_32BIT_ARCH, types=[ + int_type('long', 8, True), + int_type('unsigned long', 8, False), + ]) + self.assertEqual(prog.type('size_t'), + typedef_type('size_t', prog.type('unsigned int'))) + self.assertEqual(prog.type('ptrdiff_t'), + typedef_type('ptrdiff_t', prog.type('int'))) + + # Nonsense sizes. + prog = mock_program(types=[ + int_type('int', 1, True), + int_type('unsigned int', 1, False), + int_type('long', 1, True), + int_type('unsigned long', 1, False), + int_type('long long', 2, True), + int_type('unsigned long long', 2, False), + ]) + self.assertRaisesRegex(ValueError, + 'no suitable integer type for size_t', + prog.type, 'size_t') + self.assertRaisesRegex(ValueError, + 'no suitable integer type for ptrdiff_t', + prog.type, 'ptrdiff_t') + def test_tagged_type(self): prog = mock_program(types=[point_type, option_type, color_type]) self.assertEqual(prog.type('struct point'), point_type)