drgn/libdrgn/lexer.h
Omar Sandoval 75c3679147 Rewrite drgn core in C
The current mixed Python/C implementation works well, but it has a
couple of important limitations:

- It's too slow for some common use cases, like iterating over large
  data structures.
- It can't be reused in utilities written in other languages.

This replaces the internals with a new library written in C, libdrgn. It
includes Python bindings with mostly the same public interface as
before, with some important improvements:

- Types are now represented by a single Type class rather than the messy
  polymorphism in the Python implementation.
- Qualifiers are a bitmask instead of a set of strings.
- Bit fields are not considered a separate type.
- The lvalue/rvalue terminology is replaced with reference/value.
- Structure, union, and array values are better supported.
- Function objects are supported.
- Program distinguishes between lookups of variables, constants, and
  functions.

The C rewrite is about 6x as fast as the original Python when using the
Python bindings, and about 8x when using the C API directly.

Currently, the exposed API in C is fairly conservative. In the future,
the memory reader, type index, and object index APIs will probably be
exposed for more flexibility.
2019-04-02 14:12:07 -07:00

132 lines
3.1 KiB
C

// Copyright 2018-2019 - Omar Sandoval
// SPDX-License-Identifier: GPL-3.0+
/**
* @file
*
* Lexer interface.
*
* See @ref Lexer.
*/
#ifndef DRGN_LEXER_H
#define DRGN_LEXER_H
#include <stddef.h>
/**
* @ingroup Internals
*
* @defgroup Lexer Lexer
*
* Lexical analysis.
*
* This is a convenient interface for lexical analysis. @ref drgn_lexer provides
* the abstraction of a stack of tokens (@ref drgn_token) on top of a raw @ref
* drgn_lexer_func.
*
* @{
*/
struct drgn_error;
struct drgn_lexer;
struct drgn_token;
/**
 * Callback that lexes a single token.
 *
 * An implementation lexes the next token in the string, fills in the token it
 * was handed, and moves @ref drgn_lexer::p forward.
 *
 * @param lexer Lexer whose position is advanced.
 * @param token Token to initialize.
 */
typedef struct drgn_error *(*drgn_lexer_func)(struct drgn_lexer *lexer,
					      struct drgn_token *token);
/** A single token produced by lexical analysis. */
struct drgn_token {
	/** Token kind; what each value means is up to the lexer function. */
	int kind;
	/**
	 * The lexeme, i.e., the text of the token.
	 *
	 * This is a pointer into the original string rather than a copy, so it
	 * is not null-terminated; use @ref drgn_token::len for its extent.
	 */
	const char *value;
	/** Length of @ref drgn_token::value. */
	size_t len;
};
/**
 * State of a lexer.
 *
 * A lexer is made up of a lexer function, a position, and a stack of tokens.
 * Tokens can be pushed onto and popped off of the stack. Popping while the
 * stack is empty calls the lexer function instead.
 */
struct drgn_lexer {
	/** Function that lexes new tokens. */
	drgn_lexer_func func;
	/** Current position in the string being lexed. */
	const char *p;
	/** Token stack. */
	struct drgn_token *stack;
	/** How many tokens are currently on the stack. */
	size_t stack_len;
	/** Allocated size of @ref drgn_lexer::stack. */
	size_t stack_capacity;
};
/**
 * Set up a @ref drgn_lexer to lex a string with a given lexer function.
 *
 * @param[in] lexer Lexer to initialize.
 * @param[in] func Lexer function.
 * @param[in] str String to lex. Token values point into the contents of this
 * string (see @ref drgn_token::value).
 */
void drgn_lexer_init(struct drgn_lexer *lexer, drgn_lexer_func func,
		     const char *str);
/**
 * Release the memory that was allocated by a @ref drgn_lexer.
 *
 * @param[in] lexer Lexer to deinitialize.
 */
void drgn_lexer_deinit(struct drgn_lexer *lexer);
/**
 * Get the next token from a @ref drgn_lexer.
 *
 * When the token stack is not empty, the top token is popped and returned.
 * When it is empty, the lexer function is called to get the next token.
 *
 * @param[in] lexer Lexer to get the token from.
 * @param[out] token Returned token.
 * @return @c NULL on success, non-@c NULL on error.
 */
struct drgn_error *drgn_lexer_pop(struct drgn_lexer *lexer,
				  struct drgn_token *token);
/**
 * Put a token on top of the stack of a @ref drgn_lexer.
 *
 * Only a token that was returned by @ref drgn_lexer_pop() may be pushed.
 *
 * @param[in] lexer Lexer to push the token onto.
 * @param[in] token Token to push.
 * @return @c NULL on success, non-@c NULL on error.
 */
struct drgn_error *drgn_lexer_push(struct drgn_lexer *lexer,
				   const struct drgn_token *token);
/**
 * Get the next token from a @ref drgn_lexer while keeping it on top of the
 * stack.
 *
 * Calling this has the same effect as calling @ref drgn_lexer_pop() and then
 * immediately calling @ref drgn_lexer_push() with the returned token.
 *
 * @param[in] lexer Lexer to peek at.
 * @param[out] token Returned token.
 * @return @c NULL on success, non-@c NULL on error.
 */
struct drgn_error *drgn_lexer_peek(struct drgn_lexer *lexer,
				   struct drgn_token *token);
/** @} */
#endif /* DRGN_LEXER_H */