Skip to content

Commit

Permalink
Merge pull request #405 from eed3si9n/wip/array
Browse files Browse the repository at this point in the history
refactor(scanner): use new array header for stack
  • Loading branch information
eed3si9n authored Jul 17, 2024
2 parents 599d12b + 3fb3931 commit a13f2d1
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 186 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,6 @@ jobs:
with:
node-version: 20

- name: Test C stack code
run: gcc test/test-stack.c -o a.out && ./a.out

- name: Generate parser from scratch and test it
if: ${{ runner.os == 'Linux' || needs.changedfiles.outputs.c }}
shell: bash
Expand Down
184 changes: 137 additions & 47 deletions src/scanner.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
#include "stack.h"
#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"
#include "tree_sitter/parser.h"
#include <stdio.h>
#include <string.h>

#include <wctype.h>

// #define DEBUG

#ifdef DEBUG
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#else
#define LOG(...)
#endif

enum TokenType {
AUTOMATIC_SEMICOLON,
INDENT,
Expand All @@ -22,26 +30,82 @@ enum TokenType {
WITH,
};

typedef struct {
Array(int16_t) indents;
int16_t last_indentation_size;
int16_t last_newline_count;
int16_t last_column;
} Scanner;

void *tree_sitter_scala_external_scanner_create() {
return createStack();
Scanner *scanner = ts_calloc(1, sizeof(Scanner));
array_init(&scanner->indents);
scanner->last_indentation_size = -1;
scanner->last_column = -1;
return scanner;
}

void tree_sitter_scala_external_scanner_destroy(void *payload) {
free(payload);
Scanner *scanner = payload;
array_delete(&scanner->indents);
ts_free(scanner);
}

unsigned tree_sitter_scala_external_scanner_serialize(void *payload, char *buffer) {
return serialiseStack(payload, buffer);
Scanner *scanner = (Scanner*)payload;

if ((scanner->indents.size + 3) * sizeof(int16_t) > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
return 0;
}

size_t size = 0;
*(int16_t *)&buffer[size] = scanner->last_indentation_size;
size += sizeof(int16_t);
*(int16_t *)&buffer[size] = scanner->last_newline_count;
size += sizeof(int16_t);
*(int16_t *)&buffer[size] = scanner->last_column;
size += sizeof(int16_t);

for (unsigned i = 0; i < scanner->indents.size; i++) {
*(int16_t *)&buffer[size] = scanner->indents.contents[i];
size += sizeof(int16_t);
}

return size;
}

void tree_sitter_scala_external_scanner_deserialize(void *payload, const char *buffer,
unsigned length) {
deserialiseStack(payload, buffer, length);
Scanner *scanner = (Scanner*)payload;
array_clear(&scanner->indents);
scanner->last_indentation_size = -1;
scanner->last_column = -1;
scanner->last_newline_count = 0;

if (length == 0) {
return;
}

size_t size = 0;

scanner->last_indentation_size = *(int16_t *)&buffer[size];
size += sizeof(int16_t);
scanner->last_newline_count = *(int16_t *)&buffer[size];
size += sizeof(int16_t);
scanner->last_column = *(int16_t *)&buffer[size];
size += sizeof(int16_t);

while (size < length) {
array_push(&scanner->indents, *(int16_t *)&buffer[size]);
size += sizeof(int16_t);
}

assert(size == length);
}

static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }

static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }

static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_interpolation) {
unsigned closing_quote_count = 0;
Expand Down Expand Up @@ -102,7 +166,7 @@ static bool detect_comment_start(TSLexer *lexer) {
}

static bool scan_word(TSLexer *lexer, const char* const word) {
for (int i = 0; word[i] != '\0'; i++) {
for (uint8_t i = 0; word[i] != '\0'; i++) {
if (lexer->lookahead != word[i]) {
return false;
}
Expand All @@ -111,12 +175,20 @@ static bool scan_word(TSLexer *lexer, const char* const word) {
return !iswalnum(lexer->lookahead);
}

static inline void debug_indents(Scanner *scanner) {
LOG(" indents(%d): ", scanner->indents.size);
for (unsigned i = 0; i < scanner->indents.size; i++) {
LOG("%d ", scanner->indents.contents[i]);
}
LOG("\n");
}

bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
ScannerStack *stack = (ScannerStack *)payload;
int prev = peekStack(stack);
int newline_count = 0;
int indentation_size = 0;
Scanner *scanner = (Scanner *)payload;
int16_t prev = scanner->indents.size > 0 ? *array_back(&scanner->indents) : -1;
int16_t newline_count = 0;
int16_t indentation_size = 0;

while (iswspace(lexer->lookahead)) {
if (lexer->lookahead == '\n') {
Expand All @@ -130,35 +202,47 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
}

// Before advancing the lexer, check if we can double outdent
if (valid_symbols[OUTDENT] &&
(lexer->lookahead == 0 ||
if (
valid_symbols[OUTDENT] &&
(
(prev != -1) &&
lexer->lookahead == ')' ||
lexer->lookahead == ']' ||
lexer->lookahead == '}'
) || (
stack->last_indentation_size != -1 &&
prev != -1 &&
stack->last_indentation_size < prev))) {
popStack(stack);
lexer->lookahead == 0 ||
(
prev != -1 &&
(
lexer->lookahead == ')' ||
lexer->lookahead == ']' ||
lexer->lookahead == '}'
)
) ||
(
scanner->last_indentation_size != -1 &&
prev != -1 &&
scanner->last_indentation_size < prev
)
)
) {
if (scanner->indents.size > 0) {
array_pop(&scanner->indents);
}
LOG(" pop\n");
LOG(" OUTDENT\n");
lexer->result_symbol = OUTDENT;
return true;
}
stack->last_indentation_size = -1;

printStack(stack, " before");
scanner->last_indentation_size = -1;

if (valid_symbols[INDENT] &&
if (
valid_symbols[INDENT] &&
newline_count > 0 &&
(isEmptyStack(stack) ||
indentation_size > peekStack(stack))) {
(
scanner->indents.size == 0 ||
indentation_size > *array_back(&scanner->indents)
)
) {
if (detect_comment_start(lexer)) {
return false;
}
pushStack(stack, indentation_size);
array_push(&scanner->indents, indentation_size);
lexer->result_symbol = INDENT;
LOG(" INDENT\n");
return true;
Expand All @@ -167,38 +251,44 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
// This saves the indentation_size and newline_count so it can be used
// in subsequent calls for multiple outdent or autosemicolon.
if (valid_symbols[OUTDENT] &&
(lexer->lookahead == 0 || (
(lexer->lookahead == 0 ||
(
newline_count > 0 &&
prev != -1 &&
indentation_size < prev))) {
popStack(stack);
indentation_size < prev
)
)
) {
if (scanner->indents.size > 0) {
array_pop(&scanner->indents);
}
LOG(" pop\n");
LOG(" OUTDENT\n");
lexer->result_symbol = OUTDENT;
lexer->mark_end(lexer);
if (detect_comment_start(lexer)) {
return false;
}
stack->last_indentation_size = indentation_size;
stack->last_newline_count = newline_count;
scanner->last_indentation_size = indentation_size;
scanner->last_newline_count = newline_count;
if (lexer->eof(lexer)) {
stack->last_column = -1;
scanner->last_column = -1;
} else {
stack->last_column = (int)lexer->get_column(lexer);
scanner->last_column = (int16_t)lexer->get_column(lexer);
}
return true;
}

// Recover newline_count from the outdent reset
bool is_eof = lexer->eof(lexer);
if (stack->last_newline_count > 0 &&
((is_eof && stack->last_column == -1) ||
(!is_eof && lexer->get_column(lexer) == stack->last_column))) {
newline_count += stack->last_newline_count;
if (
scanner->last_newline_count > 0 &&
(is_eof && scanner->last_column == -1) ||
(!is_eof && lexer->get_column(lexer) == (uint32_t)scanner->last_column)
) {
newline_count += scanner->last_newline_count;
}
stack->last_newline_count = 0;

printStack(stack, " after");
scanner->last_newline_count = 0;

if (valid_symbols[AUTOMATIC_SEMICOLON] && newline_count > 0) {
// AUTOMATIC_SEMICOLON should not be issued in the middle of expressions
Expand Down Expand Up @@ -240,7 +330,7 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
}
skip(lexer);
}
// If some code is present at the same line after comment end,
// If some code is present at the same line after comment end,
// we should still produce AUTOMATIC_SEMICOLON, e.g. in
// val a = 1
// /* comment */ val b = 2
Expand Down
90 changes: 0 additions & 90 deletions src/stack.h

This file was deleted.

Loading

0 comments on commit a13f2d1

Please sign in to comment.