Skip to content

Commit

Permalink
Fix missing error context in error collection crashing sqlparse
Browse files Browse the repository at this point in the history
  • Loading branch information
surister committed Oct 16, 2024
1 parent e4f46ec commit 56c9fca
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased
- Export `Statement` in both Python and Javascript target
- Fixed query parsing when expression includes special characters like `\n`, `\r`, or `\t`
- Fixed sqlparse crash on missing error context

## 2024/09/18 v0.0.7
- Improve error matching on single statement
Expand Down
18 changes: 15 additions & 3 deletions cratedb_sqlparse_js/cratedb_sqlparse/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,22 @@ class ExceptionCollectorListener extends ErrorListener {

syntaxError(recognizer, offendingSymbol, line, column, msg, e) {
super.syntaxError(recognizer, offendingSymbol, line, column, msg, e);
const error = new ParseError(
e.ctx.parser.getTokenStream().getText(new Interval(
let query;

if (e) {
query = e.ctx.parser.getTokenStream().getText(new Interval(
e.ctx.start,
e.offendingToken.tokenIndex)
),
)

} else {
const min_to_check = Math.max(1, offendingSymbol.tokenIndex - 2)
const tokens = recognizer.getTokenStream().tokens.slice(min_to_check, offendingSymbol.tokenIndex + 1)
query = tokens.map((el) => el.text).join("")
}

const error = new ParseError(
query,
msg,
offendingSymbol,
e
Expand Down Expand Up @@ -221,6 +232,7 @@ export function sqlparse(query, raise_exception = false) {
const statementsContext = tree.children.filter((children) => children instanceof SqlBaseParser.StatementContext)

let statements = []

for (const statementContext of statementsContext) {
let stmt = new Statement(statementContext)

Expand Down
34 changes: 33 additions & 1 deletion cratedb_sqlparse_js/tests/exceptions.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@ test('Error should be collected and not thrown by default', () => {
expect(() => stmts).not.toThrowError()
})

test('Several Errors should be collected and not thrown by default', () => {
test('Single error should be collected', () => {
const stmt = sqlparse("SELECT A,B,C,D FROM tbl1 WHERE A ? '%A'")
expect(stmt[0].exception).toBeDefined()
expect(stmt[0].exception.msg).toBe("mismatched input '?' expecting {<EOF>, ';'}")
expect(stmt[0].exception.query).toBe("SELECT A,B,C,D FROM tbl1 WHERE A ?")
})

test('Several errors should be collected and not thrown by default', () => {
const stmts = sqlparse(`
SELECT A FROM tbl1 where;
SELECT 1;
Expand Down Expand Up @@ -73,6 +80,31 @@ test('Exception message is correct', () => {
})


test('White or special characters should not avoid exception catching', () => {
// https://github.com/crate/cratedb-sqlparse/issues/67
const stmts = [
`SELECT 1\n limit `,
`SELECT 1\r limit `,
`SELECT 1\t limit `,
`SELECT 1 limit `
]
for (const stmt in stmts) {
let r = sqlparse(stmt)
expect(r[0].exception).toBeDefined();
}
})

test('Missing token error should not panic', ()=> {
// See https://github.com/crate/cratedb-sqlparse/issues/66
sqlparse(`
CREATE TABLE t01 (
"x" OBJECT (DYNAMIC),
"y" OBJECT (DYNAMIC) AS ("z" ARRAY(OBJECT (DYNAMIC))
);
`)
})


test('Whitetest or special characters should not avoid exception catching', () => {
// https://github.com/crate/cratedb-sqlparse/issues/67
const stmts = [
Expand Down
22 changes: 19 additions & 3 deletions cratedb_sqlparse_py/cratedb_sqlparse/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,29 @@ def __init__(self):
self.errors = []

def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
if e:
query = recognizer.getTokenStream().getText(e.ctx.start, offendingSymbol.tokenIndex)

else:
# If antlr4 doesn't give us an error object, we heuristically create a query, or a piece of it
# so we increase the chances of it being correctly assigned.
# It means that theoretically if you input two wrong queries that antlr4 manages
# to distinguish as two different statements (which is hard already), and both are similar
# the errors could be matched wrongly. Still pretty rare, and it is very hard to come up with
# an actual query that does it.

# The newly generated query will be either the offendingToken + one token to the left
# or offendingToken + two tokens to the left, if the second is possible it takes precedence.
min_token_to_check = max(1, offendingSymbol.tokenIndex - 2)
tokens = recognizer.getTokenStream().tokens[min_token_to_check : offendingSymbol.tokenIndex + 1]
query = "".join(token.text for token in tokens)

error = ParsingException(
msg=msg,
offending_token=offendingSymbol,
e=e,
query=e.ctx.parser.getTokenStream().getText(e.ctx.start, e.offendingToken.tokenIndex),
e=e if e else type("NotViableInput", (Exception,), {})(),
query=query,
)

self.errors.append(error)


Expand Down
13 changes: 13 additions & 0 deletions cratedb_sqlparse_py/tests/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,16 @@ def test_sqlparse_catches_exception():
"""
for stmt in stmts:
assert sqlparse(stmt)[0].exception


def test_sqlparse_should_not_panic():
from cratedb_sqlparse import sqlparse

sqlparse("""
CREATE TABLE t01 (
"x" OBJECT (DYNAMIC),
"y" OBJECT (DYNAMIC) AS ("z" ARRAY(OBJECT (DYNAMIC))
);
""")[0]

# That's it, it shouldn't raise a runtime Exception.

0 comments on commit 56c9fca

Please sign in to comment.