HiveQL Parser. Parses HiveQL code and prints the AST in JSON format on success (exit code 0); otherwise prints a well-formed syntax error message (exit code 1).
$ mvn package
To build a standalone jar, use:
$ mvn clean compile assembly:single
$ java -jar /path/to/hiveql-parser.jar /path/to/your-code.sql
$ java -jar hiveql-parser.jar <(echo "select count(*) as count, myfield from &0rz") 2>/dev/null
[1,39]: line 1:39 cannot recognize input near '&' '0rz' '<EOF>' in join source
$ java -jar hiveql-parser.jar <(echo "select count(*) as count, myfield from 0rz") 2>/dev/null | jq .
{
"startIndex": 0,
"stopIndex": 18,
"childIndex": -1,
"children": [
{
"startIndex": 0,
"stopIndex": 16,
"token": {
"type": 860,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_QUERY",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": 14,
"stopIndex": 16,
"token": {
"type": 748,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_FROM",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": 16,
"stopIndex": 16,
"token": {
"type": 954,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_TABREF",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": 16,
"stopIndex": 16,
"token": {
"type": 953,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_TABNAME",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": 16,
"stopIndex": 16,
"token": {
"type": 26,
"line": 1,
"charPositionInLine": 39,
"channel": 0,
"index": 16,
"start": 39,
"stop": 41
},
"childIndex": 0,
"children": [],
"toString": "0rz",
"toStringTree": "0rz"
}
],
"toString": "TOK_TABNAME",
"toStringTree": "(tok_tabname 0rz)"
}
],
"toString": "TOK_TABREF",
"toStringTree": "(tok_tabref (tok_tabname 0rz))"
}
],
"toString": "TOK_FROM",
"toStringTree": "(tok_from (tok_tabref (tok_tabname 0rz)))"
},
{
"startIndex": -1,
"stopIndex": 12,
"token": {
"type": 772,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_INSERT",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 1,
"children": [
{
"startIndex": -1,
"stopIndex": -1,
"token": {
"type": 726,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_DESTINATION",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": -1,
"stopIndex": -1,
"token": {
"type": 727,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_DIR",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": -1,
"stopIndex": -1,
"token": {
"type": 963,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_TMP_FILE",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [],
"toString": "TOK_TMP_FILE",
"toStringTree": "tok_tmp_file"
}
],
"toString": "TOK_DIR",
"toStringTree": "(tok_dir tok_tmp_file)"
}
],
"toString": "TOK_DESTINATION",
"toStringTree": "(tok_destination (tok_dir tok_tmp_file))"
},
{
"startIndex": 0,
"stopIndex": 12,
"token": {
"type": 878,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_SELECT",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 1,
"children": [
{
"startIndex": 2,
"stopIndex": 9,
"token": {
"type": 880,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_SELEXPR",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": 2,
"stopIndex": 5,
"token": {
"type": 752,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_FUNCTIONSTAR",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": 2,
"stopIndex": 2,
"token": {
"type": 26,
"line": 1,
"charPositionInLine": 7,
"channel": 0,
"index": 2,
"start": 7,
"stop": 11
},
"childIndex": 0,
"children": [],
"toString": "count",
"toStringTree": "count"
}
],
"toString": "TOK_FUNCTIONSTAR",
"toStringTree": "(tok_functionstar count)"
},
{
"startIndex": 9,
"stopIndex": 9,
"token": {
"type": 26,
"line": 1,
"charPositionInLine": 19,
"channel": 0,
"index": 9,
"start": 19,
"stop": 23
},
"childIndex": 1,
"children": [],
"toString": "count",
"toStringTree": "count"
}
],
"toString": "TOK_SELEXPR",
"toStringTree": "(tok_selexpr (tok_functionstar count) count)"
},
{
"startIndex": 12,
"stopIndex": 12,
"token": {
"type": 880,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_SELEXPR",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 1,
"children": [
{
"startIndex": 12,
"stopIndex": 12,
"token": {
"type": 950,
"line": 0,
"charPositionInLine": -1,
"channel": 0,
"text": "TOK_TABLE_OR_COL",
"index": -1,
"start": 0,
"stop": 0
},
"childIndex": 0,
"children": [
{
"startIndex": 12,
"stopIndex": 12,
"token": {
"type": 26,
"line": 1,
"charPositionInLine": 26,
"channel": 0,
"index": 12,
"start": 26,
"stop": 32
},
"childIndex": 0,
"children": [],
"toString": "myfield",
"toStringTree": "myfield"
}
],
"toString": "TOK_TABLE_OR_COL",
"toStringTree": "(tok_table_or_col myfield)"
}
],
"toString": "TOK_SELEXPR",
"toStringTree": "(tok_selexpr (tok_table_or_col myfield))"
}
],
"toString": "TOK_SELECT",
"toStringTree": "(tok_select (tok_selexpr (tok_functionstar count) count) (tok_selexpr (tok_table_or_col myfield)))"
}
],
"toString": "TOK_INSERT",
"toStringTree": "(tok_insert (tok_destination (tok_dir tok_tmp_file)) (tok_select (tok_selexpr (tok_functionstar count) count) (tok_selexpr (tok_table_or_col myfield))))"
}
],
"toString": "TOK_QUERY",
"toStringTree": "(tok_query (tok_from (tok_tabref (tok_tabname 0rz))) (tok_insert (tok_destination (tok_dir tok_tmp_file)) (tok_select (tok_selexpr (tok_functionstar count) count) (tok_selexpr (tok_table_or_col myfield)))))"
},
{
"startIndex": 18,
"stopIndex": 18,
"token": {
"type": -1,
"line": 2,
"charPositionInLine": 0,
"channel": 0,
"index": 18,
"start": 43,
"stop": 43
},
"childIndex": 1,
"children": [],
"toString": "<EOF>",
"toStringTree": "<eof>"
}
],
"toString": "nil",
"toStringTree": "(tok_query (tok_from (tok_tabref (tok_tabname 0rz))) (tok_insert (tok_destination (tok_dir tok_tmp_file)) (tok_select (tok_selexpr (tok_functionstar count) count) (tok_selexpr (tok_table_or_col myfield))))) <eof>"
}