Skip to content

Commit 4f35ad2

Browse files
committed
WIP
1 parent fbe1332 commit 4f35ad2

23 files changed

+449
-337
lines changed

doc/token-types.md

Lines changed: 82 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,44 +2,85 @@
22
Token type hierarchy:
33
---------------------
44

5-
TokenType constants:
6-
7-
- **WHITESPACE**
8-
- **COMMENT**
9-
- **BLOCK_COMMENT** - `/* ... * /`
10-
- **OPTIONAL_COMMENT** - `/*! ... * /`
11-
- **HINT_COMMENT** - `/*+ ... * /`
12-
- **DOUBLE_HYPHEN_COMMENT** - `-- ...`
13-
- **DOUBLE_SLASH_COMMENT** - `// ...`
14-
- **HASH_COMMENT** - `# ...`
15-
- **NAME**
16-
- **UNQUOTED_NAME** - `table1` etc.
17-
- **KEYWORD** - `datetime` etc.
18-
- **RESERVED** - `SELECT` etc.
19-
- **OPERATOR** - `AND`, `OR` etc.
20-
- **DOUBLE_QUOTED_STRING** - `"table1"` (standard, MySQL in ANSI_STRINGS mode)
21-
- **BACKTICK_QUOTED_STRING** - `` `table1` `` (MySQL, PostgreSQL, Sqlite)
22-
- **SQUARE_BRACKETED_STRING** - `[table1]` (MSSQL, SqLite)
23-
- **AT_VARIABLE** - `@var`, `@@global`, `@'192.168.0.1'` (also includes host names)
24-
- **SINGLE_QUOTED_STRING** - `@'var'`
25-
- **DOUBLE_QUOTED_STRING** - `@"var"`
26-
- **BACKTICK_QUOTED_STRING** - `` @`var` ``
27-
- **VALUE**
28-
- **STRING**
29-
- **SINGLE_QUOTED_STRING** - `'string'` (standard)
30-
- **DOUBLE_QUOTED_STRING** - `"string"` (MySQL in default mode)
31-
* **DOLLAR_QUOTED_STRING** - `$foo$table1$foo$` (PostgreSQL)
32-
- **NUMBER**
33-
- **INT**
34-
- **UINT**
35-
- **BINARY_LITERAL**
36-
- **HEXADECIMAL_LITERAL**
37-
- **UUID** - e.g. `3E11FA47-71CA-11E1-9E33-C80AA9429562`
38-
- **SYMBOL** - `(`, `)`, `[`, `]`, `{`, `}`, `.`, `,`, `;`
39-
- **OPERATOR** - `+`, `||` etc.
40-
- **PLACEHOLDER** - placeholder for a parameter
41-
- **QUESTION_MARK_PLACEHOLDER** - `?` (SQL, Doctrine, Laravel)
42-
- **NUMBERED_QUESTION_MARK_PLACEHOLDER** - `?123` (Doctrine)
43-
- **DOUBLE_COLON_PLACEHOLDER** - `:foo` (Doctrine, Laravel)
44-
- **DELIMITER** - default `;`
45-
- **DELIMITER_DEFINITION**
5+
Token type and additional token info are packed into a 31-bit int
6+
7+
### TokenType constants (unique):
8+
9+
- **1K WHITESPACE**
10+
- COMMENT
11+
- **2K LINE_COMMENT**
12+
- DOUBLE_HYPHEN_COMMENT - `-- ...`
13+
- DOUBLE_SLASH_COMMENT - `// ...`
14+
- HASH_COMMENT - `# ...`
15+
- **4K BLOCK_COMMENT** - `/* ... * /`
16+
- OPTIONAL_COMMENT - `/*! ... * /`
17+
- HINT_COMMENT - `/*+ ... * /`
18+
- NAME
19+
- **32K UNQUOTED_NAME** - `table1` etc.
20+
- **16K KEYWORD** - `datetime` etc.
21+
- **8K RESERVED** - `SELECT` etc.
22+
- **512 OPERATOR** - `AND`, `OR` etc.
23+
- **64K QUOTED_NAME**
24+
- DOUBLE_QUOTED_NAME - `"table1"` (standard, MySQL in ANSI_STRINGS mode)
25+
- BACKTICK_QUOTED_NAME - `` `table1` `` (MySQL, PostgreSQL, SQLite)
26+
- SQUARE_BRACKETED_NAME - `[table1]` (MSSQL, SQLite)
27+
- **128K AT_VARIABLE** - `@var`, `@@global`, `@'192.168.0.1'` (also includes host names)
28+
- SINGLE_QUOTED_AT_VAR - `@'var'`
29+
- DOUBLE_QUOTED_AT_VAR - `@"var"`
30+
- BACKTICK_QUOTED_AT_VAR - `` @`var` ``
31+
- VALUE
32+
- **1M NUMBER**
33+
- **512K INT**
34+
- **256K UINT**
35+
- **2M STRING**
36+
- SINGLE_QUOTED_STRING - `'string'` (standard)
37+
- DOUBLE_QUOTED_STRING - `"string"` (MySQL in default mode)
38+
- DOLLAR_QUOTED_STRING - `$$table1$$` (PostgreSQL)
39+
- **4M BIT_STRING**
40+
- BINARY_LITERAL
41+
- OCTAL_LITERAL (PostgreSQL)
42+
- HEXADECIMAL_LITERAL
43+
- **8M UUID** - e.g. `3E11FA47-71CA-11E1-9E33-C80AA9429562`
44+
- **16M SYMBOL** - `(`, `)`, `[`, `]`, `{`, `}`, `.`, `,`, `;`
45+
- **512 OPERATOR** - `+`, `||` etc.
46+
- OPTIMIZER_HINT_START - `/*+`
47+
- OPTIMIZER_HINT_END - `*/`
48+
- N/A **CHARSET_INTRODUCER** - `N`
49+
- N/A DOLLAR_QUOTE - `$foo$` (PostgreSQL)
50+
- **32M PLACEHOLDER** - placeholder for a parameter
51+
- QUESTION_MARK_PLACEHOLDER - `?` (SQL, Doctrine, Laravel)
52+
- NUMBERED_QUESTION_MARK_PLACEHOLDER - `?123` (Doctrine)
53+
- DOUBLE_COLON_PLACEHOLDER - `:foo` (Doctrine, Laravel)
54+
- **64M DELIMITER** - default `;`
55+
- **128M DELIMITER_DEFINITION**
56+
- **256M END**
57+
- **1G INVALID**
58+
59+
values 512 and 512M are free for now
60+
61+
### Token info (non unique):
62+
63+
Comment type:
64+
- **1** DOUBLE_HYPHEN_COMMENT
65+
- **2** DOUBLE_SLASH_COMMENT
66+
- **4** HASH_COMMENT
67+
- **8** OPTIONAL_COMMENT
68+
- **16** OPTIMIZER_HINT_COMMENT
69+
70+
Quoting:
71+
- **1** SINGLE_QUOTED `'`
72+
- **2** DOUBLE_QUOTED `"`
73+
- **4** BACKTICK_QUOTED `` ` ``
74+
- **8** SQUARE_BRACKETED `[]`
75+
- **16** DOUBLE_DOLLAR_QUOTED `$$`
76+
- **32** DOLLAR_TAG_QUOTED `$foo$string value$foo$`
77+
- **64** (reserved)
78+
79+
Base:
80+
- **1** (reserved for single quoted literals)
81+
- **2** (reserved for double quoted literals)
82+
- **4** binary
83+
- **8** octal (PostgreSQL)
84+
- **16** hexadecimal
85+
- **32** (base 32?)
86+
- **64** (base 64?)

sources/Parser/Ddl/TableCommandsParser.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ public function parseAlterTable(TokenList $tokenList): AlterTableCommand
620620
$tokenList->expectKeyword(Keyword::PARTITION);
621621
$position = $tokenList->getPosition();
622622
$oldPartitions = $newPartitions = null;
623-
if ($tokenList->has(TokenType::NAME)) {
623+
if ($tokenList->has(TokenType::QUOTED_NAME | TokenType::UNQUOTED_NAME)) {
624624
$oldPartitions = $this->parsePartitionNames($tokenList->rewind($position));
625625
if ($oldPartitions === null) {
626626
$tokenList->missing('Expected specific partition names, found "ALL".');

sources/Parser/Dml/DeleteCommandParser.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
use SqlFtw\Sql\Expression\QualifiedName;
2121
use SqlFtw\Sql\Expression\SimpleName;
2222
use SqlFtw\Sql\Keyword;
23+
use SqlFtw\Sql\Symbol;
2324

2425
class DeleteCommandParser
2526
{
@@ -64,7 +65,7 @@ public function parseDelete(TokenList $tokenList, ?WithClause $with = null): Del
6465
$tokenList->expectKeyword(Keyword::DELETE);
6566

6667
$optimizerHints = null;
67-
if ($tokenList->has(TokenType::OPTIMIZER_HINT_START)) {
68+
if ($tokenList->hasSymbol(Symbol::OPTIMIZER_HINT_START)) {
6869
$optimizerHints = $this->optimizerHintParser->parseHints($tokenList->rewind(-1));
6970
}
7071

sources/Parser/Dml/InsertCommandParser.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
use SqlFtw\Sql\Expression\Operator;
3131
use SqlFtw\Sql\Keyword;
3232
use SqlFtw\Sql\SubqueryType;
33+
use SqlFtw\Sql\Symbol;
3334

3435
class InsertCommandParser
3536
{
@@ -101,7 +102,7 @@ public function parseInsert(TokenList $tokenList): InsertCommand
101102
$tokenList->expectKeyword(Keyword::INSERT);
102103

103104
$optimizerHints = null;
104-
if ($tokenList->has(TokenType::OPTIMIZER_HINT_START)) {
105+
if ($tokenList->hasSymbol(Symbol::OPTIMIZER_HINT_START)) {
105106
$optimizerHints = $this->optimizerHintParser->parseHints($tokenList->rewind(-1));
106107
}
107108

@@ -185,7 +186,7 @@ public function parseReplace(TokenList $tokenList): ReplaceCommand
185186
$tokenList->expectKeyword(Keyword::REPLACE);
186187

187188
$optimizerHints = null;
188-
if ($tokenList->has(TokenType::OPTIMIZER_HINT_START)) {
189+
if ($tokenList->hasSymbol(Symbol::OPTIMIZER_HINT_START)) {
189190
$optimizerHints = $this->optimizerHintParser->parseHints($tokenList->rewind(-1));
190191
}
191192

sources/Parser/Dml/OptimizerHintParser.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
use SqlFtw\Sql\Expression\QualifiedName;
4242
use SqlFtw\Sql\Expression\SimpleName;
4343
use SqlFtw\Sql\MysqlVariable;
44+
use SqlFtw\Sql\Symbol;
4445
use function substr;
4546

4647
class OptimizerHintParser
@@ -124,7 +125,7 @@ public function __construct(ExpressionParser $expressionParser)
124125
*/
125126
public function parseHints(TokenList $tokenList): ?array
126127
{
127-
$tokenList->expect(TokenType::OPTIMIZER_HINT_START);
128+
$tokenList->expectSymbol(Symbol::OPTIMIZER_HINT_START);
128129

129130
$hints = [];
130131
do {
@@ -133,7 +134,7 @@ public function parseHints(TokenList $tokenList): ?array
133134
} catch (InvalidTokenException $e) {
134135
// fallback to regular comment (ignored)
135136
while ($token = $tokenList->get()) {
136-
if ($token->type === TokenType::OPTIMIZER_HINT_END) {
137+
if ($token->type === TokenType::SYMBOL && $token->value === Symbol::OPTIMIZER_HINT_END) {
137138
// todo: parser warning
138139
return null;
139140
}
@@ -270,7 +271,7 @@ public function parseHints(TokenList $tokenList): ?array
270271
if ($open) {
271272
$tokenList->expectSymbol(')');
272273
}
273-
} while (!$tokenList->has(TokenType::OPTIMIZER_HINT_END));
274+
} while (!$tokenList->hasSymbol(Symbol::OPTIMIZER_HINT_END));
274275

275276
return $hints;
276277
}

sources/Parser/Dml/QueryParser.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
use SqlFtw\Sql\Keyword;
7070
use SqlFtw\Sql\Order;
7171
use SqlFtw\Sql\SubqueryType;
72+
use SqlFtw\Sql\Symbol;
7273
use function array_pop;
7374
use function count;
7475
use function in_array;
@@ -401,7 +402,7 @@ public function parseSelect(TokenList $tokenList, ?WithClause $with = null): Que
401402
$tokenList->expectKeyword(Keyword::SELECT);
402403

403404
$optimizerHints = null;
404-
if ($tokenList->has(TokenType::OPTIMIZER_HINT_START)) {
405+
if ($tokenList->hasSymbol(Symbol::OPTIMIZER_HINT_START)) {
405406
$optimizerHints = $this->optimizerHintParser->parseHints($tokenList->rewind(-1));
406407
}
407408

sources/Parser/Dml/UpdateCommandParser.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
use SqlFtw\Sql\Dml\WithClause;
1919
use SqlFtw\Sql\Expression\Operator;
2020
use SqlFtw\Sql\Keyword;
21+
use SqlFtw\Sql\Symbol;
2122

2223
class UpdateCommandParser
2324
{
@@ -55,7 +56,7 @@ public function parseUpdate(TokenList $tokenList, ?WithClause $with = null): Upd
5556
$tokenList->expectKeyword(Keyword::UPDATE);
5657

5758
$optimizerHints = null;
58-
if ($tokenList->has(TokenType::OPTIMIZER_HINT_START)) {
59+
if ($tokenList->hasSymbol(Symbol::OPTIMIZER_HINT_START)) {
5960
$optimizerHints = $this->optimizerHintParser->parseHints($tokenList->rewind(-1));
6061
}
6162

sources/Parser/ExpressionParser.php

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,12 @@
8585
use SqlFtw\Sql\Order;
8686
use SqlFtw\Sql\SqlMode;
8787
use SqlFtw\Sql\SubqueryType;
88+
use SqlFtw\Sql\Symbol;
8889
use function array_values;
8990
use function count;
9091
use function in_array;
9192
use function ltrim;
93+
use function rl;
9294
use function sprintf;
9395
use function strcasecmp;
9496
use function strlen;
@@ -644,17 +646,17 @@ private function parseSimpleExpressionLeft(TokenList $tokenList): RootNode
644646
}
645647

646648
$position = $tokenList->getPosition();
647-
$token = $tokenList->expect(TokenType::VALUE | TokenType::NAME);
649+
$token = $tokenList->expect(TokenType::VALUES | TokenType::NAMES);
648650

649-
if (($token->type & TokenType::BINARY_LITERAL) !== 0) {
651+
if (($token->type & TokenType::BINARY_LITERAL) === TokenType::BINARY_LITERAL) {
650652
return new BinaryLiteral($token->value);
651653
} elseif (($token->type & TokenType::UINT) !== 0) {
652654
return new UintLiteral($token->value);
653655
} elseif (($token->type & TokenType::INT) !== 0) {
654656
return new IntLiteral($token->value);
655657
} elseif (($token->type & TokenType::NUMBER) !== 0) {
656658
return new NumericLiteral($token->value);
657-
} elseif (($token->type & TokenType::SYMBOL) !== 0 && $token->value === '\\N') {
659+
} elseif (($token->type & TokenType::SYMBOL) !== 0 && $token->value === Symbol::OLD_NULL_SYMBOL) {
658660
return new NullLiteral();
659661
} elseif (($token->type & TokenType::KEYWORD) !== 0) {
660662
$upper = strtoupper($token->value);
@@ -921,7 +923,7 @@ public function parseLiteral(TokenList $tokenList): Literal
921923
return $value;
922924
}
923925

924-
$token = $tokenList->expect(TokenType::VALUE | TokenType::KEYWORD);
926+
$token = $tokenList->expect(TokenType::VALUES | TokenType::KEYWORD);
925927

926928
if (($token->type & TokenType::KEYWORD) !== 0) {
927929
$upper = strtoupper($token->value);
@@ -942,7 +944,7 @@ public function parseLiteral(TokenList $tokenList): Literal
942944
} else {
943945
$tokenList->missingAnyKeyword(Keyword::NULL, Keyword::TRUE, Keyword::FALSE, Keyword::DEFAULT, Keyword::ON, Keyword::OFF, Keyword::ALL, Keyword::NONE);
944946
}
945-
} elseif (($token->type & TokenType::BINARY_LITERAL) !== 0) {
947+
} elseif (($token->type & TokenType::BINARY_LITERAL) === TokenType::BINARY_LITERAL) {
946948
return new BinaryLiteral($token->value);
947949
} elseif (($token->type & TokenType::UINT) !== 0) {
948950
return new UintLiteral($token->value);
@@ -1016,13 +1018,13 @@ private function parsePlaceholder(TokenList $tokenList, bool $ifAllowedAnywhere
10161018
throw new ParserException("Placeholder {$token->value} is not allowed here.", $tokenList);
10171019
}
10181020

1019-
if (($token->type & TokenType::QUESTION_MARK_PLACEHOLDER) !== 0 && (($extensions & ClientSideExtension::ALLOW_QUESTION_MARK_PLACEHOLDERS_OUTSIDE_PREPARED_STATEMENTS) !== 0 || $tokenList->inPrepared())) {
1021+
if (($token->type & TokenType::QUESTION_MARK_PLACEHOLDER) === TokenType::QUESTION_MARK_PLACEHOLDER && (($extensions & ClientSideExtension::ALLOW_QUESTION_MARK_PLACEHOLDERS_OUTSIDE_PREPARED_STATEMENTS) !== 0 || $tokenList->inPrepared())) {
10201022
// param_marker
10211023
return new QuestionMarkPlaceholder();
1022-
} elseif (($token->type & TokenType::NUMBERED_QUESTION_MARK_PLACEHOLDER) !== 0 && ($extensions & ClientSideExtension::ALLOW_NUMBERED_QUESTION_MARK_PLACEHOLDERS) !== 0) {
1024+
} elseif (($token->type & TokenType::NUMBERED_QUESTION_MARK_PLACEHOLDER) === TokenType::NUMBERED_QUESTION_MARK_PLACEHOLDER && ($extensions & ClientSideExtension::ALLOW_NUMBERED_QUESTION_MARK_PLACEHOLDERS) !== 0) {
10231025
// ?123
10241026
return new NumberedQuestionMarkPlaceholder($token->value);
1025-
} elseif (($token->type & TokenType::DOUBLE_COLON_PLACEHOLDER) !== 0 && ($extensions & ClientSideExtension::ALLOW_NAMED_DOUBLE_COLON_PLACEHOLDERS) !== 0) {
1027+
} elseif (($token->type & TokenType::DOUBLE_COLON_PLACEHOLDER) === TokenType::DOUBLE_COLON_PLACEHOLDER && ($extensions & ClientSideExtension::ALLOW_NAMED_DOUBLE_COLON_PLACEHOLDERS) !== 0) {
10261028
// :var
10271029
return new DoubleColonPlaceholder($token->value);
10281030
} else {
@@ -1082,7 +1084,7 @@ public function parseOrderBy(TokenList $tokenList, bool $nameOnly = false): arra
10821084
public function parseLimitOrOffsetValue(TokenList $tokenList)
10831085
{
10841086
if ($tokenList->inRoutine() !== null) {
1085-
$token = $tokenList->get(TokenType::NAME, TokenType::AT_VARIABLE);
1087+
$token = $tokenList->get(TokenType::QUOTED_NAME | TokenType::UNQUOTED_NAME);
10861088
if ($token !== null) {
10871089
return new SimpleName($token->value);
10881090
}

0 commit comments

Comments
 (0)