sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 return lambda self, this: self._parse_escape( 47 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 48 ) 49 50 51def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 62 63 64def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _builder(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _builder 75 76 77def build_mod(args: t.List) -> exp.Mod: 78 this = seq_get(args, 0) 79 expression = seq_get(args, 1) 80 81 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 82 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 83 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 84 85 return exp.Mod(this=this, expression=expression) 86 87 88class _Parser(type): 89 def __new__(cls, clsname, bases, attrs): 90 klass = super().__new__(cls, clsname, bases, attrs) 91 92 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 93 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 94 95 return klass 96 97 98class Parser(metaclass=_Parser): 99 """ 100 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 101 102 Args: 103 error_level: The desired error level. 104 Default: ErrorLevel.IMMEDIATE 105 error_message_context: The amount of context to capture from a query string when displaying 106 the error message (in number of characters). 107 Default: 100 108 max_errors: Maximum number of error messages to include in a raised ParseError. 109 This is only relevant if error_level is ErrorLevel.RAISE. 110 Default: 3 111 """ 112 113 FUNCTIONS: t.Dict[str, t.Callable] = { 114 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 115 "CONCAT": lambda args, dialect: exp.Concat( 116 expressions=args, 117 safe=not dialect.STRICT_STRING_CONCAT, 118 coalesce=dialect.CONCAT_COALESCE, 119 ), 120 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 121 expressions=args, 122 safe=not dialect.STRICT_STRING_CONCAT, 123 coalesce=dialect.CONCAT_COALESCE, 124 ), 125 "DATE_TO_DATE_STR": lambda args: exp.Cast( 126 this=seq_get(args, 0), 127 to=exp.DataType(this=exp.DataType.Type.TEXT), 128 ), 129 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 130 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 131 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 132 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 133 "LIKE": build_like, 134 "LOG": build_logarithm, 135 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 136 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 137 "MOD": build_mod, 138 "TIME_TO_TIME_STR": lambda args: exp.Cast( 139 this=seq_get(args, 0), 140 to=exp.DataType(this=exp.DataType.Type.TEXT), 141 ), 142 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 143 this=exp.Cast( 144 this=seq_get(args, 0), 145 to=exp.DataType(this=exp.DataType.Type.TEXT), 146 ), 147 start=exp.Literal.number(1), 148 length=exp.Literal.number(10), 149 ), 150 "VAR_MAP": build_var_map, 151 } 152 153 NO_PAREN_FUNCTIONS = { 154 TokenType.CURRENT_DATE: exp.CurrentDate, 155 TokenType.CURRENT_DATETIME: exp.CurrentDate, 156 TokenType.CURRENT_TIME: exp.CurrentTime, 157 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 158 TokenType.CURRENT_USER: exp.CurrentUser, 159 } 160 161 STRUCT_TYPE_TOKENS = { 162 TokenType.NESTED, 163 TokenType.OBJECT, 164 TokenType.STRUCT, 165 } 166 167 NESTED_TYPE_TOKENS = { 168 TokenType.ARRAY, 169 TokenType.LOWCARDINALITY, 170 TokenType.MAP, 171 TokenType.NULLABLE, 172 *STRUCT_TYPE_TOKENS, 173 } 174 175 ENUM_TYPE_TOKENS = { 176 TokenType.ENUM, 177 TokenType.ENUM8, 178 TokenType.ENUM16, 179 } 180 181 AGGREGATE_TYPE_TOKENS = { 182 TokenType.AGGREGATEFUNCTION, 183 TokenType.SIMPLEAGGREGATEFUNCTION, 184 } 185 186 TYPE_TOKENS = { 187 TokenType.BIT, 188 TokenType.BOOLEAN, 189 TokenType.TINYINT, 190 TokenType.UTINYINT, 191 TokenType.SMALLINT, 192 TokenType.USMALLINT, 193 TokenType.INT, 194 TokenType.UINT, 195 TokenType.BIGINT, 196 TokenType.UBIGINT, 197 TokenType.INT128, 198 TokenType.UINT128, 199 TokenType.INT256, 200 TokenType.UINT256, 201 TokenType.MEDIUMINT, 202 TokenType.UMEDIUMINT, 203 TokenType.FIXEDSTRING, 204 TokenType.FLOAT, 205 TokenType.DOUBLE, 206 TokenType.CHAR, 207 TokenType.NCHAR, 208 TokenType.VARCHAR, 209 TokenType.NVARCHAR, 210 TokenType.BPCHAR, 211 TokenType.TEXT, 212 TokenType.MEDIUMTEXT, 213 TokenType.LONGTEXT, 214 TokenType.MEDIUMBLOB, 215 TokenType.LONGBLOB, 216 TokenType.BINARY, 217 TokenType.VARBINARY, 218 TokenType.JSON, 219 TokenType.JSONB, 220 TokenType.INTERVAL, 221 TokenType.TINYBLOB, 222 TokenType.TINYTEXT, 223 TokenType.TIME, 224 TokenType.TIMETZ, 225 TokenType.TIMESTAMP, 226 TokenType.TIMESTAMP_S, 227 TokenType.TIMESTAMP_MS, 228 TokenType.TIMESTAMP_NS, 229 TokenType.TIMESTAMPTZ, 230 TokenType.TIMESTAMPLTZ, 231 TokenType.TIMESTAMPNTZ, 232 TokenType.DATETIME, 233 TokenType.DATETIME64, 234 TokenType.DATE, 235 TokenType.DATE32, 236 TokenType.INT4RANGE, 237 TokenType.INT4MULTIRANGE, 238 TokenType.INT8RANGE, 239 TokenType.INT8MULTIRANGE, 240 TokenType.NUMRANGE, 241 TokenType.NUMMULTIRANGE, 242 TokenType.TSRANGE, 243 TokenType.TSMULTIRANGE, 244 TokenType.TSTZRANGE, 245 TokenType.TSTZMULTIRANGE, 246 TokenType.DATERANGE, 247 TokenType.DATEMULTIRANGE, 248 TokenType.DECIMAL, 249 TokenType.UDECIMAL, 250 TokenType.BIGDECIMAL, 251 TokenType.UUID, 252 TokenType.GEOGRAPHY, 253 TokenType.GEOMETRY, 254 TokenType.HLLSKETCH, 255 TokenType.HSTORE, 256 TokenType.PSEUDO_TYPE, 257 TokenType.SUPER, 258 TokenType.SERIAL, 259 TokenType.SMALLSERIAL, 260 TokenType.BIGSERIAL, 261 TokenType.XML, 262 TokenType.YEAR, 263 TokenType.UNIQUEIDENTIFIER, 264 TokenType.USERDEFINED, 265 TokenType.MONEY, 266 TokenType.SMALLMONEY, 267 TokenType.ROWVERSION, 268 TokenType.IMAGE, 269 TokenType.VARIANT, 270 TokenType.OBJECT, 271 TokenType.OBJECT_IDENTIFIER, 272 TokenType.INET, 273 TokenType.IPADDRESS, 274 TokenType.IPPREFIX, 275 TokenType.IPV4, 276 TokenType.IPV6, 277 TokenType.UNKNOWN, 278 TokenType.NULL, 279 TokenType.NAME, 280 TokenType.TDIGEST, 281 *ENUM_TYPE_TOKENS, 282 *NESTED_TYPE_TOKENS, 283 *AGGREGATE_TYPE_TOKENS, 284 } 285 286 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 287 TokenType.BIGINT: TokenType.UBIGINT, 288 TokenType.INT: TokenType.UINT, 289 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 290 TokenType.SMALLINT: TokenType.USMALLINT, 291 TokenType.TINYINT: TokenType.UTINYINT, 292 TokenType.DECIMAL: TokenType.UDECIMAL, 293 } 294 295 SUBQUERY_PREDICATES = { 296 TokenType.ANY: exp.Any, 297 TokenType.ALL: exp.All, 298 TokenType.EXISTS: exp.Exists, 299 TokenType.SOME: exp.Any, 300 } 301 302 RESERVED_TOKENS = { 303 *Tokenizer.SINGLE_TOKENS.values(), 304 TokenType.SELECT, 305 } - {TokenType.IDENTIFIER} 306 307 DB_CREATABLES = { 308 TokenType.DATABASE, 309 TokenType.SCHEMA, 310 TokenType.TABLE, 311 TokenType.VIEW, 312 TokenType.MODEL, 313 TokenType.DICTIONARY, 314 TokenType.SEQUENCE, 315 TokenType.STORAGE_INTEGRATION, 316 } 317 318 CREATABLES = { 319 TokenType.COLUMN, 320 TokenType.CONSTRAINT, 321 TokenType.FUNCTION, 322 TokenType.INDEX, 323 TokenType.PROCEDURE, 324 TokenType.FOREIGN_KEY, 325 *DB_CREATABLES, 326 } 327 328 # Tokens that can represent identifiers 329 ID_VAR_TOKENS = { 330 TokenType.VAR, 331 TokenType.ANTI, 332 TokenType.APPLY, 333 TokenType.ASC, 334 TokenType.ASOF, 335 TokenType.AUTO_INCREMENT, 336 TokenType.BEGIN, 337 TokenType.BPCHAR, 338 TokenType.CACHE, 339 TokenType.CASE, 340 TokenType.COLLATE, 341 TokenType.COMMAND, 342 TokenType.COMMENT, 343 TokenType.COMMIT, 344 TokenType.CONSTRAINT, 345 TokenType.COPY, 346 TokenType.DEFAULT, 347 TokenType.DELETE, 348 TokenType.DESC, 349 TokenType.DESCRIBE, 350 TokenType.DICTIONARY, 351 TokenType.DIV, 352 TokenType.END, 353 TokenType.EXECUTE, 354 TokenType.ESCAPE, 355 TokenType.FALSE, 356 TokenType.FIRST, 357 TokenType.FILTER, 358 TokenType.FINAL, 359 TokenType.FORMAT, 360 TokenType.FULL, 361 TokenType.IDENTIFIER, 362 TokenType.IS, 363 TokenType.ISNULL, 364 TokenType.INTERVAL, 365 TokenType.KEEP, 366 TokenType.KILL, 367 TokenType.LEFT, 368 TokenType.LOAD, 369 TokenType.MERGE, 370 TokenType.NATURAL, 371 TokenType.NEXT, 372 TokenType.OFFSET, 373 TokenType.OPERATOR, 374 TokenType.ORDINALITY, 375 TokenType.OVERLAPS, 376 TokenType.OVERWRITE, 377 TokenType.PARTITION, 378 TokenType.PERCENT, 379 TokenType.PIVOT, 380 TokenType.PRAGMA, 381 TokenType.RANGE, 382 TokenType.RECURSIVE, 383 TokenType.REFERENCES, 384 TokenType.REFRESH, 385 TokenType.REPLACE, 386 TokenType.RIGHT, 387 TokenType.ROW, 388 TokenType.ROWS, 389 TokenType.SEMI, 390 TokenType.SET, 391 TokenType.SETTINGS, 392 TokenType.SHOW, 393 TokenType.TEMPORARY, 394 TokenType.TOP, 395 TokenType.TRUE, 396 TokenType.TRUNCATE, 397 TokenType.UNIQUE, 398 TokenType.UNPIVOT, 399 TokenType.UPDATE, 400 TokenType.USE, 401 TokenType.VOLATILE, 402 TokenType.WINDOW, 403 *CREATABLES, 404 *SUBQUERY_PREDICATES, 405 *TYPE_TOKENS, 406 *NO_PAREN_FUNCTIONS, 407 } 408 409 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 410 411 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 412 TokenType.ANTI, 413 TokenType.APPLY, 414 TokenType.ASOF, 415 TokenType.FULL, 416 TokenType.LEFT, 417 TokenType.LOCK, 418 TokenType.NATURAL, 419 TokenType.OFFSET, 420 TokenType.RIGHT, 421 TokenType.SEMI, 422 TokenType.WINDOW, 423 } 424 425 ALIAS_TOKENS = ID_VAR_TOKENS 426 427 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 428 429 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 430 431 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 432 433 FUNC_TOKENS = { 434 TokenType.COLLATE, 435 TokenType.COMMAND, 436 TokenType.CURRENT_DATE, 437 TokenType.CURRENT_DATETIME, 438 TokenType.CURRENT_TIMESTAMP, 439 TokenType.CURRENT_TIME, 440 TokenType.CURRENT_USER, 441 TokenType.FILTER, 442 TokenType.FIRST, 443 TokenType.FORMAT, 444 TokenType.GLOB, 445 TokenType.IDENTIFIER, 446 TokenType.INDEX, 447 TokenType.ISNULL, 448 TokenType.ILIKE, 449 TokenType.INSERT, 450 TokenType.LIKE, 451 TokenType.MERGE, 452 TokenType.OFFSET, 453 TokenType.PRIMARY_KEY, 454 TokenType.RANGE, 455 TokenType.REPLACE, 456 TokenType.RLIKE, 457 TokenType.ROW, 458 TokenType.UNNEST, 459 TokenType.VAR, 460 TokenType.LEFT, 461 TokenType.RIGHT, 462 TokenType.SEQUENCE, 463 TokenType.DATE, 464 TokenType.DATETIME, 465 TokenType.TABLE, 466 TokenType.TIMESTAMP, 467 TokenType.TIMESTAMPTZ, 468 TokenType.TRUNCATE, 469 TokenType.WINDOW, 470 TokenType.XOR, 471 *TYPE_TOKENS, 472 *SUBQUERY_PREDICATES, 473 } 474 475 CONJUNCTION = { 476 TokenType.AND: exp.And, 477 TokenType.OR: exp.Or, 478 } 479 480 EQUALITY = { 481 TokenType.COLON_EQ: exp.PropertyEQ, 482 TokenType.EQ: exp.EQ, 483 TokenType.NEQ: exp.NEQ, 484 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 485 } 486 487 COMPARISON = { 488 TokenType.GT: exp.GT, 489 TokenType.GTE: exp.GTE, 490 TokenType.LT: exp.LT, 491 TokenType.LTE: exp.LTE, 492 } 493 494 BITWISE = { 495 TokenType.AMP: exp.BitwiseAnd, 496 TokenType.CARET: exp.BitwiseXor, 497 TokenType.PIPE: exp.BitwiseOr, 498 } 499 500 TERM = { 501 TokenType.DASH: exp.Sub, 502 TokenType.PLUS: exp.Add, 503 TokenType.MOD: exp.Mod, 504 TokenType.COLLATE: exp.Collate, 505 } 506 507 FACTOR = { 508 TokenType.DIV: exp.IntDiv, 509 TokenType.LR_ARROW: exp.Distance, 510 TokenType.SLASH: exp.Div, 511 TokenType.STAR: exp.Mul, 512 } 513 514 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 515 516 TIMES = { 517 TokenType.TIME, 518 TokenType.TIMETZ, 519 } 520 521 TIMESTAMPS = { 522 TokenType.TIMESTAMP, 523 TokenType.TIMESTAMPTZ, 524 TokenType.TIMESTAMPLTZ, 525 *TIMES, 526 } 527 528 SET_OPERATIONS = { 529 TokenType.UNION, 530 TokenType.INTERSECT, 531 TokenType.EXCEPT, 532 } 533 534 JOIN_METHODS = { 535 TokenType.ASOF, 536 TokenType.NATURAL, 537 TokenType.POSITIONAL, 538 } 539 540 JOIN_SIDES = { 541 TokenType.LEFT, 542 TokenType.RIGHT, 543 TokenType.FULL, 544 } 545 546 JOIN_KINDS = { 547 TokenType.INNER, 548 TokenType.OUTER, 549 TokenType.CROSS, 550 TokenType.SEMI, 551 TokenType.ANTI, 552 } 553 554 JOIN_HINTS: t.Set[str] = set() 555 556 LAMBDAS = { 557 TokenType.ARROW: lambda self, expressions: self.expression( 558 exp.Lambda, 559 this=self._replace_lambda( 560 self._parse_conjunction(), 561 {node.name for node in expressions}, 562 ), 563 expressions=expressions, 564 ), 565 TokenType.FARROW: lambda self, expressions: self.expression( 566 exp.Kwarg, 567 this=exp.var(expressions[0].name), 568 expression=self._parse_conjunction(), 569 ), 570 } 571 572 COLUMN_OPERATORS = { 573 TokenType.DOT: None, 574 TokenType.DCOLON: lambda self, this, to: self.expression( 575 exp.Cast if self.STRICT_CAST else exp.TryCast, 576 this=this, 577 to=to, 578 ), 579 TokenType.ARROW: lambda self, this, path: self.expression( 580 exp.JSONExtract, 581 this=this, 582 expression=self.dialect.to_json_path(path), 583 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 584 ), 585 TokenType.DARROW: lambda self, this, path: self.expression( 586 exp.JSONExtractScalar, 587 this=this, 588 expression=self.dialect.to_json_path(path), 589 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 590 ), 591 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 592 exp.JSONBExtract, 593 this=this, 594 expression=path, 595 ), 596 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 597 exp.JSONBExtractScalar, 598 this=this, 599 expression=path, 600 ), 601 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 602 exp.JSONBContains, 603 this=this, 604 expression=key, 605 ), 606 } 607 608 EXPRESSION_PARSERS = { 609 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 610 exp.Column: lambda self: self._parse_column(), 611 exp.Condition: lambda self: self._parse_conjunction(), 612 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 613 exp.Expression: lambda self: self._parse_expression(), 614 exp.From: lambda self: self._parse_from(joins=True), 615 exp.Group: lambda self: self._parse_group(), 616 exp.Having: lambda self: self._parse_having(), 617 exp.Identifier: lambda self: self._parse_id_var(), 618 exp.Join: lambda self: self._parse_join(), 619 exp.Lambda: lambda self: self._parse_lambda(), 620 exp.Lateral: lambda self: self._parse_lateral(), 621 exp.Limit: lambda self: self._parse_limit(), 622 exp.Offset: lambda self: self._parse_offset(), 623 exp.Order: lambda self: self._parse_order(), 624 exp.Ordered: lambda self: self._parse_ordered(), 625 exp.Properties: lambda self: self._parse_properties(), 626 exp.Qualify: lambda self: self._parse_qualify(), 627 exp.Returning: lambda self: self._parse_returning(), 628 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 629 exp.Table: lambda self: self._parse_table_parts(), 630 exp.TableAlias: lambda self: self._parse_table_alias(), 631 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 632 exp.Where: lambda self: self._parse_where(), 633 exp.Window: lambda self: self._parse_named_window(), 634 exp.With: lambda self: self._parse_with(), 635 "JOIN_TYPE": lambda self: self._parse_join_parts(), 636 } 637 638 STATEMENT_PARSERS = { 639 TokenType.ALTER: lambda self: self._parse_alter(), 640 TokenType.BEGIN: lambda self: self._parse_transaction(), 641 TokenType.CACHE: lambda self: self._parse_cache(), 642 TokenType.COMMENT: lambda self: self._parse_comment(), 643 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 644 TokenType.COPY: lambda self: self._parse_copy(), 645 TokenType.CREATE: lambda self: self._parse_create(), 646 TokenType.DELETE: lambda self: self._parse_delete(), 647 TokenType.DESC: lambda self: self._parse_describe(), 648 TokenType.DESCRIBE: lambda self: self._parse_describe(), 649 TokenType.DROP: lambda self: self._parse_drop(), 650 TokenType.INSERT: lambda self: self._parse_insert(), 651 TokenType.KILL: lambda self: self._parse_kill(), 652 TokenType.LOAD: lambda self: self._parse_load(), 653 TokenType.MERGE: lambda self: self._parse_merge(), 654 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 655 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 656 TokenType.REFRESH: lambda self: self._parse_refresh(), 657 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 658 TokenType.SET: lambda self: self._parse_set(), 659 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 660 TokenType.UNCACHE: lambda self: self._parse_uncache(), 661 TokenType.UPDATE: lambda self: self._parse_update(), 662 TokenType.USE: lambda self: self.expression( 663 exp.Use, 664 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 665 this=self._parse_table(schema=False), 666 ), 667 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 668 } 669 670 UNARY_PARSERS = { 671 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 672 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 673 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 674 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 675 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 676 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 677 } 678 679 STRING_PARSERS = { 680 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 681 exp.RawString, this=token.text 682 ), 683 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 684 exp.National, this=token.text 685 ), 686 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 687 TokenType.STRING: lambda self, token: self.expression( 688 exp.Literal, this=token.text, is_string=True 689 ), 690 TokenType.UNICODE_STRING: lambda self, token: self.expression( 691 exp.UnicodeString, 692 this=token.text, 693 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 694 ), 695 } 696 697 NUMERIC_PARSERS = { 698 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 699 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 700 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 701 TokenType.NUMBER: lambda self, token: self.expression( 702 exp.Literal, this=token.text, is_string=False 703 ), 704 } 705 706 PRIMARY_PARSERS = { 707 **STRING_PARSERS, 708 **NUMERIC_PARSERS, 709 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 710 TokenType.NULL: lambda self, _: self.expression(exp.Null), 711 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 712 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 713 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 714 TokenType.STAR: lambda self, _: self.expression( 715 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 716 ), 717 } 718 719 PLACEHOLDER_PARSERS = { 720 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 721 TokenType.PARAMETER: lambda self: self._parse_parameter(), 722 TokenType.COLON: lambda self: ( 723 self.expression(exp.Placeholder, this=self._prev.text) 724 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 725 else None 726 ), 727 } 728 729 RANGE_PARSERS = { 730 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 731 TokenType.GLOB: binary_range_parser(exp.Glob), 732 TokenType.ILIKE: binary_range_parser(exp.ILike), 733 TokenType.IN: lambda self, this: self._parse_in(this), 734 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 735 TokenType.IS: lambda self, this: self._parse_is(this), 736 TokenType.LIKE: binary_range_parser(exp.Like), 737 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 738 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 739 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 740 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 741 } 742 743 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 744 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 745 "AUTO": lambda self: self._parse_auto_property(), 746 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 747 "BACKUP": lambda self: self.expression( 748 exp.BackupProperty, this=self._parse_var(any_token=True) 749 ), 750 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 751 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 752 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 753 "CHECKSUM": lambda self: self._parse_checksum(), 754 "CLUSTER BY": lambda self: self._parse_cluster(), 755 "CLUSTERED": lambda self: self._parse_clustered_by(), 756 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 757 exp.CollateProperty, **kwargs 758 ), 759 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 760 "CONTAINS": lambda self: self._parse_contains_property(), 761 "COPY": lambda self: self._parse_copy_property(), 762 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 763 "DEFINER": lambda self: self._parse_definer(), 764 "DETERMINISTIC": lambda self: self.expression( 765 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 766 ), 767 "DISTKEY": lambda self: self._parse_distkey(), 768 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 769 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 770 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 771 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 772 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 773 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 774 "FREESPACE": lambda self: self._parse_freespace(), 775 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 776 "HEAP": lambda self: self.expression(exp.HeapProperty), 777 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 778 "IMMUTABLE": lambda self: self.expression( 779 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 780 ), 781 "INHERITS": lambda self: self.expression( 782 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 783 ), 784 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 785 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 786 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 787 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 788 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 789 "LIKE": lambda self: self._parse_create_like(), 790 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 791 "LOCK": lambda self: self._parse_locking(), 792 "LOCKING": lambda self: self._parse_locking(), 793 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 794 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 795 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 796 "MODIFIES": lambda self: self._parse_modifies_property(), 797 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 798 "NO": lambda self: self._parse_no_property(), 799 "ON": lambda self: self._parse_on_property(), 800 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 801 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 802 "PARTITION": lambda self: self._parse_partitioned_of(), 803 "PARTITION BY": lambda self: self._parse_partitioned_by(), 804 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 805 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 806 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 807 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 808 "READS": lambda self: self._parse_reads_property(), 809 "REMOTE": lambda self: self._parse_remote_with_connection(), 810 "RETURNS": lambda self: self._parse_returns(), 811 "ROW": lambda self: self._parse_row(), 812 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 813 "SAMPLE": lambda self: self.expression( 814 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 815 ), 816 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 817 "SETTINGS": lambda self: self.expression( 818 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 819 ), 820 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 821 "SORTKEY": lambda self: self._parse_sortkey(), 822 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 823 "STABLE": lambda self: self.expression( 824 exp.StabilityProperty, this=exp.Literal.string("STABLE") 825 ), 826 "STORED": lambda self: self._parse_stored(), 827 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 828 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 829 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 830 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 831 "TO": lambda self: self._parse_to_table(), 832 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 833 "TRANSFORM": lambda self: self.expression( 834 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 835 ), 836 "TTL": lambda self: self._parse_ttl(), 837 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 838 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 839 "VOLATILE": lambda self: self._parse_volatile_property(), 840 "WITH": lambda self: self._parse_with_property(), 841 } 842 843 CONSTRAINT_PARSERS = { 844 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 845 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 846 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 847 "CHARACTER SET": lambda self: self.expression( 848 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 849 ), 850 "CHECK": lambda self: self.expression( 851 exp.CheckColumnConstraint, 852 this=self._parse_wrapped(self._parse_conjunction), 853 enforced=self._match_text_seq("ENFORCED"), 854 ), 855 "COLLATE": lambda self: self.expression( 856 exp.CollateColumnConstraint, this=self._parse_var() 857 ), 858 "COMMENT": lambda self: self.expression( 859 exp.CommentColumnConstraint, this=self._parse_string() 860 ), 861 "COMPRESS": lambda self: self._parse_compress(), 862 "CLUSTERED": lambda self: self.expression( 863 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 864 ), 865 "NONCLUSTERED": lambda self: self.expression( 866 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 867 ), 868 "DEFAULT": lambda self: self.expression( 869 exp.DefaultColumnConstraint, this=self._parse_bitwise() 870 ), 871 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 872 "EPHEMERAL": lambda self: self.expression( 873 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 874 ), 875 "EXCLUDE": lambda self: self.expression( 876 exp.ExcludeColumnConstraint, this=self._parse_index_params() 877 ), 878 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 879 "FORMAT": lambda self: self.expression( 880 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 881 ), 882 "GENERATED": lambda self: self._parse_generated_as_identity(), 883 "IDENTITY": lambda self: self._parse_auto_increment(), 884 "INLINE": lambda self: self._parse_inline(), 885 "LIKE": lambda self: self._parse_create_like(), 886 "NOT": lambda self: self._parse_not_constraint(), 887 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 888 "ON": lambda self: ( 889 self._match(TokenType.UPDATE) 890 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 891 ) 892 or self.expression(exp.OnProperty, this=self._parse_id_var()), 893 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 894 "PERIOD": lambda self: self._parse_period_for_system_time(), 895 "PRIMARY KEY": lambda self: self._parse_primary_key(), 896 "REFERENCES": lambda self: self._parse_references(match=False), 897 "TITLE": lambda self: self.expression( 898 exp.TitleColumnConstraint, this=self._parse_var_or_string() 899 ), 900 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 901 "UNIQUE": lambda self: self._parse_unique(), 902 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 903 "WITH": lambda self: self.expression( 904 exp.Properties, expressions=self._parse_wrapped_properties() 905 ), 906 } 907 908 ALTER_PARSERS = { 909 "ADD": lambda self: self._parse_alter_table_add(), 910 "ALTER": lambda self: self._parse_alter_table_alter(), 911 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 912 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 913 "DROP": lambda self: self._parse_alter_table_drop(), 914 "RENAME": lambda self: self._parse_alter_table_rename(), 915 } 916 917 ALTER_ALTER_PARSERS = { 918 "DISTKEY": lambda self: self._parse_alter_diststyle(), 919 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 920 "SORTKEY": lambda self: self._parse_alter_sortkey(), 921 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 922 } 923 924 SCHEMA_UNNAMED_CONSTRAINTS = { 925 "CHECK", 926 "EXCLUDE", 927 "FOREIGN KEY", 928 "LIKE", 929 "PERIOD", 930 "PRIMARY KEY", 931 "UNIQUE", 932 } 933 934 NO_PAREN_FUNCTION_PARSERS = { 935 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 936 "CASE": lambda self: self._parse_case(), 937 "IF": lambda self: self._parse_if(), 938 "NEXT": lambda self: self._parse_next_value_for(), 939 } 940 941 INVALID_FUNC_NAME_TOKENS = { 942 TokenType.IDENTIFIER, 943 TokenType.STRING, 944 } 945 946 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 947 948 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 949 950 FUNCTION_PARSERS = { 951 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 952 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 953 "DECODE": lambda self: self._parse_decode(), 954 "EXTRACT": lambda self: self._parse_extract(), 955 "JSON_OBJECT": lambda self: self._parse_json_object(), 956 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 957 "JSON_TABLE": lambda self: self._parse_json_table(), 958 "MATCH": lambda self: self._parse_match_against(), 959 "OPENJSON": lambda self: self._parse_open_json(), 960 "POSITION": lambda self: self._parse_position(), 961 "PREDICT": lambda self: self._parse_predict(), 962 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 963 "STRING_AGG": lambda self: self._parse_string_agg(), 964 "SUBSTRING": lambda self: self._parse_substring(), 965 "TRIM": lambda self: self._parse_trim(), 966 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 967 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 968 } 969 970 QUERY_MODIFIER_PARSERS = { 971 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 972 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 973 TokenType.WHERE: lambda self: ("where", self._parse_where()), 974 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 975 TokenType.HAVING: lambda self: ("having", self._parse_having()), 976 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 977 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 978 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 979 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 980 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 981 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 982 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 983 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 984 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 985 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 986 TokenType.CLUSTER_BY: lambda self: ( 987 "cluster", 988 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 989 ), 990 TokenType.DISTRIBUTE_BY: lambda self: ( 991 "distribute", 992 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 993 ), 994 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 995 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 996 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 997 } 998 999 SET_PARSERS = { 1000 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1001 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1002 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1003 "TRANSACTION": lambda self: self._parse_set_transaction(), 1004 } 1005 1006 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1007 1008 TYPE_LITERAL_PARSERS = { 1009 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1010 } 1011 1012 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1013 1014 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1015 1016 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1017 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1018 "ISOLATION": ( 1019 ("LEVEL", "REPEATABLE", "READ"), 1020 ("LEVEL", "READ", "COMMITTED"), 1021 ("LEVEL", "READ", "UNCOMITTED"), 1022 ("LEVEL", "SERIALIZABLE"), 1023 ), 1024 "READ": ("WRITE", "ONLY"), 1025 } 1026 1027 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1028 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1029 ) 1030 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1031 1032 CREATE_SEQUENCE: OPTIONS_TYPE = { 1033 "SCALE": ("EXTEND", "NOEXTEND"), 1034 "SHARD": ("EXTEND", "NOEXTEND"), 1035 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1036 **dict.fromkeys( 1037 ( 1038 "SESSION", 1039 "GLOBAL", 1040 "KEEP", 1041 "NOKEEP", 1042 "ORDER", 1043 "NOORDER", 1044 "NOCACHE", 1045 "CYCLE", 1046 "NOCYCLE", 1047 "NOMINVALUE", 1048 "NOMAXVALUE", 1049 "NOSCALE", 1050 "NOSHARD", 1051 ), 1052 tuple(), 1053 ), 1054 } 1055 1056 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1057 1058 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1059 1060 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1061 1062 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1063 1064 CLONE_KEYWORDS = {"CLONE", "COPY"} 1065 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1066 1067 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1068 1069 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1070 1071 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1072 1073 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1074 1075 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1076 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1077 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1078 1079 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1080 1081 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1082 1083 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1084 1085 DISTINCT_TOKENS = {TokenType.DISTINCT} 1086 1087 NULL_TOKENS = {TokenType.NULL} 1088 1089 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1090 1091 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1092 1093 STRICT_CAST = True 1094 1095 PREFIXED_PIVOT_COLUMNS = False 1096 IDENTIFY_PIVOT_STRINGS = False 1097 1098 LOG_DEFAULTS_TO_LN = False 1099 1100 # Whether ADD is present for each column added by ALTER TABLE 1101 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1102 1103 # Whether the table sample clause expects CSV syntax 1104 TABLESAMPLE_CSV = False 1105 1106 # The default method used for table sampling 1107 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1108 1109 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1110 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1111 1112 # Whether the TRIM function expects the characters to trim as its first argument 1113 TRIM_PATTERN_FIRST = False 1114 1115 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1116 STRING_ALIASES = False 1117 1118 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1119 MODIFIERS_ATTACHED_TO_UNION = True 1120 UNION_MODIFIERS = {"order", "limit", "offset"} 1121 1122 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1123 NO_PAREN_IF_COMMANDS = True 1124 1125 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1126 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1127 1128 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1129 # If this is True and '(' is not found, the keyword will be treated as an identifier 1130 VALUES_FOLLOWED_BY_PAREN = True 1131 1132 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1133 SUPPORTS_IMPLICIT_UNNEST = False 1134 1135 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1136 INTERVAL_SPANS = True 1137 1138 # Whether a PARTITION clause can follow a table reference 1139 SUPPORTS_PARTITION_SELECTION = False 1140 1141 __slots__ = ( 1142 "error_level", 1143 "error_message_context", 1144 "max_errors", 1145 "dialect", 1146 "sql", 1147 "errors", 1148 "_tokens", 1149 "_index", 1150 "_curr", 1151 "_next", 1152 "_prev", 1153 "_prev_comments", 1154 ) 1155 1156 # Autofilled 1157 SHOW_TRIE: t.Dict = {} 1158 SET_TRIE: t.Dict = {} 1159 1160 def __init__( 1161 self, 1162 error_level: t.Optional[ErrorLevel] = None, 1163 error_message_context: int = 100, 1164 max_errors: int = 3, 1165 dialect: DialectType = None, 1166 ): 1167 from sqlglot.dialects import Dialect 1168 1169 self.error_level = error_level or ErrorLevel.IMMEDIATE 1170 self.error_message_context = error_message_context 1171 self.max_errors = max_errors 1172 self.dialect = Dialect.get_or_raise(dialect) 1173 self.reset() 1174 1175 def reset(self): 1176 self.sql = "" 1177 self.errors = [] 1178 self._tokens = [] 1179 self._index = 0 1180 self._curr = None 1181 self._next = None 1182 self._prev = None 1183 self._prev_comments = None 1184 1185 def parse( 1186 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1187 ) -> t.List[t.Optional[exp.Expression]]: 1188 """ 1189 Parses a list of tokens and returns a list of syntax trees, one tree 1190 per parsed SQL statement. 1191 1192 Args: 1193 raw_tokens: The list of tokens. 1194 sql: The original SQL string, used to produce helpful debug messages. 1195 1196 Returns: 1197 The list of the produced syntax trees. 1198 """ 1199 return self._parse( 1200 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1201 ) 1202 1203 def parse_into( 1204 self, 1205 expression_types: exp.IntoType, 1206 raw_tokens: t.List[Token], 1207 sql: t.Optional[str] = None, 1208 ) -> t.List[t.Optional[exp.Expression]]: 1209 """ 1210 Parses a list of tokens into a given Expression type. If a collection of Expression 1211 types is given instead, this method will try to parse the token list into each one 1212 of them, stopping at the first for which the parsing succeeds. 1213 1214 Args: 1215 expression_types: The expression type(s) to try and parse the token list into. 1216 raw_tokens: The list of tokens. 1217 sql: The original SQL string, used to produce helpful debug messages. 1218 1219 Returns: 1220 The target Expression. 1221 """ 1222 errors = [] 1223 for expression_type in ensure_list(expression_types): 1224 parser = self.EXPRESSION_PARSERS.get(expression_type) 1225 if not parser: 1226 raise TypeError(f"No parser registered for {expression_type}") 1227 1228 try: 1229 return self._parse(parser, raw_tokens, sql) 1230 except ParseError as e: 1231 e.errors[0]["into_expression"] = expression_type 1232 errors.append(e) 1233 1234 raise ParseError( 1235 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1236 errors=merge_errors(errors), 1237 ) from errors[-1] 1238 1239 def _parse( 1240 self, 1241 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1242 raw_tokens: t.List[Token], 1243 sql: t.Optional[str] = None, 1244 ) -> t.List[t.Optional[exp.Expression]]: 1245 self.reset() 1246 self.sql = sql or "" 1247 1248 total = len(raw_tokens) 1249 chunks: t.List[t.List[Token]] = [[]] 1250 1251 for i, token in enumerate(raw_tokens): 1252 if token.token_type == TokenType.SEMICOLON: 1253 if token.comments: 1254 chunks.append([token]) 1255 1256 if i < total - 1: 1257 chunks.append([]) 1258 else: 1259 chunks[-1].append(token) 1260 1261 expressions = [] 1262 1263 for tokens in chunks: 1264 self._index = -1 1265 self._tokens = tokens 1266 self._advance() 1267 1268 expressions.append(parse_method(self)) 1269 1270 if self._index < len(self._tokens): 1271 self.raise_error("Invalid expression / Unexpected token") 1272 1273 self.check_errors() 1274 1275 return expressions 1276 1277 def check_errors(self) -> None: 1278 """Logs or raises any found errors, depending on the chosen error level setting.""" 1279 if self.error_level == ErrorLevel.WARN: 1280 for error in self.errors: 1281 logger.error(str(error)) 1282 elif self.error_level == ErrorLevel.RAISE and self.errors: 1283 raise ParseError( 1284 concat_messages(self.errors, self.max_errors), 1285 errors=merge_errors(self.errors), 1286 ) 1287 1288 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1289 """ 1290 Appends an error in the list of recorded errors or raises it, depending on the chosen 1291 error level setting. 1292 """ 1293 token = token or self._curr or self._prev or Token.string("") 1294 start = token.start 1295 end = token.end + 1 1296 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1297 highlight = self.sql[start:end] 1298 end_context = self.sql[end : end + self.error_message_context] 1299 1300 error = ParseError.new( 1301 f"{message}. Line {token.line}, Col: {token.col}.\n" 1302 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1303 description=message, 1304 line=token.line, 1305 col=token.col, 1306 start_context=start_context, 1307 highlight=highlight, 1308 end_context=end_context, 1309 ) 1310 1311 if self.error_level == ErrorLevel.IMMEDIATE: 1312 raise error 1313 1314 self.errors.append(error) 1315 1316 def expression( 1317 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1318 ) -> E: 1319 """ 1320 Creates a new, validated Expression. 1321 1322 Args: 1323 exp_class: The expression class to instantiate. 1324 comments: An optional list of comments to attach to the expression. 1325 kwargs: The arguments to set for the expression along with their respective values. 1326 1327 Returns: 1328 The target expression. 1329 """ 1330 instance = exp_class(**kwargs) 1331 instance.add_comments(comments) if comments else self._add_comments(instance) 1332 return self.validate_expression(instance) 1333 1334 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1335 if expression and self._prev_comments: 1336 expression.add_comments(self._prev_comments) 1337 self._prev_comments = None 1338 1339 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1340 """ 1341 Validates an Expression, making sure that all its mandatory arguments are set. 1342 1343 Args: 1344 expression: The expression to validate. 1345 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1346 1347 Returns: 1348 The validated expression. 1349 """ 1350 if self.error_level != ErrorLevel.IGNORE: 1351 for error_message in expression.error_messages(args): 1352 self.raise_error(error_message) 1353 1354 return expression 1355 1356 def _find_sql(self, start: Token, end: Token) -> str: 1357 return self.sql[start.start : end.end + 1] 1358 1359 def _is_connected(self) -> bool: 1360 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1361 1362 def _advance(self, times: int = 1) -> None: 1363 self._index += times 1364 self._curr = seq_get(self._tokens, self._index) 1365 self._next = seq_get(self._tokens, self._index + 1) 1366 1367 if self._index > 0: 1368 self._prev = self._tokens[self._index - 1] 1369 self._prev_comments = self._prev.comments 1370 else: 1371 self._prev = None 1372 self._prev_comments = None 1373 1374 def _retreat(self, index: int) -> None: 1375 if index != self._index: 1376 self._advance(index - self._index) 1377 1378 def _warn_unsupported(self) -> None: 1379 if len(self._tokens) <= 1: 1380 return 1381 1382 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1383 # interested in emitting a warning for the one being currently processed. 1384 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1385 1386 logger.warning( 1387 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1388 ) 1389 1390 def _parse_command(self) -> exp.Command: 1391 self._warn_unsupported() 1392 return self.expression( 1393 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1394 ) 1395 1396 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1397 """ 1398 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1399 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1400 the parser state accordingly 1401 """ 1402 index = self._index 1403 error_level = self.error_level 1404 1405 self.error_level = ErrorLevel.IMMEDIATE 1406 try: 1407 this = parse_method() 1408 except ParseError: 1409 this = None 1410 finally: 1411 if not this or retreat: 1412 self._retreat(index) 1413 self.error_level = error_level 1414 1415 return this 1416 1417 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1418 start = self._prev 1419 exists = self._parse_exists() if allow_exists else None 1420 1421 self._match(TokenType.ON) 1422 1423 materialized = self._match_text_seq("MATERIALIZED") 1424 kind = self._match_set(self.CREATABLES) and self._prev 1425 if not kind: 1426 return self._parse_as_command(start) 1427 1428 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1429 this = self._parse_user_defined_function(kind=kind.token_type) 1430 elif kind.token_type == TokenType.TABLE: 1431 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1432 elif kind.token_type == TokenType.COLUMN: 1433 this = self._parse_column() 1434 else: 1435 this = self._parse_id_var() 1436 1437 self._match(TokenType.IS) 1438 1439 return self.expression( 1440 exp.Comment, 1441 this=this, 1442 kind=kind.text, 1443 expression=self._parse_string(), 1444 exists=exists, 1445 materialized=materialized, 1446 ) 1447 1448 def _parse_to_table( 1449 self, 1450 ) -> exp.ToTableProperty: 1451 table = self._parse_table_parts(schema=True) 1452 return self.expression(exp.ToTableProperty, this=table) 1453 1454 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1455 def _parse_ttl(self) -> exp.Expression: 1456 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1457 this = self._parse_bitwise() 1458 1459 if self._match_text_seq("DELETE"): 1460 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1461 if self._match_text_seq("RECOMPRESS"): 1462 return self.expression( 1463 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1464 ) 1465 if self._match_text_seq("TO", "DISK"): 1466 return self.expression( 1467 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1468 ) 1469 if self._match_text_seq("TO", "VOLUME"): 1470 return self.expression( 1471 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1472 ) 1473 1474 return this 1475 1476 expressions = self._parse_csv(_parse_ttl_action) 1477 where = self._parse_where() 1478 group = self._parse_group() 1479 1480 aggregates = None 1481 if group and self._match(TokenType.SET): 1482 aggregates = self._parse_csv(self._parse_set_item) 1483 1484 return self.expression( 1485 exp.MergeTreeTTL, 1486 expressions=expressions, 1487 where=where, 1488 group=group, 1489 aggregates=aggregates, 1490 ) 1491 1492 def _parse_statement(self) -> t.Optional[exp.Expression]: 1493 if self._curr is None: 1494 return None 1495 1496 if self._match_set(self.STATEMENT_PARSERS): 1497 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1498 1499 if self._match_set(self.dialect.tokenizer.COMMANDS): 1500 return self._parse_command() 1501 1502 expression = self._parse_expression() 1503 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1504 return self._parse_query_modifiers(expression) 1505 1506 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1507 start = self._prev 1508 temporary = self._match(TokenType.TEMPORARY) 1509 materialized = self._match_text_seq("MATERIALIZED") 1510 1511 kind = self._match_set(self.CREATABLES) and self._prev.text 1512 if not kind: 1513 return self._parse_as_command(start) 1514 1515 if_exists = exists or self._parse_exists() 1516 table = self._parse_table_parts( 1517 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1518 ) 1519 1520 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1521 1522 if self._match(TokenType.L_PAREN, advance=False): 1523 expressions = self._parse_wrapped_csv(self._parse_types) 1524 else: 1525 expressions = None 1526 1527 return self.expression( 1528 exp.Drop, 1529 comments=start.comments, 1530 exists=if_exists, 1531 this=table, 1532 expressions=expressions, 1533 kind=kind.upper(), 1534 temporary=temporary, 1535 materialized=materialized, 1536 cascade=self._match_text_seq("CASCADE"), 1537 constraints=self._match_text_seq("CONSTRAINTS"), 1538 purge=self._match_text_seq("PURGE"), 1539 cluster=cluster, 1540 ) 1541 1542 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1543 return ( 1544 self._match_text_seq("IF") 1545 and (not not_ or self._match(TokenType.NOT)) 1546 and self._match(TokenType.EXISTS) 1547 ) 1548 1549 def _parse_create(self) -> exp.Create | exp.Command: 1550 # Note: this can't be None because we've matched a statement parser 1551 start = self._prev 1552 comments = self._prev_comments 1553 1554 replace = ( 1555 start.token_type == TokenType.REPLACE 1556 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1557 or self._match_pair(TokenType.OR, TokenType.ALTER) 1558 ) 1559 1560 unique = self._match(TokenType.UNIQUE) 1561 1562 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1563 self._advance() 1564 1565 properties = None 1566 create_token = self._match_set(self.CREATABLES) and self._prev 1567 1568 if not create_token: 1569 # exp.Properties.Location.POST_CREATE 1570 properties = self._parse_properties() 1571 create_token = self._match_set(self.CREATABLES) and self._prev 1572 1573 if not properties or not create_token: 1574 return self._parse_as_command(start) 1575 1576 exists = self._parse_exists(not_=True) 1577 this = None 1578 expression: t.Optional[exp.Expression] = None 1579 indexes = None 1580 no_schema_binding = None 1581 begin = None 1582 end = None 1583 clone = None 1584 1585 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1586 nonlocal properties 1587 if properties and temp_props: 1588 properties.expressions.extend(temp_props.expressions) 1589 elif temp_props: 1590 properties = temp_props 1591 1592 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1593 this = self._parse_user_defined_function(kind=create_token.token_type) 1594 1595 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1596 extend_props(self._parse_properties()) 1597 1598 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1599 1600 if not expression: 1601 if self._match(TokenType.COMMAND): 1602 expression = self._parse_as_command(self._prev) 1603 else: 1604 begin = self._match(TokenType.BEGIN) 1605 return_ = self._match_text_seq("RETURN") 1606 1607 if self._match(TokenType.STRING, advance=False): 1608 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1609 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1610 expression = self._parse_string() 1611 extend_props(self._parse_properties()) 1612 else: 1613 expression = self._parse_statement() 1614 1615 end = self._match_text_seq("END") 1616 1617 if return_: 1618 expression = self.expression(exp.Return, this=expression) 1619 elif create_token.token_type == TokenType.INDEX: 1620 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1621 if not self._match(TokenType.ON): 1622 index = self._parse_id_var() 1623 anonymous = False 1624 else: 1625 index = None 1626 anonymous = True 1627 1628 this = self._parse_index(index=index, anonymous=anonymous) 1629 elif create_token.token_type in self.DB_CREATABLES: 1630 table_parts = self._parse_table_parts( 1631 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1632 ) 1633 1634 # exp.Properties.Location.POST_NAME 1635 self._match(TokenType.COMMA) 1636 extend_props(self._parse_properties(before=True)) 1637 1638 this = self._parse_schema(this=table_parts) 1639 1640 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1641 extend_props(self._parse_properties()) 1642 1643 self._match(TokenType.ALIAS) 1644 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1645 # exp.Properties.Location.POST_ALIAS 1646 extend_props(self._parse_properties()) 1647 1648 if create_token.token_type == TokenType.SEQUENCE: 1649 expression = self._parse_types() 1650 extend_props(self._parse_properties()) 1651 else: 1652 expression = self._parse_ddl_select() 1653 1654 if create_token.token_type == TokenType.TABLE: 1655 # exp.Properties.Location.POST_EXPRESSION 1656 extend_props(self._parse_properties()) 1657 1658 indexes = [] 1659 while True: 1660 index = self._parse_index() 1661 1662 # exp.Properties.Location.POST_INDEX 1663 extend_props(self._parse_properties()) 1664 1665 if not index: 1666 break 1667 else: 1668 self._match(TokenType.COMMA) 1669 indexes.append(index) 1670 elif create_token.token_type == TokenType.VIEW: 1671 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1672 no_schema_binding = True 1673 1674 shallow = self._match_text_seq("SHALLOW") 1675 1676 if self._match_texts(self.CLONE_KEYWORDS): 1677 copy = self._prev.text.lower() == "copy" 1678 clone = self.expression( 1679 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1680 ) 1681 1682 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1683 return self._parse_as_command(start) 1684 1685 return self.expression( 1686 exp.Create, 1687 comments=comments, 1688 this=this, 1689 kind=create_token.text.upper(), 1690 replace=replace, 1691 unique=unique, 1692 expression=expression, 1693 exists=exists, 1694 properties=properties, 1695 indexes=indexes, 1696 no_schema_binding=no_schema_binding, 1697 begin=begin, 1698 end=end, 1699 clone=clone, 1700 ) 1701 1702 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1703 seq = exp.SequenceProperties() 1704 1705 options = [] 1706 index = self._index 1707 1708 while self._curr: 1709 self._match(TokenType.COMMA) 1710 if self._match_text_seq("INCREMENT"): 1711 self._match_text_seq("BY") 1712 self._match_text_seq("=") 1713 seq.set("increment", self._parse_term()) 1714 elif self._match_text_seq("MINVALUE"): 1715 seq.set("minvalue", self._parse_term()) 1716 elif self._match_text_seq("MAXVALUE"): 1717 seq.set("maxvalue", self._parse_term()) 1718 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1719 self._match_text_seq("=") 1720 seq.set("start", self._parse_term()) 1721 elif self._match_text_seq("CACHE"): 1722 # T-SQL allows empty CACHE which is initialized dynamically 1723 seq.set("cache", self._parse_number() or True) 1724 elif self._match_text_seq("OWNED", "BY"): 1725 # "OWNED BY NONE" is the default 1726 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1727 else: 1728 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1729 if opt: 1730 options.append(opt) 1731 else: 1732 break 1733 1734 seq.set("options", options if options else None) 1735 return None if self._index == index else seq 1736 1737 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1738 # only used for teradata currently 1739 self._match(TokenType.COMMA) 1740 1741 kwargs = { 1742 "no": self._match_text_seq("NO"), 1743 "dual": self._match_text_seq("DUAL"), 1744 "before": self._match_text_seq("BEFORE"), 1745 "default": self._match_text_seq("DEFAULT"), 1746 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1747 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1748 "after": self._match_text_seq("AFTER"), 1749 "minimum": self._match_texts(("MIN", "MINIMUM")), 1750 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1751 } 1752 1753 if self._match_texts(self.PROPERTY_PARSERS): 1754 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1755 try: 1756 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1757 except TypeError: 1758 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1759 1760 return None 1761 1762 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1763 return self._parse_wrapped_csv(self._parse_property) 1764 1765 def _parse_property(self) -> t.Optional[exp.Expression]: 1766 if self._match_texts(self.PROPERTY_PARSERS): 1767 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1768 1769 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1770 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1771 1772 if self._match_text_seq("COMPOUND", "SORTKEY"): 1773 return self._parse_sortkey(compound=True) 1774 1775 if self._match_text_seq("SQL", "SECURITY"): 1776 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1777 1778 index = self._index 1779 key = self._parse_column() 1780 1781 if not self._match(TokenType.EQ): 1782 self._retreat(index) 1783 return self._parse_sequence_properties() 1784 1785 return self.expression( 1786 exp.Property, 1787 this=key.to_dot() if isinstance(key, exp.Column) else key, 1788 value=self._parse_bitwise() or self._parse_var(any_token=True), 1789 ) 1790 1791 def _parse_stored(self) -> exp.FileFormatProperty: 1792 self._match(TokenType.ALIAS) 1793 1794 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1795 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1796 1797 return self.expression( 1798 exp.FileFormatProperty, 1799 this=( 1800 self.expression( 1801 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1802 ) 1803 if input_format or output_format 1804 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1805 ), 1806 ) 1807 1808 def _parse_unquoted_field(self): 1809 field = self._parse_field() 1810 if isinstance(field, exp.Identifier) and not field.quoted: 1811 field = exp.var(field) 1812 1813 return field 1814 1815 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1816 self._match(TokenType.EQ) 1817 self._match(TokenType.ALIAS) 1818 1819 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1820 1821 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1822 properties = [] 1823 while True: 1824 if before: 1825 prop = self._parse_property_before() 1826 else: 1827 prop = self._parse_property() 1828 if not prop: 1829 break 1830 for p in ensure_list(prop): 1831 properties.append(p) 1832 1833 if properties: 1834 return self.expression(exp.Properties, expressions=properties) 1835 1836 return None 1837 1838 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1839 return self.expression( 1840 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1841 ) 1842 1843 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1844 if self._index >= 2: 1845 pre_volatile_token = self._tokens[self._index - 2] 1846 else: 1847 pre_volatile_token = None 1848 1849 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1850 return exp.VolatileProperty() 1851 1852 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1853 1854 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1855 self._match_pair(TokenType.EQ, TokenType.ON) 1856 1857 prop = self.expression(exp.WithSystemVersioningProperty) 1858 if self._match(TokenType.L_PAREN): 1859 self._match_text_seq("HISTORY_TABLE", "=") 1860 prop.set("this", self._parse_table_parts()) 1861 1862 if self._match(TokenType.COMMA): 1863 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1864 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1865 1866 self._match_r_paren() 1867 1868 return prop 1869 1870 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1871 if self._match(TokenType.L_PAREN, advance=False): 1872 return self._parse_wrapped_properties() 1873 1874 if self._match_text_seq("JOURNAL"): 1875 return self._parse_withjournaltable() 1876 1877 if self._match_texts(self.VIEW_ATTRIBUTES): 1878 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1879 1880 if self._match_text_seq("DATA"): 1881 return self._parse_withdata(no=False) 1882 elif self._match_text_seq("NO", "DATA"): 1883 return self._parse_withdata(no=True) 1884 1885 if not self._next: 1886 return None 1887 1888 return self._parse_withisolatedloading() 1889 1890 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1891 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1892 self._match(TokenType.EQ) 1893 1894 user = self._parse_id_var() 1895 self._match(TokenType.PARAMETER) 1896 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1897 1898 if not user or not host: 1899 return None 1900 1901 return exp.DefinerProperty(this=f"{user}@{host}") 1902 1903 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1904 self._match(TokenType.TABLE) 1905 self._match(TokenType.EQ) 1906 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1907 1908 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1909 return self.expression(exp.LogProperty, no=no) 1910 1911 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1912 return self.expression(exp.JournalProperty, **kwargs) 1913 1914 def _parse_checksum(self) -> exp.ChecksumProperty: 1915 self._match(TokenType.EQ) 1916 1917 on = None 1918 if self._match(TokenType.ON): 1919 on = True 1920 elif self._match_text_seq("OFF"): 1921 on = False 1922 1923 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1924 1925 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1926 return self.expression( 1927 exp.Cluster, 1928 expressions=( 1929 self._parse_wrapped_csv(self._parse_ordered) 1930 if wrapped 1931 else self._parse_csv(self._parse_ordered) 1932 ), 1933 ) 1934 1935 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1936 self._match_text_seq("BY") 1937 1938 self._match_l_paren() 1939 expressions = self._parse_csv(self._parse_column) 1940 self._match_r_paren() 1941 1942 if self._match_text_seq("SORTED", "BY"): 1943 self._match_l_paren() 1944 sorted_by = self._parse_csv(self._parse_ordered) 1945 self._match_r_paren() 1946 else: 1947 sorted_by = None 1948 1949 self._match(TokenType.INTO) 1950 buckets = self._parse_number() 1951 self._match_text_seq("BUCKETS") 1952 1953 return self.expression( 1954 exp.ClusteredByProperty, 1955 expressions=expressions, 1956 sorted_by=sorted_by, 1957 buckets=buckets, 1958 ) 1959 1960 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1961 if not self._match_text_seq("GRANTS"): 1962 self._retreat(self._index - 1) 1963 return None 1964 1965 return self.expression(exp.CopyGrantsProperty) 1966 1967 def _parse_freespace(self) -> exp.FreespaceProperty: 1968 self._match(TokenType.EQ) 1969 return self.expression( 1970 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1971 ) 1972 1973 def _parse_mergeblockratio( 1974 self, no: bool = False, default: bool = False 1975 ) -> exp.MergeBlockRatioProperty: 1976 if self._match(TokenType.EQ): 1977 return self.expression( 1978 exp.MergeBlockRatioProperty, 1979 this=self._parse_number(), 1980 percent=self._match(TokenType.PERCENT), 1981 ) 1982 1983 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1984 1985 def _parse_datablocksize( 1986 self, 1987 default: t.Optional[bool] = None, 1988 minimum: t.Optional[bool] = None, 1989 maximum: t.Optional[bool] = None, 1990 ) -> exp.DataBlocksizeProperty: 1991 self._match(TokenType.EQ) 1992 size = self._parse_number() 1993 1994 units = None 1995 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1996 units = self._prev.text 1997 1998 return self.expression( 1999 exp.DataBlocksizeProperty, 2000 size=size, 2001 units=units, 2002 default=default, 2003 minimum=minimum, 2004 maximum=maximum, 2005 ) 2006 2007 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2008 self._match(TokenType.EQ) 2009 always = self._match_text_seq("ALWAYS") 2010 manual = self._match_text_seq("MANUAL") 2011 never = self._match_text_seq("NEVER") 2012 default = self._match_text_seq("DEFAULT") 2013 2014 autotemp = None 2015 if self._match_text_seq("AUTOTEMP"): 2016 autotemp = self._parse_schema() 2017 2018 return self.expression( 2019 exp.BlockCompressionProperty, 2020 always=always, 2021 manual=manual, 2022 never=never, 2023 default=default, 2024 autotemp=autotemp, 2025 ) 2026 2027 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2028 index = self._index 2029 no = self._match_text_seq("NO") 2030 concurrent = self._match_text_seq("CONCURRENT") 2031 2032 if not self._match_text_seq("ISOLATED", "LOADING"): 2033 self._retreat(index) 2034 return None 2035 2036 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2037 return self.expression( 2038 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2039 ) 2040 2041 def _parse_locking(self) -> exp.LockingProperty: 2042 if self._match(TokenType.TABLE): 2043 kind = "TABLE" 2044 elif self._match(TokenType.VIEW): 2045 kind = "VIEW" 2046 elif self._match(TokenType.ROW): 2047 kind = "ROW" 2048 elif self._match_text_seq("DATABASE"): 2049 kind = "DATABASE" 2050 else: 2051 kind = None 2052 2053 if kind in ("DATABASE", "TABLE", "VIEW"): 2054 this = self._parse_table_parts() 2055 else: 2056 this = None 2057 2058 if self._match(TokenType.FOR): 2059 for_or_in = "FOR" 2060 elif self._match(TokenType.IN): 2061 for_or_in = "IN" 2062 else: 2063 for_or_in = None 2064 2065 if self._match_text_seq("ACCESS"): 2066 lock_type = "ACCESS" 2067 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2068 lock_type = "EXCLUSIVE" 2069 elif self._match_text_seq("SHARE"): 2070 lock_type = "SHARE" 2071 elif self._match_text_seq("READ"): 2072 lock_type = "READ" 2073 elif self._match_text_seq("WRITE"): 2074 lock_type = "WRITE" 2075 elif self._match_text_seq("CHECKSUM"): 2076 lock_type = "CHECKSUM" 2077 else: 2078 lock_type = None 2079 2080 override = self._match_text_seq("OVERRIDE") 2081 2082 return self.expression( 2083 exp.LockingProperty, 2084 this=this, 2085 kind=kind, 2086 for_or_in=for_or_in, 2087 lock_type=lock_type, 2088 override=override, 2089 ) 2090 2091 def _parse_partition_by(self) -> t.List[exp.Expression]: 2092 if self._match(TokenType.PARTITION_BY): 2093 return self._parse_csv(self._parse_conjunction) 2094 return [] 2095 2096 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2097 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2098 if self._match_text_seq("MINVALUE"): 2099 return exp.var("MINVALUE") 2100 if self._match_text_seq("MAXVALUE"): 2101 return exp.var("MAXVALUE") 2102 return self._parse_bitwise() 2103 2104 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2105 expression = None 2106 from_expressions = None 2107 to_expressions = None 2108 2109 if self._match(TokenType.IN): 2110 this = self._parse_wrapped_csv(self._parse_bitwise) 2111 elif self._match(TokenType.FROM): 2112 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2113 self._match_text_seq("TO") 2114 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2115 elif self._match_text_seq("WITH", "(", "MODULUS"): 2116 this = self._parse_number() 2117 self._match_text_seq(",", "REMAINDER") 2118 expression = self._parse_number() 2119 self._match_r_paren() 2120 else: 2121 self.raise_error("Failed to parse partition bound spec.") 2122 2123 return self.expression( 2124 exp.PartitionBoundSpec, 2125 this=this, 2126 expression=expression, 2127 from_expressions=from_expressions, 2128 to_expressions=to_expressions, 2129 ) 2130 2131 # https://www.postgresql.org/docs/current/sql-createtable.html 2132 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2133 if not self._match_text_seq("OF"): 2134 self._retreat(self._index - 1) 2135 return None 2136 2137 this = self._parse_table(schema=True) 2138 2139 if self._match(TokenType.DEFAULT): 2140 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2141 elif self._match_text_seq("FOR", "VALUES"): 2142 expression = self._parse_partition_bound_spec() 2143 else: 2144 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2145 2146 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2147 2148 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2149 self._match(TokenType.EQ) 2150 return self.expression( 2151 exp.PartitionedByProperty, 2152 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2153 ) 2154 2155 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2156 if self._match_text_seq("AND", "STATISTICS"): 2157 statistics = True 2158 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2159 statistics = False 2160 else: 2161 statistics = None 2162 2163 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2164 2165 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2166 if self._match_text_seq("SQL"): 2167 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2168 return None 2169 2170 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2171 if self._match_text_seq("SQL", "DATA"): 2172 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2173 return None 2174 2175 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2176 if self._match_text_seq("PRIMARY", "INDEX"): 2177 return exp.NoPrimaryIndexProperty() 2178 if self._match_text_seq("SQL"): 2179 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2180 return None 2181 2182 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2183 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2184 return exp.OnCommitProperty() 2185 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2186 return exp.OnCommitProperty(delete=True) 2187 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2188 2189 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2190 if self._match_text_seq("SQL", "DATA"): 2191 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2192 return None 2193 2194 def _parse_distkey(self) -> exp.DistKeyProperty: 2195 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2196 2197 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2198 table = self._parse_table(schema=True) 2199 2200 options = [] 2201 while self._match_texts(("INCLUDING", "EXCLUDING")): 2202 this = self._prev.text.upper() 2203 2204 id_var = self._parse_id_var() 2205 if not id_var: 2206 return None 2207 2208 options.append( 2209 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2210 ) 2211 2212 return self.expression(exp.LikeProperty, this=table, expressions=options) 2213 2214 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2215 return self.expression( 2216 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2217 ) 2218 2219 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2220 self._match(TokenType.EQ) 2221 return self.expression( 2222 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2223 ) 2224 2225 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2226 self._match_text_seq("WITH", "CONNECTION") 2227 return self.expression( 2228 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2229 ) 2230 2231 def _parse_returns(self) -> exp.ReturnsProperty: 2232 value: t.Optional[exp.Expression] 2233 is_table = self._match(TokenType.TABLE) 2234 2235 if is_table: 2236 if self._match(TokenType.LT): 2237 value = self.expression( 2238 exp.Schema, 2239 this="TABLE", 2240 expressions=self._parse_csv(self._parse_struct_types), 2241 ) 2242 if not self._match(TokenType.GT): 2243 self.raise_error("Expecting >") 2244 else: 2245 value = self._parse_schema(exp.var("TABLE")) 2246 else: 2247 value = self._parse_types() 2248 2249 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2250 2251 def _parse_describe(self) -> exp.Describe: 2252 kind = self._match_set(self.CREATABLES) and self._prev.text 2253 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2254 if self._match(TokenType.DOT): 2255 style = None 2256 self._retreat(self._index - 2) 2257 this = self._parse_table(schema=True) 2258 properties = self._parse_properties() 2259 expressions = properties.expressions if properties else None 2260 return self.expression( 2261 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2262 ) 2263 2264 def _parse_insert(self) -> exp.Insert: 2265 comments = ensure_list(self._prev_comments) 2266 hint = self._parse_hint() 2267 overwrite = self._match(TokenType.OVERWRITE) 2268 ignore = self._match(TokenType.IGNORE) 2269 local = self._match_text_seq("LOCAL") 2270 alternative = None 2271 is_function = None 2272 2273 if self._match_text_seq("DIRECTORY"): 2274 this: t.Optional[exp.Expression] = self.expression( 2275 exp.Directory, 2276 this=self._parse_var_or_string(), 2277 local=local, 2278 row_format=self._parse_row_format(match_row=True), 2279 ) 2280 else: 2281 if self._match(TokenType.OR): 2282 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2283 2284 self._match(TokenType.INTO) 2285 comments += ensure_list(self._prev_comments) 2286 self._match(TokenType.TABLE) 2287 is_function = self._match(TokenType.FUNCTION) 2288 2289 this = ( 2290 self._parse_table(schema=True, parse_partition=True) 2291 if not is_function 2292 else self._parse_function() 2293 ) 2294 2295 returning = self._parse_returning() 2296 2297 return self.expression( 2298 exp.Insert, 2299 comments=comments, 2300 hint=hint, 2301 is_function=is_function, 2302 this=this, 2303 stored=self._match_text_seq("STORED") and self._parse_stored(), 2304 by_name=self._match_text_seq("BY", "NAME"), 2305 exists=self._parse_exists(), 2306 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2307 and self._parse_conjunction(), 2308 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2309 conflict=self._parse_on_conflict(), 2310 returning=returning or self._parse_returning(), 2311 overwrite=overwrite, 2312 alternative=alternative, 2313 ignore=ignore, 2314 ) 2315 2316 def _parse_kill(self) -> exp.Kill: 2317 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2318 2319 return self.expression( 2320 exp.Kill, 2321 this=self._parse_primary(), 2322 kind=kind, 2323 ) 2324 2325 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2326 conflict = self._match_text_seq("ON", "CONFLICT") 2327 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2328 2329 if not conflict and not duplicate: 2330 return None 2331 2332 conflict_keys = None 2333 constraint = None 2334 2335 if conflict: 2336 if self._match_text_seq("ON", "CONSTRAINT"): 2337 constraint = self._parse_id_var() 2338 elif self._match(TokenType.L_PAREN): 2339 conflict_keys = self._parse_csv(self._parse_id_var) 2340 self._match_r_paren() 2341 2342 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2343 if self._prev.token_type == TokenType.UPDATE: 2344 self._match(TokenType.SET) 2345 expressions = self._parse_csv(self._parse_equality) 2346 else: 2347 expressions = None 2348 2349 return self.expression( 2350 exp.OnConflict, 2351 duplicate=duplicate, 2352 expressions=expressions, 2353 action=action, 2354 conflict_keys=conflict_keys, 2355 constraint=constraint, 2356 ) 2357 2358 def _parse_returning(self) -> t.Optional[exp.Returning]: 2359 if not self._match(TokenType.RETURNING): 2360 return None 2361 return self.expression( 2362 exp.Returning, 2363 expressions=self._parse_csv(self._parse_expression), 2364 into=self._match(TokenType.INTO) and self._parse_table_part(), 2365 ) 2366 2367 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2368 if not self._match(TokenType.FORMAT): 2369 return None 2370 return self._parse_row_format() 2371 2372 def _parse_row_format( 2373 self, match_row: bool = False 2374 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2375 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2376 return None 2377 2378 if self._match_text_seq("SERDE"): 2379 this = self._parse_string() 2380 2381 serde_properties = None 2382 if self._match(TokenType.SERDE_PROPERTIES): 2383 serde_properties = self.expression( 2384 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2385 ) 2386 2387 return self.expression( 2388 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2389 ) 2390 2391 self._match_text_seq("DELIMITED") 2392 2393 kwargs = {} 2394 2395 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2396 kwargs["fields"] = self._parse_string() 2397 if self._match_text_seq("ESCAPED", "BY"): 2398 kwargs["escaped"] = self._parse_string() 2399 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2400 kwargs["collection_items"] = self._parse_string() 2401 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2402 kwargs["map_keys"] = self._parse_string() 2403 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2404 kwargs["lines"] = self._parse_string() 2405 if self._match_text_seq("NULL", "DEFINED", "AS"): 2406 kwargs["null"] = self._parse_string() 2407 2408 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2409 2410 def _parse_load(self) -> exp.LoadData | exp.Command: 2411 if self._match_text_seq("DATA"): 2412 local = self._match_text_seq("LOCAL") 2413 self._match_text_seq("INPATH") 2414 inpath = self._parse_string() 2415 overwrite = self._match(TokenType.OVERWRITE) 2416 self._match_pair(TokenType.INTO, TokenType.TABLE) 2417 2418 return self.expression( 2419 exp.LoadData, 2420 this=self._parse_table(schema=True), 2421 local=local, 2422 overwrite=overwrite, 2423 inpath=inpath, 2424 partition=self._parse_partition(), 2425 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2426 serde=self._match_text_seq("SERDE") and self._parse_string(), 2427 ) 2428 return self._parse_as_command(self._prev) 2429 2430 def _parse_delete(self) -> exp.Delete: 2431 # This handles MySQL's "Multiple-Table Syntax" 2432 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2433 tables = None 2434 comments = self._prev_comments 2435 if not self._match(TokenType.FROM, advance=False): 2436 tables = self._parse_csv(self._parse_table) or None 2437 2438 returning = self._parse_returning() 2439 2440 return self.expression( 2441 exp.Delete, 2442 comments=comments, 2443 tables=tables, 2444 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2445 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2446 where=self._parse_where(), 2447 returning=returning or self._parse_returning(), 2448 limit=self._parse_limit(), 2449 ) 2450 2451 def _parse_update(self) -> exp.Update: 2452 comments = self._prev_comments 2453 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2454 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2455 returning = self._parse_returning() 2456 return self.expression( 2457 exp.Update, 2458 comments=comments, 2459 **{ # type: ignore 2460 "this": this, 2461 "expressions": expressions, 2462 "from": self._parse_from(joins=True), 2463 "where": self._parse_where(), 2464 "returning": returning or self._parse_returning(), 2465 "order": self._parse_order(), 2466 "limit": self._parse_limit(), 2467 }, 2468 ) 2469 2470 def _parse_uncache(self) -> exp.Uncache: 2471 if not self._match(TokenType.TABLE): 2472 self.raise_error("Expecting TABLE after UNCACHE") 2473 2474 return self.expression( 2475 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2476 ) 2477 2478 def _parse_cache(self) -> exp.Cache: 2479 lazy = self._match_text_seq("LAZY") 2480 self._match(TokenType.TABLE) 2481 table = self._parse_table(schema=True) 2482 2483 options = [] 2484 if self._match_text_seq("OPTIONS"): 2485 self._match_l_paren() 2486 k = self._parse_string() 2487 self._match(TokenType.EQ) 2488 v = self._parse_string() 2489 options = [k, v] 2490 self._match_r_paren() 2491 2492 self._match(TokenType.ALIAS) 2493 return self.expression( 2494 exp.Cache, 2495 this=table, 2496 lazy=lazy, 2497 options=options, 2498 expression=self._parse_select(nested=True), 2499 ) 2500 2501 def _parse_partition(self) -> t.Optional[exp.Partition]: 2502 if not self._match(TokenType.PARTITION): 2503 return None 2504 2505 return self.expression( 2506 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2507 ) 2508 2509 def _parse_value(self) -> t.Optional[exp.Tuple]: 2510 if self._match(TokenType.L_PAREN): 2511 expressions = self._parse_csv(self._parse_expression) 2512 self._match_r_paren() 2513 return self.expression(exp.Tuple, expressions=expressions) 2514 2515 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2516 expression = self._parse_expression() 2517 if expression: 2518 return self.expression(exp.Tuple, expressions=[expression]) 2519 return None 2520 2521 def _parse_projections(self) -> t.List[exp.Expression]: 2522 return self._parse_expressions() 2523 2524 def _parse_select( 2525 self, 2526 nested: bool = False, 2527 table: bool = False, 2528 parse_subquery_alias: bool = True, 2529 parse_set_operation: bool = True, 2530 ) -> t.Optional[exp.Expression]: 2531 cte = self._parse_with() 2532 2533 if cte: 2534 this = self._parse_statement() 2535 2536 if not this: 2537 self.raise_error("Failed to parse any statement following CTE") 2538 return cte 2539 2540 if "with" in this.arg_types: 2541 this.set("with", cte) 2542 else: 2543 self.raise_error(f"{this.key} does not support CTE") 2544 this = cte 2545 2546 return this 2547 2548 # duckdb supports leading with FROM x 2549 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2550 2551 if self._match(TokenType.SELECT): 2552 comments = self._prev_comments 2553 2554 hint = self._parse_hint() 2555 all_ = self._match(TokenType.ALL) 2556 distinct = self._match_set(self.DISTINCT_TOKENS) 2557 2558 kind = ( 2559 self._match(TokenType.ALIAS) 2560 and self._match_texts(("STRUCT", "VALUE")) 2561 and self._prev.text.upper() 2562 ) 2563 2564 if distinct: 2565 distinct = self.expression( 2566 exp.Distinct, 2567 on=self._parse_value() if self._match(TokenType.ON) else None, 2568 ) 2569 2570 if all_ and distinct: 2571 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2572 2573 limit = self._parse_limit(top=True) 2574 projections = self._parse_projections() 2575 2576 this = self.expression( 2577 exp.Select, 2578 kind=kind, 2579 hint=hint, 2580 distinct=distinct, 2581 expressions=projections, 2582 limit=limit, 2583 ) 2584 this.comments = comments 2585 2586 into = self._parse_into() 2587 if into: 2588 this.set("into", into) 2589 2590 if not from_: 2591 from_ = self._parse_from() 2592 2593 if from_: 2594 this.set("from", from_) 2595 2596 this = self._parse_query_modifiers(this) 2597 elif (table or nested) and self._match(TokenType.L_PAREN): 2598 if self._match(TokenType.PIVOT): 2599 this = self._parse_simplified_pivot() 2600 elif self._match(TokenType.FROM): 2601 this = exp.select("*").from_( 2602 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2603 ) 2604 else: 2605 this = ( 2606 self._parse_table() 2607 if table 2608 else self._parse_select(nested=True, parse_set_operation=False) 2609 ) 2610 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2611 2612 self._match_r_paren() 2613 2614 # We return early here so that the UNION isn't attached to the subquery by the 2615 # following call to _parse_set_operations, but instead becomes the parent node 2616 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2617 elif self._match(TokenType.VALUES, advance=False): 2618 this = self._parse_derived_table_values() 2619 elif from_: 2620 this = exp.select("*").from_(from_.this, copy=False) 2621 else: 2622 this = None 2623 2624 if parse_set_operation: 2625 return self._parse_set_operations(this) 2626 return this 2627 2628 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2629 if not skip_with_token and not self._match(TokenType.WITH): 2630 return None 2631 2632 comments = self._prev_comments 2633 recursive = self._match(TokenType.RECURSIVE) 2634 2635 expressions = [] 2636 while True: 2637 expressions.append(self._parse_cte()) 2638 2639 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2640 break 2641 else: 2642 self._match(TokenType.WITH) 2643 2644 return self.expression( 2645 exp.With, comments=comments, expressions=expressions, recursive=recursive 2646 ) 2647 2648 def _parse_cte(self) -> exp.CTE: 2649 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2650 if not alias or not alias.this: 2651 self.raise_error("Expected CTE to have alias") 2652 2653 self._match(TokenType.ALIAS) 2654 2655 if self._match_text_seq("NOT", "MATERIALIZED"): 2656 materialized = False 2657 elif self._match_text_seq("MATERIALIZED"): 2658 materialized = True 2659 else: 2660 materialized = None 2661 2662 return self.expression( 2663 exp.CTE, 2664 this=self._parse_wrapped(self._parse_statement), 2665 alias=alias, 2666 materialized=materialized, 2667 ) 2668 2669 def _parse_table_alias( 2670 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2671 ) -> t.Optional[exp.TableAlias]: 2672 any_token = self._match(TokenType.ALIAS) 2673 alias = ( 2674 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2675 or self._parse_string_as_identifier() 2676 ) 2677 2678 index = self._index 2679 if self._match(TokenType.L_PAREN): 2680 columns = self._parse_csv(self._parse_function_parameter) 2681 self._match_r_paren() if columns else self._retreat(index) 2682 else: 2683 columns = None 2684 2685 if not alias and not columns: 2686 return None 2687 2688 return self.expression(exp.TableAlias, this=alias, columns=columns) 2689 2690 def _parse_subquery( 2691 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2692 ) -> t.Optional[exp.Subquery]: 2693 if not this: 2694 return None 2695 2696 return self.expression( 2697 exp.Subquery, 2698 this=this, 2699 pivots=self._parse_pivots(), 2700 alias=self._parse_table_alias() if parse_alias else None, 2701 ) 2702 2703 def _implicit_unnests_to_explicit(self, this: E) -> E: 2704 from sqlglot.optimizer.normalize_identifiers import ( 2705 normalize_identifiers as _norm, 2706 ) 2707 2708 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2709 for i, join in enumerate(this.args.get("joins") or []): 2710 table = join.this 2711 normalized_table = table.copy() 2712 normalized_table.meta["maybe_column"] = True 2713 normalized_table = _norm(normalized_table, dialect=self.dialect) 2714 2715 if isinstance(table, exp.Table) and not join.args.get("on"): 2716 if normalized_table.parts[0].name in refs: 2717 table_as_column = table.to_column() 2718 unnest = exp.Unnest(expressions=[table_as_column]) 2719 2720 # Table.to_column creates a parent Alias node that we want to convert to 2721 # a TableAlias and attach to the Unnest, so it matches the parser's output 2722 if isinstance(table.args.get("alias"), exp.TableAlias): 2723 table_as_column.replace(table_as_column.this) 2724 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2725 2726 table.replace(unnest) 2727 2728 refs.add(normalized_table.alias_or_name) 2729 2730 return this 2731 2732 def _parse_query_modifiers( 2733 self, this: t.Optional[exp.Expression] 2734 ) -> t.Optional[exp.Expression]: 2735 if isinstance(this, (exp.Query, exp.Table)): 2736 for join in self._parse_joins(): 2737 this.append("joins", join) 2738 for lateral in iter(self._parse_lateral, None): 2739 this.append("laterals", lateral) 2740 2741 while True: 2742 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2743 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2744 key, expression = parser(self) 2745 2746 if expression: 2747 this.set(key, expression) 2748 if key == "limit": 2749 offset = expression.args.pop("offset", None) 2750 2751 if offset: 2752 offset = exp.Offset(expression=offset) 2753 this.set("offset", offset) 2754 2755 limit_by_expressions = expression.expressions 2756 expression.set("expressions", None) 2757 offset.set("expressions", limit_by_expressions) 2758 continue 2759 break 2760 2761 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2762 this = self._implicit_unnests_to_explicit(this) 2763 2764 return this 2765 2766 def _parse_hint(self) -> t.Optional[exp.Hint]: 2767 if self._match(TokenType.HINT): 2768 hints = [] 2769 for hint in iter( 2770 lambda: self._parse_csv( 2771 lambda: self._parse_function() or self._parse_var(upper=True) 2772 ), 2773 [], 2774 ): 2775 hints.extend(hint) 2776 2777 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2778 self.raise_error("Expected */ after HINT") 2779 2780 return self.expression(exp.Hint, expressions=hints) 2781 2782 return None 2783 2784 def _parse_into(self) -> t.Optional[exp.Into]: 2785 if not self._match(TokenType.INTO): 2786 return None 2787 2788 temp = self._match(TokenType.TEMPORARY) 2789 unlogged = self._match_text_seq("UNLOGGED") 2790 self._match(TokenType.TABLE) 2791 2792 return self.expression( 2793 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2794 ) 2795 2796 def _parse_from( 2797 self, joins: bool = False, skip_from_token: bool = False 2798 ) -> t.Optional[exp.From]: 2799 if not skip_from_token and not self._match(TokenType.FROM): 2800 return None 2801 2802 return self.expression( 2803 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2804 ) 2805 2806 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2807 return self.expression( 2808 exp.MatchRecognizeMeasure, 2809 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2810 this=self._parse_expression(), 2811 ) 2812 2813 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2814 if not self._match(TokenType.MATCH_RECOGNIZE): 2815 return None 2816 2817 self._match_l_paren() 2818 2819 partition = self._parse_partition_by() 2820 order = self._parse_order() 2821 2822 measures = ( 2823 self._parse_csv(self._parse_match_recognize_measure) 2824 if self._match_text_seq("MEASURES") 2825 else None 2826 ) 2827 2828 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2829 rows = exp.var("ONE ROW PER MATCH") 2830 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2831 text = "ALL ROWS PER MATCH" 2832 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2833 text += " SHOW EMPTY MATCHES" 2834 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2835 text += " OMIT EMPTY MATCHES" 2836 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2837 text += " WITH UNMATCHED ROWS" 2838 rows = exp.var(text) 2839 else: 2840 rows = None 2841 2842 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2843 text = "AFTER MATCH SKIP" 2844 if self._match_text_seq("PAST", "LAST", "ROW"): 2845 text += " PAST LAST ROW" 2846 elif self._match_text_seq("TO", "NEXT", "ROW"): 2847 text += " TO NEXT ROW" 2848 elif self._match_text_seq("TO", "FIRST"): 2849 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2850 elif self._match_text_seq("TO", "LAST"): 2851 text += f" TO LAST {self._advance_any().text}" # type: ignore 2852 after = exp.var(text) 2853 else: 2854 after = None 2855 2856 if self._match_text_seq("PATTERN"): 2857 self._match_l_paren() 2858 2859 if not self._curr: 2860 self.raise_error("Expecting )", self._curr) 2861 2862 paren = 1 2863 start = self._curr 2864 2865 while self._curr and paren > 0: 2866 if self._curr.token_type == TokenType.L_PAREN: 2867 paren += 1 2868 if self._curr.token_type == TokenType.R_PAREN: 2869 paren -= 1 2870 2871 end = self._prev 2872 self._advance() 2873 2874 if paren > 0: 2875 self.raise_error("Expecting )", self._curr) 2876 2877 pattern = exp.var(self._find_sql(start, end)) 2878 else: 2879 pattern = None 2880 2881 define = ( 2882 self._parse_csv(self._parse_name_as_expression) 2883 if self._match_text_seq("DEFINE") 2884 else None 2885 ) 2886 2887 self._match_r_paren() 2888 2889 return self.expression( 2890 exp.MatchRecognize, 2891 partition_by=partition, 2892 order=order, 2893 measures=measures, 2894 rows=rows, 2895 after=after, 2896 pattern=pattern, 2897 define=define, 2898 alias=self._parse_table_alias(), 2899 ) 2900 2901 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2902 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2903 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2904 cross_apply = False 2905 2906 if cross_apply is not None: 2907 this = self._parse_select(table=True) 2908 view = None 2909 outer = None 2910 elif self._match(TokenType.LATERAL): 2911 this = self._parse_select(table=True) 2912 view = self._match(TokenType.VIEW) 2913 outer = self._match(TokenType.OUTER) 2914 else: 2915 return None 2916 2917 if not this: 2918 this = ( 2919 self._parse_unnest() 2920 or self._parse_function() 2921 or self._parse_id_var(any_token=False) 2922 ) 2923 2924 while self._match(TokenType.DOT): 2925 this = exp.Dot( 2926 this=this, 2927 expression=self._parse_function() or self._parse_id_var(any_token=False), 2928 ) 2929 2930 if view: 2931 table = self._parse_id_var(any_token=False) 2932 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2933 table_alias: t.Optional[exp.TableAlias] = self.expression( 2934 exp.TableAlias, this=table, columns=columns 2935 ) 2936 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2937 # We move the alias from the lateral's child node to the lateral itself 2938 table_alias = this.args["alias"].pop() 2939 else: 2940 table_alias = self._parse_table_alias() 2941 2942 return self.expression( 2943 exp.Lateral, 2944 this=this, 2945 view=view, 2946 outer=outer, 2947 alias=table_alias, 2948 cross_apply=cross_apply, 2949 ) 2950 2951 def _parse_join_parts( 2952 self, 2953 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2954 return ( 2955 self._match_set(self.JOIN_METHODS) and self._prev, 2956 self._match_set(self.JOIN_SIDES) and self._prev, 2957 self._match_set(self.JOIN_KINDS) and self._prev, 2958 ) 2959 2960 def _parse_join( 2961 self, skip_join_token: bool = False, parse_bracket: bool = False 2962 ) -> t.Optional[exp.Join]: 2963 if self._match(TokenType.COMMA): 2964 return self.expression(exp.Join, this=self._parse_table()) 2965 2966 index = self._index 2967 method, side, kind = self._parse_join_parts() 2968 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2969 join = self._match(TokenType.JOIN) 2970 2971 if not skip_join_token and not join: 2972 self._retreat(index) 2973 kind = None 2974 method = None 2975 side = None 2976 2977 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2978 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2979 2980 if not skip_join_token and not join and not outer_apply and not cross_apply: 2981 return None 2982 2983 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2984 2985 if method: 2986 kwargs["method"] = method.text 2987 if side: 2988 kwargs["side"] = side.text 2989 if kind: 2990 kwargs["kind"] = kind.text 2991 if hint: 2992 kwargs["hint"] = hint 2993 2994 if self._match(TokenType.MATCH_CONDITION): 2995 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2996 2997 if self._match(TokenType.ON): 2998 kwargs["on"] = self._parse_conjunction() 2999 elif self._match(TokenType.USING): 3000 kwargs["using"] = self._parse_wrapped_id_vars() 3001 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3002 kind and kind.token_type == TokenType.CROSS 3003 ): 3004 index = self._index 3005 joins: t.Optional[list] = list(self._parse_joins()) 3006 3007 if joins and self._match(TokenType.ON): 3008 kwargs["on"] = self._parse_conjunction() 3009 elif joins and self._match(TokenType.USING): 3010 kwargs["using"] = self._parse_wrapped_id_vars() 3011 else: 3012 joins = None 3013 self._retreat(index) 3014 3015 kwargs["this"].set("joins", joins if joins else None) 3016 3017 comments = [c for token in (method, side, kind) if token for c in token.comments] 3018 return self.expression(exp.Join, comments=comments, **kwargs) 3019 3020 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3021 this = self._parse_conjunction() 3022 3023 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3024 return this 3025 3026 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3027 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3028 3029 return this 3030 3031 def _parse_index_params(self) -> exp.IndexParameters: 3032 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3033 3034 if self._match(TokenType.L_PAREN, advance=False): 3035 columns = self._parse_wrapped_csv(self._parse_with_operator) 3036 else: 3037 columns = None 3038 3039 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3040 partition_by = self._parse_partition_by() 3041 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3042 tablespace = ( 3043 self._parse_var(any_token=True) 3044 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3045 else None 3046 ) 3047 where = self._parse_where() 3048 3049 return self.expression( 3050 exp.IndexParameters, 3051 using=using, 3052 columns=columns, 3053 include=include, 3054 partition_by=partition_by, 3055 where=where, 3056 with_storage=with_storage, 3057 tablespace=tablespace, 3058 ) 3059 3060 def _parse_index( 3061 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3062 ) -> t.Optional[exp.Index]: 3063 if index or anonymous: 3064 unique = None 3065 primary = None 3066 amp = None 3067 3068 self._match(TokenType.ON) 3069 self._match(TokenType.TABLE) # hive 3070 table = self._parse_table_parts(schema=True) 3071 else: 3072 unique = self._match(TokenType.UNIQUE) 3073 primary = self._match_text_seq("PRIMARY") 3074 amp = self._match_text_seq("AMP") 3075 3076 if not self._match(TokenType.INDEX): 3077 return None 3078 3079 index = self._parse_id_var() 3080 table = None 3081 3082 params = self._parse_index_params() 3083 3084 return self.expression( 3085 exp.Index, 3086 this=index, 3087 table=table, 3088 unique=unique, 3089 primary=primary, 3090 amp=amp, 3091 params=params, 3092 ) 3093 3094 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3095 hints: t.List[exp.Expression] = [] 3096 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3097 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3098 hints.append( 3099 self.expression( 3100 exp.WithTableHint, 3101 expressions=self._parse_csv( 3102 lambda: self._parse_function() or self._parse_var(any_token=True) 3103 ), 3104 ) 3105 ) 3106 self._match_r_paren() 3107 else: 3108 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3109 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3110 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3111 3112 self._match_texts(("INDEX", "KEY")) 3113 if self._match(TokenType.FOR): 3114 hint.set("target", self._advance_any() and self._prev.text.upper()) 3115 3116 hint.set("expressions", self._parse_wrapped_id_vars()) 3117 hints.append(hint) 3118 3119 return hints or None 3120 3121 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3122 return ( 3123 (not schema and self._parse_function(optional_parens=False)) 3124 or self._parse_id_var(any_token=False) 3125 or self._parse_string_as_identifier() 3126 or self._parse_placeholder() 3127 ) 3128 3129 def _parse_table_parts( 3130 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3131 ) -> exp.Table: 3132 catalog = None 3133 db = None 3134 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3135 3136 while self._match(TokenType.DOT): 3137 if catalog: 3138 # This allows nesting the table in arbitrarily many dot expressions if needed 3139 table = self.expression( 3140 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3141 ) 3142 else: 3143 catalog = db 3144 db = table 3145 # "" used for tsql FROM a..b case 3146 table = self._parse_table_part(schema=schema) or "" 3147 3148 if ( 3149 wildcard 3150 and self._is_connected() 3151 and (isinstance(table, exp.Identifier) or not table) 3152 and self._match(TokenType.STAR) 3153 ): 3154 if isinstance(table, exp.Identifier): 3155 table.args["this"] += "*" 3156 else: 3157 table = exp.Identifier(this="*") 3158 3159 # We bubble up comments from the Identifier to the Table 3160 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3161 3162 if is_db_reference: 3163 catalog = db 3164 db = table 3165 table = None 3166 3167 if not table and not is_db_reference: 3168 self.raise_error(f"Expected table name but got {self._curr}") 3169 if not db and is_db_reference: 3170 self.raise_error(f"Expected database name but got {self._curr}") 3171 3172 return self.expression( 3173 exp.Table, 3174 comments=comments, 3175 this=table, 3176 db=db, 3177 catalog=catalog, 3178 pivots=self._parse_pivots(), 3179 ) 3180 3181 def _parse_table( 3182 self, 3183 schema: bool = False, 3184 joins: bool = False, 3185 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3186 parse_bracket: bool = False, 3187 is_db_reference: bool = False, 3188 parse_partition: bool = False, 3189 ) -> t.Optional[exp.Expression]: 3190 lateral = self._parse_lateral() 3191 if lateral: 3192 return lateral 3193 3194 unnest = self._parse_unnest() 3195 if unnest: 3196 return unnest 3197 3198 values = self._parse_derived_table_values() 3199 if values: 3200 return values 3201 3202 subquery = self._parse_select(table=True) 3203 if subquery: 3204 if not subquery.args.get("pivots"): 3205 subquery.set("pivots", self._parse_pivots()) 3206 return subquery 3207 3208 bracket = parse_bracket and self._parse_bracket(None) 3209 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3210 3211 only = self._match(TokenType.ONLY) 3212 3213 this = t.cast( 3214 exp.Expression, 3215 bracket 3216 or self._parse_bracket( 3217 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3218 ), 3219 ) 3220 3221 if only: 3222 this.set("only", only) 3223 3224 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3225 self._match_text_seq("*") 3226 3227 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3228 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3229 this.set("partition", self._parse_partition()) 3230 3231 if schema: 3232 return self._parse_schema(this=this) 3233 3234 version = self._parse_version() 3235 3236 if version: 3237 this.set("version", version) 3238 3239 if self.dialect.ALIAS_POST_TABLESAMPLE: 3240 table_sample = self._parse_table_sample() 3241 3242 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3243 if alias: 3244 this.set("alias", alias) 3245 3246 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3247 return self.expression( 3248 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3249 ) 3250 3251 this.set("hints", self._parse_table_hints()) 3252 3253 if not this.args.get("pivots"): 3254 this.set("pivots", self._parse_pivots()) 3255 3256 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3257 table_sample = self._parse_table_sample() 3258 3259 if table_sample: 3260 table_sample.set("this", this) 3261 this = table_sample 3262 3263 if joins: 3264 for join in self._parse_joins(): 3265 this.append("joins", join) 3266 3267 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3268 this.set("ordinality", True) 3269 this.set("alias", self._parse_table_alias()) 3270 3271 return this 3272 3273 def _parse_version(self) -> t.Optional[exp.Version]: 3274 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3275 this = "TIMESTAMP" 3276 elif self._match(TokenType.VERSION_SNAPSHOT): 3277 this = "VERSION" 3278 else: 3279 return None 3280 3281 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3282 kind = self._prev.text.upper() 3283 start = self._parse_bitwise() 3284 self._match_texts(("TO", "AND")) 3285 end = self._parse_bitwise() 3286 expression: t.Optional[exp.Expression] = self.expression( 3287 exp.Tuple, expressions=[start, end] 3288 ) 3289 elif self._match_text_seq("CONTAINED", "IN"): 3290 kind = "CONTAINED IN" 3291 expression = self.expression( 3292 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3293 ) 3294 elif self._match(TokenType.ALL): 3295 kind = "ALL" 3296 expression = None 3297 else: 3298 self._match_text_seq("AS", "OF") 3299 kind = "AS OF" 3300 expression = self._parse_type() 3301 3302 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3303 3304 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3305 if not self._match(TokenType.UNNEST): 3306 return None 3307 3308 expressions = self._parse_wrapped_csv(self._parse_equality) 3309 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3310 3311 alias = self._parse_table_alias() if with_alias else None 3312 3313 if alias: 3314 if self.dialect.UNNEST_COLUMN_ONLY: 3315 if alias.args.get("columns"): 3316 self.raise_error("Unexpected extra column alias in unnest.") 3317 3318 alias.set("columns", [alias.this]) 3319 alias.set("this", None) 3320 3321 columns = alias.args.get("columns") or [] 3322 if offset and len(expressions) < len(columns): 3323 offset = columns.pop() 3324 3325 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3326 self._match(TokenType.ALIAS) 3327 offset = self._parse_id_var( 3328 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3329 ) or exp.to_identifier("offset") 3330 3331 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3332 3333 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3334 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3335 if not is_derived and not self._match_text_seq("VALUES"): 3336 return None 3337 3338 expressions = self._parse_csv(self._parse_value) 3339 alias = self._parse_table_alias() 3340 3341 if is_derived: 3342 self._match_r_paren() 3343 3344 return self.expression( 3345 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3346 ) 3347 3348 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3349 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3350 as_modifier and self._match_text_seq("USING", "SAMPLE") 3351 ): 3352 return None 3353 3354 bucket_numerator = None 3355 bucket_denominator = None 3356 bucket_field = None 3357 percent = None 3358 size = None 3359 seed = None 3360 3361 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3362 matched_l_paren = self._match(TokenType.L_PAREN) 3363 3364 if self.TABLESAMPLE_CSV: 3365 num = None 3366 expressions = self._parse_csv(self._parse_primary) 3367 else: 3368 expressions = None 3369 num = ( 3370 self._parse_factor() 3371 if self._match(TokenType.NUMBER, advance=False) 3372 else self._parse_primary() or self._parse_placeholder() 3373 ) 3374 3375 if self._match_text_seq("BUCKET"): 3376 bucket_numerator = self._parse_number() 3377 self._match_text_seq("OUT", "OF") 3378 bucket_denominator = bucket_denominator = self._parse_number() 3379 self._match(TokenType.ON) 3380 bucket_field = self._parse_field() 3381 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3382 percent = num 3383 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3384 size = num 3385 else: 3386 percent = num 3387 3388 if matched_l_paren: 3389 self._match_r_paren() 3390 3391 if self._match(TokenType.L_PAREN): 3392 method = self._parse_var(upper=True) 3393 seed = self._match(TokenType.COMMA) and self._parse_number() 3394 self._match_r_paren() 3395 elif self._match_texts(("SEED", "REPEATABLE")): 3396 seed = self._parse_wrapped(self._parse_number) 3397 3398 if not method and self.DEFAULT_SAMPLING_METHOD: 3399 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3400 3401 return self.expression( 3402 exp.TableSample, 3403 expressions=expressions, 3404 method=method, 3405 bucket_numerator=bucket_numerator, 3406 bucket_denominator=bucket_denominator, 3407 bucket_field=bucket_field, 3408 percent=percent, 3409 size=size, 3410 seed=seed, 3411 ) 3412 3413 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3414 return list(iter(self._parse_pivot, None)) or None 3415 3416 def _parse_joins(self) -> t.Iterator[exp.Join]: 3417 return iter(self._parse_join, None) 3418 3419 # https://duckdb.org/docs/sql/statements/pivot 3420 def _parse_simplified_pivot(self) -> exp.Pivot: 3421 def _parse_on() -> t.Optional[exp.Expression]: 3422 this = self._parse_bitwise() 3423 return self._parse_in(this) if self._match(TokenType.IN) else this 3424 3425 this = self._parse_table() 3426 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3427 using = self._match(TokenType.USING) and self._parse_csv( 3428 lambda: self._parse_alias(self._parse_function()) 3429 ) 3430 group = self._parse_group() 3431 return self.expression( 3432 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3433 ) 3434 3435 def _parse_pivot_in(self) -> exp.In: 3436 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3437 this = self._parse_conjunction() 3438 3439 self._match(TokenType.ALIAS) 3440 alias = self._parse_field() 3441 if alias: 3442 return self.expression(exp.PivotAlias, this=this, alias=alias) 3443 3444 return this 3445 3446 value = self._parse_column() 3447 3448 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3449 self.raise_error("Expecting IN (") 3450 3451 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3452 3453 self._match_r_paren() 3454 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3455 3456 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3457 index = self._index 3458 include_nulls = None 3459 3460 if self._match(TokenType.PIVOT): 3461 unpivot = False 3462 elif self._match(TokenType.UNPIVOT): 3463 unpivot = True 3464 3465 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3466 if self._match_text_seq("INCLUDE", "NULLS"): 3467 include_nulls = True 3468 elif self._match_text_seq("EXCLUDE", "NULLS"): 3469 include_nulls = False 3470 else: 3471 return None 3472 3473 expressions = [] 3474 3475 if not self._match(TokenType.L_PAREN): 3476 self._retreat(index) 3477 return None 3478 3479 if unpivot: 3480 expressions = self._parse_csv(self._parse_column) 3481 else: 3482 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3483 3484 if not expressions: 3485 self.raise_error("Failed to parse PIVOT's aggregation list") 3486 3487 if not self._match(TokenType.FOR): 3488 self.raise_error("Expecting FOR") 3489 3490 field = self._parse_pivot_in() 3491 3492 self._match_r_paren() 3493 3494 pivot = self.expression( 3495 exp.Pivot, 3496 expressions=expressions, 3497 field=field, 3498 unpivot=unpivot, 3499 include_nulls=include_nulls, 3500 ) 3501 3502 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3503 pivot.set("alias", self._parse_table_alias()) 3504 3505 if not unpivot: 3506 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3507 3508 columns: t.List[exp.Expression] = [] 3509 for fld in pivot.args["field"].expressions: 3510 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3511 for name in names: 3512 if self.PREFIXED_PIVOT_COLUMNS: 3513 name = f"{name}_{field_name}" if name else field_name 3514 else: 3515 name = f"{field_name}_{name}" if name else field_name 3516 3517 columns.append(exp.to_identifier(name)) 3518 3519 pivot.set("columns", columns) 3520 3521 return pivot 3522 3523 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3524 return [agg.alias for agg in aggregations] 3525 3526 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3527 if not skip_where_token and not self._match(TokenType.PREWHERE): 3528 return None 3529 3530 return self.expression( 3531 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3532 ) 3533 3534 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3535 if not skip_where_token and not self._match(TokenType.WHERE): 3536 return None 3537 3538 return self.expression( 3539 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3540 ) 3541 3542 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3543 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3544 return None 3545 3546 elements: t.Dict[str, t.Any] = defaultdict(list) 3547 3548 if self._match(TokenType.ALL): 3549 elements["all"] = True 3550 elif self._match(TokenType.DISTINCT): 3551 elements["all"] = False 3552 3553 while True: 3554 expressions = self._parse_csv(self._parse_conjunction) 3555 if expressions: 3556 elements["expressions"].extend(expressions) 3557 3558 grouping_sets = self._parse_grouping_sets() 3559 if grouping_sets: 3560 elements["grouping_sets"].extend(grouping_sets) 3561 3562 rollup = None 3563 cube = None 3564 totals = None 3565 3566 index = self._index 3567 with_ = self._match(TokenType.WITH) 3568 if self._match(TokenType.ROLLUP): 3569 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3570 elements["rollup"].extend(ensure_list(rollup)) 3571 3572 if self._match(TokenType.CUBE): 3573 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3574 elements["cube"].extend(ensure_list(cube)) 3575 3576 if self._match_text_seq("TOTALS"): 3577 totals = True 3578 elements["totals"] = True # type: ignore 3579 3580 if not (grouping_sets or rollup or cube or totals): 3581 if with_: 3582 self._retreat(index) 3583 break 3584 3585 return self.expression(exp.Group, **elements) # type: ignore 3586 3587 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3588 if not self._match(TokenType.GROUPING_SETS): 3589 return None 3590 3591 return self._parse_wrapped_csv(self._parse_grouping_set) 3592 3593 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3594 if self._match(TokenType.L_PAREN): 3595 grouping_set = self._parse_csv(self._parse_column) 3596 self._match_r_paren() 3597 return self.expression(exp.Tuple, expressions=grouping_set) 3598 3599 return self._parse_column() 3600 3601 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3602 if not skip_having_token and not self._match(TokenType.HAVING): 3603 return None 3604 return self.expression(exp.Having, this=self._parse_conjunction()) 3605 3606 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3607 if not self._match(TokenType.QUALIFY): 3608 return None 3609 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3610 3611 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3612 if skip_start_token: 3613 start = None 3614 elif self._match(TokenType.START_WITH): 3615 start = self._parse_conjunction() 3616 else: 3617 return None 3618 3619 self._match(TokenType.CONNECT_BY) 3620 nocycle = self._match_text_seq("NOCYCLE") 3621 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3622 exp.Prior, this=self._parse_bitwise() 3623 ) 3624 connect = self._parse_conjunction() 3625 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3626 3627 if not start and self._match(TokenType.START_WITH): 3628 start = self._parse_conjunction() 3629 3630 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3631 3632 def _parse_name_as_expression(self) -> exp.Alias: 3633 return self.expression( 3634 exp.Alias, 3635 alias=self._parse_id_var(any_token=True), 3636 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3637 ) 3638 3639 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3640 if self._match_text_seq("INTERPOLATE"): 3641 return self._parse_wrapped_csv(self._parse_name_as_expression) 3642 return None 3643 3644 def _parse_order( 3645 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3646 ) -> t.Optional[exp.Expression]: 3647 siblings = None 3648 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3649 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3650 return this 3651 3652 siblings = True 3653 3654 return self.expression( 3655 exp.Order, 3656 this=this, 3657 expressions=self._parse_csv(self._parse_ordered), 3658 interpolate=self._parse_interpolate(), 3659 siblings=siblings, 3660 ) 3661 3662 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3663 if not self._match(token): 3664 return None 3665 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3666 3667 def _parse_ordered( 3668 self, parse_method: t.Optional[t.Callable] = None 3669 ) -> t.Optional[exp.Ordered]: 3670 this = parse_method() if parse_method else self._parse_conjunction() 3671 if not this: 3672 return None 3673 3674 asc = self._match(TokenType.ASC) 3675 desc = self._match(TokenType.DESC) or (asc and False) 3676 3677 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3678 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3679 3680 nulls_first = is_nulls_first or False 3681 explicitly_null_ordered = is_nulls_first or is_nulls_last 3682 3683 if ( 3684 not explicitly_null_ordered 3685 and ( 3686 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3687 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3688 ) 3689 and self.dialect.NULL_ORDERING != "nulls_are_last" 3690 ): 3691 nulls_first = True 3692 3693 if self._match_text_seq("WITH", "FILL"): 3694 with_fill = self.expression( 3695 exp.WithFill, 3696 **{ # type: ignore 3697 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3698 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3699 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3700 }, 3701 ) 3702 else: 3703 with_fill = None 3704 3705 return self.expression( 3706 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3707 ) 3708 3709 def _parse_limit( 3710 self, 3711 this: t.Optional[exp.Expression] = None, 3712 top: bool = False, 3713 skip_limit_token: bool = False, 3714 ) -> t.Optional[exp.Expression]: 3715 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3716 comments = self._prev_comments 3717 if top: 3718 limit_paren = self._match(TokenType.L_PAREN) 3719 expression = self._parse_term() if limit_paren else self._parse_number() 3720 3721 if limit_paren: 3722 self._match_r_paren() 3723 else: 3724 expression = self._parse_term() 3725 3726 if self._match(TokenType.COMMA): 3727 offset = expression 3728 expression = self._parse_term() 3729 else: 3730 offset = None 3731 3732 limit_exp = self.expression( 3733 exp.Limit, 3734 this=this, 3735 expression=expression, 3736 offset=offset, 3737 comments=comments, 3738 expressions=self._parse_limit_by(), 3739 ) 3740 3741 return limit_exp 3742 3743 if self._match(TokenType.FETCH): 3744 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3745 direction = self._prev.text.upper() if direction else "FIRST" 3746 3747 count = self._parse_field(tokens=self.FETCH_TOKENS) 3748 percent = self._match(TokenType.PERCENT) 3749 3750 self._match_set((TokenType.ROW, TokenType.ROWS)) 3751 3752 only = self._match_text_seq("ONLY") 3753 with_ties = self._match_text_seq("WITH", "TIES") 3754 3755 if only and with_ties: 3756 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3757 3758 return self.expression( 3759 exp.Fetch, 3760 direction=direction, 3761 count=count, 3762 percent=percent, 3763 with_ties=with_ties, 3764 ) 3765 3766 return this 3767 3768 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3769 if not self._match(TokenType.OFFSET): 3770 return this 3771 3772 count = self._parse_term() 3773 self._match_set((TokenType.ROW, TokenType.ROWS)) 3774 3775 return self.expression( 3776 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3777 ) 3778 3779 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3780 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3781 3782 def _parse_locks(self) -> t.List[exp.Lock]: 3783 locks = [] 3784 while True: 3785 if self._match_text_seq("FOR", "UPDATE"): 3786 update = True 3787 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3788 "LOCK", "IN", "SHARE", "MODE" 3789 ): 3790 update = False 3791 else: 3792 break 3793 3794 expressions = None 3795 if self._match_text_seq("OF"): 3796 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3797 3798 wait: t.Optional[bool | exp.Expression] = None 3799 if self._match_text_seq("NOWAIT"): 3800 wait = True 3801 elif self._match_text_seq("WAIT"): 3802 wait = self._parse_primary() 3803 elif self._match_text_seq("SKIP", "LOCKED"): 3804 wait = False 3805 3806 locks.append( 3807 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3808 ) 3809 3810 return locks 3811 3812 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3813 while this and self._match_set(self.SET_OPERATIONS): 3814 token_type = self._prev.token_type 3815 3816 if token_type == TokenType.UNION: 3817 operation = exp.Union 3818 elif token_type == TokenType.EXCEPT: 3819 operation = exp.Except 3820 else: 3821 operation = exp.Intersect 3822 3823 comments = self._prev.comments 3824 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3825 by_name = self._match_text_seq("BY", "NAME") 3826 expression = self._parse_select(nested=True, parse_set_operation=False) 3827 3828 this = self.expression( 3829 operation, 3830 comments=comments, 3831 this=this, 3832 distinct=distinct, 3833 by_name=by_name, 3834 expression=expression, 3835 ) 3836 3837 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3838 expression = this.expression 3839 3840 if expression: 3841 for arg in self.UNION_MODIFIERS: 3842 expr = expression.args.get(arg) 3843 if expr: 3844 this.set(arg, expr.pop()) 3845 3846 return this 3847 3848 def _parse_expression(self) -> t.Optional[exp.Expression]: 3849 return self._parse_alias(self._parse_conjunction()) 3850 3851 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3852 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3853 3854 def _parse_equality(self) -> t.Optional[exp.Expression]: 3855 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3856 3857 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3858 return self._parse_tokens(self._parse_range, self.COMPARISON) 3859 3860 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3861 this = this or self._parse_bitwise() 3862 negate = self._match(TokenType.NOT) 3863 3864 if self._match_set(self.RANGE_PARSERS): 3865 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3866 if not expression: 3867 return this 3868 3869 this = expression 3870 elif self._match(TokenType.ISNULL): 3871 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3872 3873 # Postgres supports ISNULL and NOTNULL for conditions. 3874 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3875 if self._match(TokenType.NOTNULL): 3876 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3877 this = self.expression(exp.Not, this=this) 3878 3879 if negate: 3880 this = self.expression(exp.Not, this=this) 3881 3882 if self._match(TokenType.IS): 3883 this = self._parse_is(this) 3884 3885 return this 3886 3887 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3888 index = self._index - 1 3889 negate = self._match(TokenType.NOT) 3890 3891 if self._match_text_seq("DISTINCT", "FROM"): 3892 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3893 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3894 3895 expression = self._parse_null() or self._parse_boolean() 3896 if not expression: 3897 self._retreat(index) 3898 return None 3899 3900 this = self.expression(exp.Is, this=this, expression=expression) 3901 return self.expression(exp.Not, this=this) if negate else this 3902 3903 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3904 unnest = self._parse_unnest(with_alias=False) 3905 if unnest: 3906 this = self.expression(exp.In, this=this, unnest=unnest) 3907 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3908 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3909 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3910 3911 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3912 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3913 else: 3914 this = self.expression(exp.In, this=this, expressions=expressions) 3915 3916 if matched_l_paren: 3917 self._match_r_paren(this) 3918 elif not self._match(TokenType.R_BRACKET, expression=this): 3919 self.raise_error("Expecting ]") 3920 else: 3921 this = self.expression(exp.In, this=this, field=self._parse_field()) 3922 3923 return this 3924 3925 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3926 low = self._parse_bitwise() 3927 self._match(TokenType.AND) 3928 high = self._parse_bitwise() 3929 return self.expression(exp.Between, this=this, low=low, high=high) 3930 3931 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3932 if not self._match(TokenType.ESCAPE): 3933 return this 3934 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3935 3936 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3937 index = self._index 3938 3939 if not self._match(TokenType.INTERVAL) and match_interval: 3940 return None 3941 3942 if self._match(TokenType.STRING, advance=False): 3943 this = self._parse_primary() 3944 else: 3945 this = self._parse_term() 3946 3947 if not this or ( 3948 isinstance(this, exp.Column) 3949 and not this.table 3950 and not this.this.quoted 3951 and this.name.upper() == "IS" 3952 ): 3953 self._retreat(index) 3954 return None 3955 3956 unit = self._parse_function() or ( 3957 not self._match(TokenType.ALIAS, advance=False) 3958 and self._parse_var(any_token=True, upper=True) 3959 ) 3960 3961 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3962 # each INTERVAL expression into this canonical form so it's easy to transpile 3963 if this and this.is_number: 3964 this = exp.Literal.string(this.name) 3965 elif this and this.is_string: 3966 parts = this.name.split() 3967 3968 if len(parts) == 2: 3969 if unit: 3970 # This is not actually a unit, it's something else (e.g. a "window side") 3971 unit = None 3972 self._retreat(self._index - 1) 3973 3974 this = exp.Literal.string(parts[0]) 3975 unit = self.expression(exp.Var, this=parts[1].upper()) 3976 3977 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3978 unit = self.expression( 3979 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3980 ) 3981 3982 return self.expression(exp.Interval, this=this, unit=unit) 3983 3984 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3985 this = self._parse_term() 3986 3987 while True: 3988 if self._match_set(self.BITWISE): 3989 this = self.expression( 3990 self.BITWISE[self._prev.token_type], 3991 this=this, 3992 expression=self._parse_term(), 3993 ) 3994 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3995 this = self.expression( 3996 exp.DPipe, 3997 this=this, 3998 expression=self._parse_term(), 3999 safe=not self.dialect.STRICT_STRING_CONCAT, 4000 ) 4001 elif self._match(TokenType.DQMARK): 4002 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4003 elif self._match_pair(TokenType.LT, TokenType.LT): 4004 this = self.expression( 4005 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4006 ) 4007 elif self._match_pair(TokenType.GT, TokenType.GT): 4008 this = self.expression( 4009 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4010 ) 4011 else: 4012 break 4013 4014 return this 4015 4016 def _parse_term(self) -> t.Optional[exp.Expression]: 4017 return self._parse_tokens(self._parse_factor, self.TERM) 4018 4019 def _parse_factor(self) -> t.Optional[exp.Expression]: 4020 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4021 this = parse_method() 4022 4023 while self._match_set(self.FACTOR): 4024 this = self.expression( 4025 self.FACTOR[self._prev.token_type], 4026 this=this, 4027 comments=self._prev_comments, 4028 expression=parse_method(), 4029 ) 4030 if isinstance(this, exp.Div): 4031 this.args["typed"] = self.dialect.TYPED_DIVISION 4032 this.args["safe"] = self.dialect.SAFE_DIVISION 4033 4034 return this 4035 4036 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4037 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4038 4039 def _parse_unary(self) -> t.Optional[exp.Expression]: 4040 if self._match_set(self.UNARY_PARSERS): 4041 return self.UNARY_PARSERS[self._prev.token_type](self) 4042 return self._parse_at_time_zone(self._parse_type()) 4043 4044 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4045 interval = parse_interval and self._parse_interval() 4046 if interval: 4047 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4048 while True: 4049 index = self._index 4050 self._match(TokenType.PLUS) 4051 4052 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4053 self._retreat(index) 4054 break 4055 4056 interval = self.expression( # type: ignore 4057 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4058 ) 4059 4060 return interval 4061 4062 index = self._index 4063 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4064 this = self._parse_column() 4065 4066 if data_type: 4067 if isinstance(this, exp.Literal): 4068 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4069 if parser: 4070 return parser(self, this, data_type) 4071 return self.expression(exp.Cast, this=this, to=data_type) 4072 if not data_type.expressions: 4073 self._retreat(index) 4074 return self._parse_column() 4075 return self._parse_column_ops(data_type) 4076 4077 return this and self._parse_column_ops(this) 4078 4079 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4080 this = self._parse_type() 4081 if not this: 4082 return None 4083 4084 if isinstance(this, exp.Column) and not this.table: 4085 this = exp.var(this.name.upper()) 4086 4087 return self.expression( 4088 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4089 ) 4090 4091 def _parse_types( 4092 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4093 ) -> t.Optional[exp.Expression]: 4094 index = self._index 4095 4096 prefix = self._match_text_seq("SYSUDTLIB", ".") 4097 4098 if not self._match_set(self.TYPE_TOKENS): 4099 identifier = allow_identifiers and self._parse_id_var( 4100 any_token=False, tokens=(TokenType.VAR,) 4101 ) 4102 if identifier: 4103 tokens = self.dialect.tokenize(identifier.name) 4104 4105 if len(tokens) != 1: 4106 self.raise_error("Unexpected identifier", self._prev) 4107 4108 if tokens[0].token_type in self.TYPE_TOKENS: 4109 self._prev = tokens[0] 4110 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4111 type_name = identifier.name 4112 4113 while self._match(TokenType.DOT): 4114 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4115 4116 return exp.DataType.build(type_name, udt=True) 4117 else: 4118 self._retreat(self._index - 1) 4119 return None 4120 else: 4121 return None 4122 4123 type_token = self._prev.token_type 4124 4125 if type_token == TokenType.PSEUDO_TYPE: 4126 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4127 4128 if type_token == TokenType.OBJECT_IDENTIFIER: 4129 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4130 4131 nested = type_token in self.NESTED_TYPE_TOKENS 4132 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4133 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4134 expressions = None 4135 maybe_func = False 4136 4137 if self._match(TokenType.L_PAREN): 4138 if is_struct: 4139 expressions = self._parse_csv(self._parse_struct_types) 4140 elif nested: 4141 expressions = self._parse_csv( 4142 lambda: self._parse_types( 4143 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4144 ) 4145 ) 4146 elif type_token in self.ENUM_TYPE_TOKENS: 4147 expressions = self._parse_csv(self._parse_equality) 4148 elif is_aggregate: 4149 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4150 any_token=False, tokens=(TokenType.VAR,) 4151 ) 4152 if not func_or_ident or not self._match(TokenType.COMMA): 4153 return None 4154 expressions = self._parse_csv( 4155 lambda: self._parse_types( 4156 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4157 ) 4158 ) 4159 expressions.insert(0, func_or_ident) 4160 else: 4161 expressions = self._parse_csv(self._parse_type_size) 4162 4163 if not expressions or not self._match(TokenType.R_PAREN): 4164 self._retreat(index) 4165 return None 4166 4167 maybe_func = True 4168 4169 this: t.Optional[exp.Expression] = None 4170 values: t.Optional[t.List[exp.Expression]] = None 4171 4172 if nested and self._match(TokenType.LT): 4173 if is_struct: 4174 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4175 else: 4176 expressions = self._parse_csv( 4177 lambda: self._parse_types( 4178 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4179 ) 4180 ) 4181 4182 if not self._match(TokenType.GT): 4183 self.raise_error("Expecting >") 4184 4185 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4186 values = self._parse_csv(self._parse_conjunction) 4187 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4188 4189 if type_token in self.TIMESTAMPS: 4190 if self._match_text_seq("WITH", "TIME", "ZONE"): 4191 maybe_func = False 4192 tz_type = ( 4193 exp.DataType.Type.TIMETZ 4194 if type_token in self.TIMES 4195 else exp.DataType.Type.TIMESTAMPTZ 4196 ) 4197 this = exp.DataType(this=tz_type, expressions=expressions) 4198 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4199 maybe_func = False 4200 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4201 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4202 maybe_func = False 4203 elif type_token == TokenType.INTERVAL: 4204 unit = self._parse_var(upper=True) 4205 if unit: 4206 if self._match_text_seq("TO"): 4207 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4208 4209 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4210 else: 4211 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4212 4213 if maybe_func and check_func: 4214 index2 = self._index 4215 peek = self._parse_string() 4216 4217 if not peek: 4218 self._retreat(index) 4219 return None 4220 4221 self._retreat(index2) 4222 4223 if not this: 4224 if self._match_text_seq("UNSIGNED"): 4225 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4226 if not unsigned_type_token: 4227 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4228 4229 type_token = unsigned_type_token or type_token 4230 4231 this = exp.DataType( 4232 this=exp.DataType.Type[type_token.value], 4233 expressions=expressions, 4234 nested=nested, 4235 values=values, 4236 prefix=prefix, 4237 ) 4238 4239 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4240 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4241 4242 return this 4243 4244 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4245 index = self._index 4246 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4247 self._match(TokenType.COLON) 4248 column_def = self._parse_column_def(this) 4249 4250 if type_required and ( 4251 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4252 ): 4253 self._retreat(index) 4254 return self._parse_types() 4255 4256 return column_def 4257 4258 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4259 if not self._match_text_seq("AT", "TIME", "ZONE"): 4260 return this 4261 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4262 4263 def _parse_column(self) -> t.Optional[exp.Expression]: 4264 this = self._parse_column_reference() 4265 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4266 4267 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4268 this = self._parse_field() 4269 if ( 4270 not this 4271 and self._match(TokenType.VALUES, advance=False) 4272 and self.VALUES_FOLLOWED_BY_PAREN 4273 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4274 ): 4275 this = self._parse_id_var() 4276 4277 if isinstance(this, exp.Identifier): 4278 # We bubble up comments from the Identifier to the Column 4279 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4280 4281 return this 4282 4283 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4284 this = self._parse_bracket(this) 4285 4286 while self._match_set(self.COLUMN_OPERATORS): 4287 op_token = self._prev.token_type 4288 op = self.COLUMN_OPERATORS.get(op_token) 4289 4290 if op_token == TokenType.DCOLON: 4291 field = self._parse_types() 4292 if not field: 4293 self.raise_error("Expected type") 4294 elif op and self._curr: 4295 field = self._parse_column_reference() 4296 else: 4297 field = self._parse_field(any_token=True, anonymous_func=True) 4298 4299 if isinstance(field, exp.Func) and this: 4300 # bigquery allows function calls like x.y.count(...) 4301 # SAFE.SUBSTR(...) 4302 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4303 this = exp.replace_tree( 4304 this, 4305 lambda n: ( 4306 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4307 if n.table 4308 else n.this 4309 ) 4310 if isinstance(n, exp.Column) 4311 else n, 4312 ) 4313 4314 if op: 4315 this = op(self, this, field) 4316 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4317 this = self.expression( 4318 exp.Column, 4319 this=field, 4320 table=this.this, 4321 db=this.args.get("table"), 4322 catalog=this.args.get("db"), 4323 ) 4324 else: 4325 this = self.expression(exp.Dot, this=this, expression=field) 4326 this = self._parse_bracket(this) 4327 return this 4328 4329 def _parse_primary(self) -> t.Optional[exp.Expression]: 4330 if self._match_set(self.PRIMARY_PARSERS): 4331 token_type = self._prev.token_type 4332 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4333 4334 if token_type == TokenType.STRING: 4335 expressions = [primary] 4336 while self._match(TokenType.STRING): 4337 expressions.append(exp.Literal.string(self._prev.text)) 4338 4339 if len(expressions) > 1: 4340 return self.expression(exp.Concat, expressions=expressions) 4341 4342 return primary 4343 4344 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4345 return exp.Literal.number(f"0.{self._prev.text}") 4346 4347 if self._match(TokenType.L_PAREN): 4348 comments = self._prev_comments 4349 query = self._parse_select() 4350 4351 if query: 4352 expressions = [query] 4353 else: 4354 expressions = self._parse_expressions() 4355 4356 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4357 4358 if not this and self._match(TokenType.R_PAREN, advance=False): 4359 this = self.expression(exp.Tuple) 4360 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4361 this = self._parse_subquery(this=this, parse_alias=False) 4362 elif isinstance(this, exp.Subquery): 4363 this = self._parse_subquery( 4364 this=self._parse_set_operations(this), parse_alias=False 4365 ) 4366 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4367 this = self.expression(exp.Tuple, expressions=expressions) 4368 else: 4369 this = self.expression(exp.Paren, this=this) 4370 4371 if this: 4372 this.add_comments(comments) 4373 4374 self._match_r_paren(expression=this) 4375 return this 4376 4377 return None 4378 4379 def _parse_field( 4380 self, 4381 any_token: bool = False, 4382 tokens: t.Optional[t.Collection[TokenType]] = None, 4383 anonymous_func: bool = False, 4384 ) -> t.Optional[exp.Expression]: 4385 if anonymous_func: 4386 field = ( 4387 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4388 or self._parse_primary() 4389 ) 4390 else: 4391 field = self._parse_primary() or self._parse_function( 4392 anonymous=anonymous_func, any_token=any_token 4393 ) 4394 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4395 4396 def _parse_function( 4397 self, 4398 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4399 anonymous: bool = False, 4400 optional_parens: bool = True, 4401 any_token: bool = False, 4402 ) -> t.Optional[exp.Expression]: 4403 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4404 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4405 fn_syntax = False 4406 if ( 4407 self._match(TokenType.L_BRACE, advance=False) 4408 and self._next 4409 and self._next.text.upper() == "FN" 4410 ): 4411 self._advance(2) 4412 fn_syntax = True 4413 4414 func = self._parse_function_call( 4415 functions=functions, 4416 anonymous=anonymous, 4417 optional_parens=optional_parens, 4418 any_token=any_token, 4419 ) 4420 4421 if fn_syntax: 4422 self._match(TokenType.R_BRACE) 4423 4424 return func 4425 4426 def _parse_function_call( 4427 self, 4428 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4429 anonymous: bool = False, 4430 optional_parens: bool = True, 4431 any_token: bool = False, 4432 ) -> t.Optional[exp.Expression]: 4433 if not self._curr: 4434 return None 4435 4436 comments = self._curr.comments 4437 token_type = self._curr.token_type 4438 this = self._curr.text 4439 upper = this.upper() 4440 4441 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4442 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4443 self._advance() 4444 return self._parse_window(parser(self)) 4445 4446 if not self._next or self._next.token_type != TokenType.L_PAREN: 4447 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4448 self._advance() 4449 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4450 4451 return None 4452 4453 if any_token: 4454 if token_type in self.RESERVED_TOKENS: 4455 return None 4456 elif token_type not in self.FUNC_TOKENS: 4457 return None 4458 4459 self._advance(2) 4460 4461 parser = self.FUNCTION_PARSERS.get(upper) 4462 if parser and not anonymous: 4463 this = parser(self) 4464 else: 4465 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4466 4467 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4468 this = self.expression(subquery_predicate, this=self._parse_select()) 4469 self._match_r_paren() 4470 return this 4471 4472 if functions is None: 4473 functions = self.FUNCTIONS 4474 4475 function = functions.get(upper) 4476 4477 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4478 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4479 4480 if alias: 4481 args = self._kv_to_prop_eq(args) 4482 4483 if function and not anonymous: 4484 if "dialect" in function.__code__.co_varnames: 4485 func = function(args, dialect=self.dialect) 4486 else: 4487 func = function(args) 4488 4489 func = self.validate_expression(func, args) 4490 if not self.dialect.NORMALIZE_FUNCTIONS: 4491 func.meta["name"] = this 4492 4493 this = func 4494 else: 4495 if token_type == TokenType.IDENTIFIER: 4496 this = exp.Identifier(this=this, quoted=True) 4497 this = self.expression(exp.Anonymous, this=this, expressions=args) 4498 4499 if isinstance(this, exp.Expression): 4500 this.add_comments(comments) 4501 4502 self._match_r_paren(this) 4503 return self._parse_window(this) 4504 4505 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4506 transformed = [] 4507 4508 for e in expressions: 4509 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4510 if isinstance(e, exp.Alias): 4511 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4512 4513 if not isinstance(e, exp.PropertyEQ): 4514 e = self.expression( 4515 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4516 ) 4517 4518 if isinstance(e.this, exp.Column): 4519 e.this.replace(e.this.this) 4520 4521 transformed.append(e) 4522 4523 return transformed 4524 4525 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4526 return self._parse_column_def(self._parse_id_var()) 4527 4528 def _parse_user_defined_function( 4529 self, kind: t.Optional[TokenType] = None 4530 ) -> t.Optional[exp.Expression]: 4531 this = self._parse_id_var() 4532 4533 while self._match(TokenType.DOT): 4534 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4535 4536 if not self._match(TokenType.L_PAREN): 4537 return this 4538 4539 expressions = self._parse_csv(self._parse_function_parameter) 4540 self._match_r_paren() 4541 return self.expression( 4542 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4543 ) 4544 4545 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4546 literal = self._parse_primary() 4547 if literal: 4548 return self.expression(exp.Introducer, this=token.text, expression=literal) 4549 4550 return self.expression(exp.Identifier, this=token.text) 4551 4552 def _parse_session_parameter(self) -> exp.SessionParameter: 4553 kind = None 4554 this = self._parse_id_var() or self._parse_primary() 4555 4556 if this and self._match(TokenType.DOT): 4557 kind = this.name 4558 this = self._parse_var() or self._parse_primary() 4559 4560 return self.expression(exp.SessionParameter, this=this, kind=kind) 4561 4562 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4563 index = self._index 4564 4565 if self._match(TokenType.L_PAREN): 4566 expressions = t.cast( 4567 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4568 ) 4569 4570 if not self._match(TokenType.R_PAREN): 4571 self._retreat(index) 4572 else: 4573 expressions = [self._parse_id_var()] 4574 4575 if self._match_set(self.LAMBDAS): 4576 return self.LAMBDAS[self._prev.token_type](self, expressions) 4577 4578 self._retreat(index) 4579 4580 this: t.Optional[exp.Expression] 4581 4582 if self._match(TokenType.DISTINCT): 4583 this = self.expression( 4584 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4585 ) 4586 else: 4587 this = self._parse_select_or_expression(alias=alias) 4588 4589 return self._parse_limit( 4590 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4591 ) 4592 4593 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4594 index = self._index 4595 if not self._match(TokenType.L_PAREN): 4596 return this 4597 4598 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4599 # expr can be of both types 4600 if self._match_set(self.SELECT_START_TOKENS): 4601 self._retreat(index) 4602 return this 4603 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4604 self._match_r_paren() 4605 return self.expression(exp.Schema, this=this, expressions=args) 4606 4607 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4608 return self._parse_column_def(self._parse_field(any_token=True)) 4609 4610 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4611 # column defs are not really columns, they're identifiers 4612 if isinstance(this, exp.Column): 4613 this = this.this 4614 4615 kind = self._parse_types(schema=True) 4616 4617 if self._match_text_seq("FOR", "ORDINALITY"): 4618 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4619 4620 constraints: t.List[exp.Expression] = [] 4621 4622 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4623 ("ALIAS", "MATERIALIZED") 4624 ): 4625 persisted = self._prev.text.upper() == "MATERIALIZED" 4626 constraints.append( 4627 self.expression( 4628 exp.ComputedColumnConstraint, 4629 this=self._parse_conjunction(), 4630 persisted=persisted or self._match_text_seq("PERSISTED"), 4631 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4632 ) 4633 ) 4634 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4635 self._match(TokenType.ALIAS) 4636 constraints.append( 4637 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4638 ) 4639 4640 while True: 4641 constraint = self._parse_column_constraint() 4642 if not constraint: 4643 break 4644 constraints.append(constraint) 4645 4646 if not kind and not constraints: 4647 return this 4648 4649 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4650 4651 def _parse_auto_increment( 4652 self, 4653 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4654 start = None 4655 increment = None 4656 4657 if self._match(TokenType.L_PAREN, advance=False): 4658 args = self._parse_wrapped_csv(self._parse_bitwise) 4659 start = seq_get(args, 0) 4660 increment = seq_get(args, 1) 4661 elif self._match_text_seq("START"): 4662 start = self._parse_bitwise() 4663 self._match_text_seq("INCREMENT") 4664 increment = self._parse_bitwise() 4665 4666 if start and increment: 4667 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4668 4669 return exp.AutoIncrementColumnConstraint() 4670 4671 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4672 if not self._match_text_seq("REFRESH"): 4673 self._retreat(self._index - 1) 4674 return None 4675 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4676 4677 def _parse_compress(self) -> exp.CompressColumnConstraint: 4678 if self._match(TokenType.L_PAREN, advance=False): 4679 return self.expression( 4680 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4681 ) 4682 4683 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4684 4685 def _parse_generated_as_identity( 4686 self, 4687 ) -> ( 4688 exp.GeneratedAsIdentityColumnConstraint 4689 | exp.ComputedColumnConstraint 4690 | exp.GeneratedAsRowColumnConstraint 4691 ): 4692 if self._match_text_seq("BY", "DEFAULT"): 4693 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4694 this = self.expression( 4695 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4696 ) 4697 else: 4698 self._match_text_seq("ALWAYS") 4699 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4700 4701 self._match(TokenType.ALIAS) 4702 4703 if self._match_text_seq("ROW"): 4704 start = self._match_text_seq("START") 4705 if not start: 4706 self._match(TokenType.END) 4707 hidden = self._match_text_seq("HIDDEN") 4708 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4709 4710 identity = self._match_text_seq("IDENTITY") 4711 4712 if self._match(TokenType.L_PAREN): 4713 if self._match(TokenType.START_WITH): 4714 this.set("start", self._parse_bitwise()) 4715 if self._match_text_seq("INCREMENT", "BY"): 4716 this.set("increment", self._parse_bitwise()) 4717 if self._match_text_seq("MINVALUE"): 4718 this.set("minvalue", self._parse_bitwise()) 4719 if self._match_text_seq("MAXVALUE"): 4720 this.set("maxvalue", self._parse_bitwise()) 4721 4722 if self._match_text_seq("CYCLE"): 4723 this.set("cycle", True) 4724 elif self._match_text_seq("NO", "CYCLE"): 4725 this.set("cycle", False) 4726 4727 if not identity: 4728 this.set("expression", self._parse_range()) 4729 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4730 args = self._parse_csv(self._parse_bitwise) 4731 this.set("start", seq_get(args, 0)) 4732 this.set("increment", seq_get(args, 1)) 4733 4734 self._match_r_paren() 4735 4736 return this 4737 4738 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4739 self._match_text_seq("LENGTH") 4740 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4741 4742 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4743 if self._match_text_seq("NULL"): 4744 return self.expression(exp.NotNullColumnConstraint) 4745 if self._match_text_seq("CASESPECIFIC"): 4746 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4747 if self._match_text_seq("FOR", "REPLICATION"): 4748 return self.expression(exp.NotForReplicationColumnConstraint) 4749 return None 4750 4751 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4752 if self._match(TokenType.CONSTRAINT): 4753 this = self._parse_id_var() 4754 else: 4755 this = None 4756 4757 if self._match_texts(self.CONSTRAINT_PARSERS): 4758 return self.expression( 4759 exp.ColumnConstraint, 4760 this=this, 4761 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4762 ) 4763 4764 return this 4765 4766 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4767 if not self._match(TokenType.CONSTRAINT): 4768 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4769 4770 return self.expression( 4771 exp.Constraint, 4772 this=self._parse_id_var(), 4773 expressions=self._parse_unnamed_constraints(), 4774 ) 4775 4776 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4777 constraints = [] 4778 while True: 4779 constraint = self._parse_unnamed_constraint() or self._parse_function() 4780 if not constraint: 4781 break 4782 constraints.append(constraint) 4783 4784 return constraints 4785 4786 def _parse_unnamed_constraint( 4787 self, constraints: t.Optional[t.Collection[str]] = None 4788 ) -> t.Optional[exp.Expression]: 4789 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4790 constraints or self.CONSTRAINT_PARSERS 4791 ): 4792 return None 4793 4794 constraint = self._prev.text.upper() 4795 if constraint not in self.CONSTRAINT_PARSERS: 4796 self.raise_error(f"No parser found for schema constraint {constraint}.") 4797 4798 return self.CONSTRAINT_PARSERS[constraint](self) 4799 4800 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4801 self._match_text_seq("KEY") 4802 return self.expression( 4803 exp.UniqueColumnConstraint, 4804 this=self._parse_schema(self._parse_id_var(any_token=False)), 4805 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4806 on_conflict=self._parse_on_conflict(), 4807 ) 4808 4809 def _parse_key_constraint_options(self) -> t.List[str]: 4810 options = [] 4811 while True: 4812 if not self._curr: 4813 break 4814 4815 if self._match(TokenType.ON): 4816 action = None 4817 on = self._advance_any() and self._prev.text 4818 4819 if self._match_text_seq("NO", "ACTION"): 4820 action = "NO ACTION" 4821 elif self._match_text_seq("CASCADE"): 4822 action = "CASCADE" 4823 elif self._match_text_seq("RESTRICT"): 4824 action = "RESTRICT" 4825 elif self._match_pair(TokenType.SET, TokenType.NULL): 4826 action = "SET NULL" 4827 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4828 action = "SET DEFAULT" 4829 else: 4830 self.raise_error("Invalid key constraint") 4831 4832 options.append(f"ON {on} {action}") 4833 elif self._match_text_seq("NOT", "ENFORCED"): 4834 options.append("NOT ENFORCED") 4835 elif self._match_text_seq("DEFERRABLE"): 4836 options.append("DEFERRABLE") 4837 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4838 options.append("INITIALLY DEFERRED") 4839 elif self._match_text_seq("NORELY"): 4840 options.append("NORELY") 4841 elif self._match_text_seq("MATCH", "FULL"): 4842 options.append("MATCH FULL") 4843 else: 4844 break 4845 4846 return options 4847 4848 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4849 if match and not self._match(TokenType.REFERENCES): 4850 return None 4851 4852 expressions = None 4853 this = self._parse_table(schema=True) 4854 options = self._parse_key_constraint_options() 4855 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4856 4857 def _parse_foreign_key(self) -> exp.ForeignKey: 4858 expressions = self._parse_wrapped_id_vars() 4859 reference = self._parse_references() 4860 options = {} 4861 4862 while self._match(TokenType.ON): 4863 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4864 self.raise_error("Expected DELETE or UPDATE") 4865 4866 kind = self._prev.text.lower() 4867 4868 if self._match_text_seq("NO", "ACTION"): 4869 action = "NO ACTION" 4870 elif self._match(TokenType.SET): 4871 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4872 action = "SET " + self._prev.text.upper() 4873 else: 4874 self._advance() 4875 action = self._prev.text.upper() 4876 4877 options[kind] = action 4878 4879 return self.expression( 4880 exp.ForeignKey, 4881 expressions=expressions, 4882 reference=reference, 4883 **options, # type: ignore 4884 ) 4885 4886 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4887 return self._parse_field() 4888 4889 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4890 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4891 self._retreat(self._index - 1) 4892 return None 4893 4894 id_vars = self._parse_wrapped_id_vars() 4895 return self.expression( 4896 exp.PeriodForSystemTimeConstraint, 4897 this=seq_get(id_vars, 0), 4898 expression=seq_get(id_vars, 1), 4899 ) 4900 4901 def _parse_primary_key( 4902 self, wrapped_optional: bool = False, in_props: bool = False 4903 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4904 desc = ( 4905 self._match_set((TokenType.ASC, TokenType.DESC)) 4906 and self._prev.token_type == TokenType.DESC 4907 ) 4908 4909 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4910 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4911 4912 expressions = self._parse_wrapped_csv( 4913 self._parse_primary_key_part, optional=wrapped_optional 4914 ) 4915 options = self._parse_key_constraint_options() 4916 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4917 4918 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4919 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4920 4921 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4922 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4923 return this 4924 4925 bracket_kind = self._prev.token_type 4926 expressions = self._parse_csv( 4927 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4928 ) 4929 4930 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4931 self.raise_error("Expected ]") 4932 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4933 self.raise_error("Expected }") 4934 4935 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4936 if bracket_kind == TokenType.L_BRACE: 4937 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4938 elif not this or this.name.upper() == "ARRAY": 4939 this = self.expression(exp.Array, expressions=expressions) 4940 else: 4941 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4942 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4943 4944 self._add_comments(this) 4945 return self._parse_bracket(this) 4946 4947 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4948 if self._match(TokenType.COLON): 4949 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4950 return this 4951 4952 def _parse_case(self) -> t.Optional[exp.Expression]: 4953 ifs = [] 4954 default = None 4955 4956 comments = self._prev_comments 4957 expression = self._parse_conjunction() 4958 4959 while self._match(TokenType.WHEN): 4960 this = self._parse_conjunction() 4961 self._match(TokenType.THEN) 4962 then = self._parse_conjunction() 4963 ifs.append(self.expression(exp.If, this=this, true=then)) 4964 4965 if self._match(TokenType.ELSE): 4966 default = self._parse_conjunction() 4967 4968 if not self._match(TokenType.END): 4969 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4970 default = exp.column("interval") 4971 else: 4972 self.raise_error("Expected END after CASE", self._prev) 4973 4974 return self.expression( 4975 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4976 ) 4977 4978 def _parse_if(self) -> t.Optional[exp.Expression]: 4979 if self._match(TokenType.L_PAREN): 4980 args = self._parse_csv(self._parse_conjunction) 4981 this = self.validate_expression(exp.If.from_arg_list(args), args) 4982 self._match_r_paren() 4983 else: 4984 index = self._index - 1 4985 4986 if self.NO_PAREN_IF_COMMANDS and index == 0: 4987 return self._parse_as_command(self._prev) 4988 4989 condition = self._parse_conjunction() 4990 4991 if not condition: 4992 self._retreat(index) 4993 return None 4994 4995 self._match(TokenType.THEN) 4996 true = self._parse_conjunction() 4997 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4998 self._match(TokenType.END) 4999 this = self.expression(exp.If, this=condition, true=true, false=false) 5000 5001 return this 5002 5003 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5004 if not self._match_text_seq("VALUE", "FOR"): 5005 self._retreat(self._index - 1) 5006 return None 5007 5008 return self.expression( 5009 exp.NextValueFor, 5010 this=self._parse_column(), 5011 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5012 ) 5013 5014 def _parse_extract(self) -> exp.Extract: 5015 this = self._parse_function() or self._parse_var() or self._parse_type() 5016 5017 if self._match(TokenType.FROM): 5018 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5019 5020 if not self._match(TokenType.COMMA): 5021 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5022 5023 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5024 5025 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5026 this = self._parse_conjunction() 5027 5028 if not self._match(TokenType.ALIAS): 5029 if self._match(TokenType.COMMA): 5030 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5031 5032 self.raise_error("Expected AS after CAST") 5033 5034 fmt = None 5035 to = self._parse_types() 5036 5037 if self._match(TokenType.FORMAT): 5038 fmt_string = self._parse_string() 5039 fmt = self._parse_at_time_zone(fmt_string) 5040 5041 if not to: 5042 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5043 if to.this in exp.DataType.TEMPORAL_TYPES: 5044 this = self.expression( 5045 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5046 this=this, 5047 format=exp.Literal.string( 5048 format_time( 5049 fmt_string.this if fmt_string else "", 5050 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5051 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5052 ) 5053 ), 5054 ) 5055 5056 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5057 this.set("zone", fmt.args["zone"]) 5058 return this 5059 elif not to: 5060 self.raise_error("Expected TYPE after CAST") 5061 elif isinstance(to, exp.Identifier): 5062 to = exp.DataType.build(to.name, udt=True) 5063 elif to.this == exp.DataType.Type.CHAR: 5064 if self._match(TokenType.CHARACTER_SET): 5065 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5066 5067 return self.expression( 5068 exp.Cast if strict else exp.TryCast, 5069 this=this, 5070 to=to, 5071 format=fmt, 5072 safe=safe, 5073 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5074 ) 5075 5076 def _parse_string_agg(self) -> exp.Expression: 5077 if self._match(TokenType.DISTINCT): 5078 args: t.List[t.Optional[exp.Expression]] = [ 5079 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5080 ] 5081 if self._match(TokenType.COMMA): 5082 args.extend(self._parse_csv(self._parse_conjunction)) 5083 else: 5084 args = self._parse_csv(self._parse_conjunction) # type: ignore 5085 5086 index = self._index 5087 if not self._match(TokenType.R_PAREN) and args: 5088 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5089 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5090 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5091 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5092 5093 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5094 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5095 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5096 if not self._match_text_seq("WITHIN", "GROUP"): 5097 self._retreat(index) 5098 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5099 5100 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5101 order = self._parse_order(this=seq_get(args, 0)) 5102 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5103 5104 def _parse_convert( 5105 self, strict: bool, safe: t.Optional[bool] = None 5106 ) -> t.Optional[exp.Expression]: 5107 this = self._parse_bitwise() 5108 5109 if self._match(TokenType.USING): 5110 to: t.Optional[exp.Expression] = self.expression( 5111 exp.CharacterSet, this=self._parse_var() 5112 ) 5113 elif self._match(TokenType.COMMA): 5114 to = self._parse_types() 5115 else: 5116 to = None 5117 5118 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5119 5120 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5121 """ 5122 There are generally two variants of the DECODE function: 5123 5124 - DECODE(bin, charset) 5125 - DECODE(expression, search, result [, search, result] ... [, default]) 5126 5127 The second variant will always be parsed into a CASE expression. Note that NULL 5128 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5129 instead of relying on pattern matching. 5130 """ 5131 args = self._parse_csv(self._parse_conjunction) 5132 5133 if len(args) < 3: 5134 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5135 5136 expression, *expressions = args 5137 if not expression: 5138 return None 5139 5140 ifs = [] 5141 for search, result in zip(expressions[::2], expressions[1::2]): 5142 if not search or not result: 5143 return None 5144 5145 if isinstance(search, exp.Literal): 5146 ifs.append( 5147 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5148 ) 5149 elif isinstance(search, exp.Null): 5150 ifs.append( 5151 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5152 ) 5153 else: 5154 cond = exp.or_( 5155 exp.EQ(this=expression.copy(), expression=search), 5156 exp.and_( 5157 exp.Is(this=expression.copy(), expression=exp.Null()), 5158 exp.Is(this=search.copy(), expression=exp.Null()), 5159 copy=False, 5160 ), 5161 copy=False, 5162 ) 5163 ifs.append(exp.If(this=cond, true=result)) 5164 5165 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5166 5167 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5168 self._match_text_seq("KEY") 5169 key = self._parse_column() 5170 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5171 self._match_text_seq("VALUE") 5172 value = self._parse_bitwise() 5173 5174 if not key and not value: 5175 return None 5176 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5177 5178 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5179 if not this or not self._match_text_seq("FORMAT", "JSON"): 5180 return this 5181 5182 return self.expression(exp.FormatJson, this=this) 5183 5184 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5185 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5186 for value in values: 5187 if self._match_text_seq(value, "ON", on): 5188 return f"{value} ON {on}" 5189 5190 return None 5191 5192 @t.overload 5193 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5194 5195 @t.overload 5196 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5197 5198 def _parse_json_object(self, agg=False): 5199 star = self._parse_star() 5200 expressions = ( 5201 [star] 5202 if star 5203 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5204 ) 5205 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5206 5207 unique_keys = None 5208 if self._match_text_seq("WITH", "UNIQUE"): 5209 unique_keys = True 5210 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5211 unique_keys = False 5212 5213 self._match_text_seq("KEYS") 5214 5215 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5216 self._parse_type() 5217 ) 5218 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5219 5220 return self.expression( 5221 exp.JSONObjectAgg if agg else exp.JSONObject, 5222 expressions=expressions, 5223 null_handling=null_handling, 5224 unique_keys=unique_keys, 5225 return_type=return_type, 5226 encoding=encoding, 5227 ) 5228 5229 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5230 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5231 if not self._match_text_seq("NESTED"): 5232 this = self._parse_id_var() 5233 kind = self._parse_types(allow_identifiers=False) 5234 nested = None 5235 else: 5236 this = None 5237 kind = None 5238 nested = True 5239 5240 path = self._match_text_seq("PATH") and self._parse_string() 5241 nested_schema = nested and self._parse_json_schema() 5242 5243 return self.expression( 5244 exp.JSONColumnDef, 5245 this=this, 5246 kind=kind, 5247 path=path, 5248 nested_schema=nested_schema, 5249 ) 5250 5251 def _parse_json_schema(self) -> exp.JSONSchema: 5252 self._match_text_seq("COLUMNS") 5253 return self.expression( 5254 exp.JSONSchema, 5255 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5256 ) 5257 5258 def _parse_json_table(self) -> exp.JSONTable: 5259 this = self._parse_format_json(self._parse_bitwise()) 5260 path = self._match(TokenType.COMMA) and self._parse_string() 5261 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5262 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5263 schema = self._parse_json_schema() 5264 5265 return exp.JSONTable( 5266 this=this, 5267 schema=schema, 5268 path=path, 5269 error_handling=error_handling, 5270 empty_handling=empty_handling, 5271 ) 5272 5273 def _parse_match_against(self) -> exp.MatchAgainst: 5274 expressions = self._parse_csv(self._parse_column) 5275 5276 self._match_text_seq(")", "AGAINST", "(") 5277 5278 this = self._parse_string() 5279 5280 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5281 modifier = "IN NATURAL LANGUAGE MODE" 5282 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5283 modifier = f"{modifier} WITH QUERY EXPANSION" 5284 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5285 modifier = "IN BOOLEAN MODE" 5286 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5287 modifier = "WITH QUERY EXPANSION" 5288 else: 5289 modifier = None 5290 5291 return self.expression( 5292 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5293 ) 5294 5295 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5296 def _parse_open_json(self) -> exp.OpenJSON: 5297 this = self._parse_bitwise() 5298 path = self._match(TokenType.COMMA) and self._parse_string() 5299 5300 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5301 this = self._parse_field(any_token=True) 5302 kind = self._parse_types() 5303 path = self._parse_string() 5304 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5305 5306 return self.expression( 5307 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5308 ) 5309 5310 expressions = None 5311 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5312 self._match_l_paren() 5313 expressions = self._parse_csv(_parse_open_json_column_def) 5314 5315 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5316 5317 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5318 args = self._parse_csv(self._parse_bitwise) 5319 5320 if self._match(TokenType.IN): 5321 return self.expression( 5322 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5323 ) 5324 5325 if haystack_first: 5326 haystack = seq_get(args, 0) 5327 needle = seq_get(args, 1) 5328 else: 5329 needle = seq_get(args, 0) 5330 haystack = seq_get(args, 1) 5331 5332 return self.expression( 5333 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5334 ) 5335 5336 def _parse_predict(self) -> exp.Predict: 5337 self._match_text_seq("MODEL") 5338 this = self._parse_table() 5339 5340 self._match(TokenType.COMMA) 5341 self._match_text_seq("TABLE") 5342 5343 return self.expression( 5344 exp.Predict, 5345 this=this, 5346 expression=self._parse_table(), 5347 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5348 ) 5349 5350 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5351 args = self._parse_csv(self._parse_table) 5352 return exp.JoinHint(this=func_name.upper(), expressions=args) 5353 5354 def _parse_substring(self) -> exp.Substring: 5355 # Postgres supports the form: substring(string [from int] [for int]) 5356 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5357 5358 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5359 5360 if self._match(TokenType.FROM): 5361 args.append(self._parse_bitwise()) 5362 if self._match(TokenType.FOR): 5363 if len(args) == 1: 5364 args.append(exp.Literal.number(1)) 5365 args.append(self._parse_bitwise()) 5366 5367 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5368 5369 def _parse_trim(self) -> exp.Trim: 5370 # https://www.w3resource.com/sql/character-functions/trim.php 5371 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5372 5373 position = None 5374 collation = None 5375 expression = None 5376 5377 if self._match_texts(self.TRIM_TYPES): 5378 position = self._prev.text.upper() 5379 5380 this = self._parse_bitwise() 5381 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5382 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5383 expression = self._parse_bitwise() 5384 5385 if invert_order: 5386 this, expression = expression, this 5387 5388 if self._match(TokenType.COLLATE): 5389 collation = self._parse_bitwise() 5390 5391 return self.expression( 5392 exp.Trim, this=this, position=position, expression=expression, collation=collation 5393 ) 5394 5395 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5396 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5397 5398 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5399 return self._parse_window(self._parse_id_var(), alias=True) 5400 5401 def _parse_respect_or_ignore_nulls( 5402 self, this: t.Optional[exp.Expression] 5403 ) -> t.Optional[exp.Expression]: 5404 if self._match_text_seq("IGNORE", "NULLS"): 5405 return self.expression(exp.IgnoreNulls, this=this) 5406 if self._match_text_seq("RESPECT", "NULLS"): 5407 return self.expression(exp.RespectNulls, this=this) 5408 return this 5409 5410 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5411 if self._match(TokenType.HAVING): 5412 self._match_texts(("MAX", "MIN")) 5413 max = self._prev.text.upper() != "MIN" 5414 return self.expression( 5415 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5416 ) 5417 5418 return this 5419 5420 def _parse_window( 5421 self, this: t.Optional[exp.Expression], alias: bool = False 5422 ) -> t.Optional[exp.Expression]: 5423 func = this 5424 comments = func.comments if isinstance(func, exp.Expression) else None 5425 5426 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5427 self._match(TokenType.WHERE) 5428 this = self.expression( 5429 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5430 ) 5431 self._match_r_paren() 5432 5433 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5434 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5435 if self._match_text_seq("WITHIN", "GROUP"): 5436 order = self._parse_wrapped(self._parse_order) 5437 this = self.expression(exp.WithinGroup, this=this, expression=order) 5438 5439 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5440 # Some dialects choose to implement and some do not. 5441 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5442 5443 # There is some code above in _parse_lambda that handles 5444 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5445 5446 # The below changes handle 5447 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5448 5449 # Oracle allows both formats 5450 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5451 # and Snowflake chose to do the same for familiarity 5452 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5453 if isinstance(this, exp.AggFunc): 5454 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5455 5456 if ignore_respect and ignore_respect is not this: 5457 ignore_respect.replace(ignore_respect.this) 5458 this = self.expression(ignore_respect.__class__, this=this) 5459 5460 this = self._parse_respect_or_ignore_nulls(this) 5461 5462 # bigquery select from window x AS (partition by ...) 5463 if alias: 5464 over = None 5465 self._match(TokenType.ALIAS) 5466 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5467 return this 5468 else: 5469 over = self._prev.text.upper() 5470 5471 if comments and isinstance(func, exp.Expression): 5472 func.pop_comments() 5473 5474 if not self._match(TokenType.L_PAREN): 5475 return self.expression( 5476 exp.Window, 5477 comments=comments, 5478 this=this, 5479 alias=self._parse_id_var(False), 5480 over=over, 5481 ) 5482 5483 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5484 5485 first = self._match(TokenType.FIRST) 5486 if self._match_text_seq("LAST"): 5487 first = False 5488 5489 partition, order = self._parse_partition_and_order() 5490 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5491 5492 if kind: 5493 self._match(TokenType.BETWEEN) 5494 start = self._parse_window_spec() 5495 self._match(TokenType.AND) 5496 end = self._parse_window_spec() 5497 5498 spec = self.expression( 5499 exp.WindowSpec, 5500 kind=kind, 5501 start=start["value"], 5502 start_side=start["side"], 5503 end=end["value"], 5504 end_side=end["side"], 5505 ) 5506 else: 5507 spec = None 5508 5509 self._match_r_paren() 5510 5511 window = self.expression( 5512 exp.Window, 5513 comments=comments, 5514 this=this, 5515 partition_by=partition, 5516 order=order, 5517 spec=spec, 5518 alias=window_alias, 5519 over=over, 5520 first=first, 5521 ) 5522 5523 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5524 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5525 return self._parse_window(window, alias=alias) 5526 5527 return window 5528 5529 def _parse_partition_and_order( 5530 self, 5531 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5532 return self._parse_partition_by(), self._parse_order() 5533 5534 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5535 self._match(TokenType.BETWEEN) 5536 5537 return { 5538 "value": ( 5539 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5540 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5541 or self._parse_bitwise() 5542 ), 5543 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5544 } 5545 5546 def _parse_alias( 5547 self, this: t.Optional[exp.Expression], explicit: bool = False 5548 ) -> t.Optional[exp.Expression]: 5549 any_token = self._match(TokenType.ALIAS) 5550 comments = self._prev_comments or [] 5551 5552 if explicit and not any_token: 5553 return this 5554 5555 if self._match(TokenType.L_PAREN): 5556 aliases = self.expression( 5557 exp.Aliases, 5558 comments=comments, 5559 this=this, 5560 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5561 ) 5562 self._match_r_paren(aliases) 5563 return aliases 5564 5565 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5566 self.STRING_ALIASES and self._parse_string_as_identifier() 5567 ) 5568 5569 if alias: 5570 comments.extend(alias.pop_comments()) 5571 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5572 column = this.this 5573 5574 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5575 if not this.comments and column and column.comments: 5576 this.comments = column.pop_comments() 5577 5578 return this 5579 5580 def _parse_id_var( 5581 self, 5582 any_token: bool = True, 5583 tokens: t.Optional[t.Collection[TokenType]] = None, 5584 ) -> t.Optional[exp.Expression]: 5585 expression = self._parse_identifier() 5586 if not expression and ( 5587 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5588 ): 5589 quoted = self._prev.token_type == TokenType.STRING 5590 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5591 5592 return expression 5593 5594 def _parse_string(self) -> t.Optional[exp.Expression]: 5595 if self._match_set(self.STRING_PARSERS): 5596 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5597 return self._parse_placeholder() 5598 5599 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5600 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5601 5602 def _parse_number(self) -> t.Optional[exp.Expression]: 5603 if self._match_set(self.NUMERIC_PARSERS): 5604 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5605 return self._parse_placeholder() 5606 5607 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5608 if self._match(TokenType.IDENTIFIER): 5609 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5610 return self._parse_placeholder() 5611 5612 def _parse_var( 5613 self, 5614 any_token: bool = False, 5615 tokens: t.Optional[t.Collection[TokenType]] = None, 5616 upper: bool = False, 5617 ) -> t.Optional[exp.Expression]: 5618 if ( 5619 (any_token and self._advance_any()) 5620 or self._match(TokenType.VAR) 5621 or (self._match_set(tokens) if tokens else False) 5622 ): 5623 return self.expression( 5624 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5625 ) 5626 return self._parse_placeholder() 5627 5628 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5629 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5630 self._advance() 5631 return self._prev 5632 return None 5633 5634 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5635 return self._parse_var() or self._parse_string() 5636 5637 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5638 return self._parse_primary() or self._parse_var(any_token=True) 5639 5640 def _parse_null(self) -> t.Optional[exp.Expression]: 5641 if self._match_set(self.NULL_TOKENS): 5642 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5643 return self._parse_placeholder() 5644 5645 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5646 if self._match(TokenType.TRUE): 5647 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5648 if self._match(TokenType.FALSE): 5649 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5650 return self._parse_placeholder() 5651 5652 def _parse_star(self) -> t.Optional[exp.Expression]: 5653 if self._match(TokenType.STAR): 5654 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5655 return self._parse_placeholder() 5656 5657 def _parse_parameter(self) -> exp.Parameter: 5658 this = self._parse_identifier() or self._parse_primary_or_var() 5659 return self.expression(exp.Parameter, this=this) 5660 5661 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5662 if self._match_set(self.PLACEHOLDER_PARSERS): 5663 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5664 if placeholder: 5665 return placeholder 5666 self._advance(-1) 5667 return None 5668 5669 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5670 if not self._match(TokenType.EXCEPT): 5671 return None 5672 if self._match(TokenType.L_PAREN, advance=False): 5673 return self._parse_wrapped_csv(self._parse_column) 5674 5675 except_column = self._parse_column() 5676 return [except_column] if except_column else None 5677 5678 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5679 if not self._match(TokenType.REPLACE): 5680 return None 5681 if self._match(TokenType.L_PAREN, advance=False): 5682 return self._parse_wrapped_csv(self._parse_expression) 5683 5684 replace_expression = self._parse_expression() 5685 return [replace_expression] if replace_expression else None 5686 5687 def _parse_csv( 5688 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5689 ) -> t.List[exp.Expression]: 5690 parse_result = parse_method() 5691 items = [parse_result] if parse_result is not None else [] 5692 5693 while self._match(sep): 5694 self._add_comments(parse_result) 5695 parse_result = parse_method() 5696 if parse_result is not None: 5697 items.append(parse_result) 5698 5699 return items 5700 5701 def _parse_tokens( 5702 self, parse_method: t.Callable, expressions: t.Dict 5703 ) -> t.Optional[exp.Expression]: 5704 this = parse_method() 5705 5706 while self._match_set(expressions): 5707 this = self.expression( 5708 expressions[self._prev.token_type], 5709 this=this, 5710 comments=self._prev_comments, 5711 expression=parse_method(), 5712 ) 5713 5714 return this 5715 5716 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5717 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5718 5719 def _parse_wrapped_csv( 5720 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5721 ) -> t.List[exp.Expression]: 5722 return self._parse_wrapped( 5723 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5724 ) 5725 5726 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5727 wrapped = self._match(TokenType.L_PAREN) 5728 if not wrapped and not optional: 5729 self.raise_error("Expecting (") 5730 parse_result = parse_method() 5731 if wrapped: 5732 self._match_r_paren() 5733 return parse_result 5734 5735 def _parse_expressions(self) -> t.List[exp.Expression]: 5736 return self._parse_csv(self._parse_expression) 5737 5738 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5739 return self._parse_select() or self._parse_set_operations( 5740 self._parse_expression() if alias else self._parse_conjunction() 5741 ) 5742 5743 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5744 return self._parse_query_modifiers( 5745 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5746 ) 5747 5748 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5749 this = None 5750 if self._match_texts(self.TRANSACTION_KIND): 5751 this = self._prev.text 5752 5753 self._match_texts(("TRANSACTION", "WORK")) 5754 5755 modes = [] 5756 while True: 5757 mode = [] 5758 while self._match(TokenType.VAR): 5759 mode.append(self._prev.text) 5760 5761 if mode: 5762 modes.append(" ".join(mode)) 5763 if not self._match(TokenType.COMMA): 5764 break 5765 5766 return self.expression(exp.Transaction, this=this, modes=modes) 5767 5768 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5769 chain = None 5770 savepoint = None 5771 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5772 5773 self._match_texts(("TRANSACTION", "WORK")) 5774 5775 if self._match_text_seq("TO"): 5776 self._match_text_seq("SAVEPOINT") 5777 savepoint = self._parse_id_var() 5778 5779 if self._match(TokenType.AND): 5780 chain = not self._match_text_seq("NO") 5781 self._match_text_seq("CHAIN") 5782 5783 if is_rollback: 5784 return self.expression(exp.Rollback, savepoint=savepoint) 5785 5786 return self.expression(exp.Commit, chain=chain) 5787 5788 def _parse_refresh(self) -> exp.Refresh: 5789 self._match(TokenType.TABLE) 5790 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5791 5792 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5793 if not self._match_text_seq("ADD"): 5794 return None 5795 5796 self._match(TokenType.COLUMN) 5797 exists_column = self._parse_exists(not_=True) 5798 expression = self._parse_field_def() 5799 5800 if expression: 5801 expression.set("exists", exists_column) 5802 5803 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5804 if self._match_texts(("FIRST", "AFTER")): 5805 position = self._prev.text 5806 column_position = self.expression( 5807 exp.ColumnPosition, this=self._parse_column(), position=position 5808 ) 5809 expression.set("position", column_position) 5810 5811 return expression 5812 5813 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5814 drop = self._match(TokenType.DROP) and self._parse_drop() 5815 if drop and not isinstance(drop, exp.Command): 5816 drop.set("kind", drop.args.get("kind", "COLUMN")) 5817 return drop 5818 5819 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5820 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5821 return self.expression( 5822 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5823 ) 5824 5825 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5826 index = self._index - 1 5827 5828 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5829 return self._parse_csv( 5830 lambda: self.expression( 5831 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5832 ) 5833 ) 5834 5835 self._retreat(index) 5836 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5837 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5838 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5839 5840 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5841 if self._match_texts(self.ALTER_ALTER_PARSERS): 5842 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5843 5844 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5845 # keyword after ALTER we default to parsing this statement 5846 self._match(TokenType.COLUMN) 5847 column = self._parse_field(any_token=True) 5848 5849 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5850 return self.expression(exp.AlterColumn, this=column, drop=True) 5851 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5852 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5853 if self._match(TokenType.COMMENT): 5854 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5855 5856 self._match_text_seq("SET", "DATA") 5857 self._match_text_seq("TYPE") 5858 return self.expression( 5859 exp.AlterColumn, 5860 this=column, 5861 dtype=self._parse_types(), 5862 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5863 using=self._match(TokenType.USING) and self._parse_conjunction(), 5864 ) 5865 5866 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5867 if self._match_texts(("ALL", "EVEN", "AUTO")): 5868 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5869 5870 self._match_text_seq("KEY", "DISTKEY") 5871 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5872 5873 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5874 if compound: 5875 self._match_text_seq("SORTKEY") 5876 5877 if self._match(TokenType.L_PAREN, advance=False): 5878 return self.expression( 5879 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5880 ) 5881 5882 self._match_texts(("AUTO", "NONE")) 5883 return self.expression( 5884 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5885 ) 5886 5887 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5888 index = self._index - 1 5889 5890 partition_exists = self._parse_exists() 5891 if self._match(TokenType.PARTITION, advance=False): 5892 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5893 5894 self._retreat(index) 5895 return self._parse_csv(self._parse_drop_column) 5896 5897 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5898 if self._match(TokenType.COLUMN): 5899 exists = self._parse_exists() 5900 old_column = self._parse_column() 5901 to = self._match_text_seq("TO") 5902 new_column = self._parse_column() 5903 5904 if old_column is None or to is None or new_column is None: 5905 return None 5906 5907 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5908 5909 self._match_text_seq("TO") 5910 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5911 5912 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5913 start = self._prev 5914 5915 if not self._match(TokenType.TABLE): 5916 return self._parse_as_command(start) 5917 5918 exists = self._parse_exists() 5919 only = self._match_text_seq("ONLY") 5920 this = self._parse_table(schema=True) 5921 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5922 5923 if self._next: 5924 self._advance() 5925 5926 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5927 if parser: 5928 actions = ensure_list(parser(self)) 5929 options = self._parse_csv(self._parse_property) 5930 5931 if not self._curr and actions: 5932 return self.expression( 5933 exp.AlterTable, 5934 this=this, 5935 exists=exists, 5936 actions=actions, 5937 only=only, 5938 options=options, 5939 cluster=cluster, 5940 ) 5941 5942 return self._parse_as_command(start) 5943 5944 def _parse_merge(self) -> exp.Merge: 5945 self._match(TokenType.INTO) 5946 target = self._parse_table() 5947 5948 if target and self._match(TokenType.ALIAS, advance=False): 5949 target.set("alias", self._parse_table_alias()) 5950 5951 self._match(TokenType.USING) 5952 using = self._parse_table() 5953 5954 self._match(TokenType.ON) 5955 on = self._parse_conjunction() 5956 5957 return self.expression( 5958 exp.Merge, 5959 this=target, 5960 using=using, 5961 on=on, 5962 expressions=self._parse_when_matched(), 5963 ) 5964 5965 def _parse_when_matched(self) -> t.List[exp.When]: 5966 whens = [] 5967 5968 while self._match(TokenType.WHEN): 5969 matched = not self._match(TokenType.NOT) 5970 self._match_text_seq("MATCHED") 5971 source = ( 5972 False 5973 if self._match_text_seq("BY", "TARGET") 5974 else self._match_text_seq("BY", "SOURCE") 5975 ) 5976 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5977 5978 self._match(TokenType.THEN) 5979 5980 if self._match(TokenType.INSERT): 5981 _this = self._parse_star() 5982 if _this: 5983 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5984 else: 5985 then = self.expression( 5986 exp.Insert, 5987 this=self._parse_value(), 5988 expression=self._match_text_seq("VALUES") and self._parse_value(), 5989 ) 5990 elif self._match(TokenType.UPDATE): 5991 expressions = self._parse_star() 5992 if expressions: 5993 then = self.expression(exp.Update, expressions=expressions) 5994 else: 5995 then = self.expression( 5996 exp.Update, 5997 expressions=self._match(TokenType.SET) 5998 and self._parse_csv(self._parse_equality), 5999 ) 6000 elif self._match(TokenType.DELETE): 6001 then = self.expression(exp.Var, this=self._prev.text) 6002 else: 6003 then = None 6004 6005 whens.append( 6006 self.expression( 6007 exp.When, 6008 matched=matched, 6009 source=source, 6010 condition=condition, 6011 then=then, 6012 ) 6013 ) 6014 return whens 6015 6016 def _parse_show(self) -> t.Optional[exp.Expression]: 6017 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6018 if parser: 6019 return parser(self) 6020 return self._parse_as_command(self._prev) 6021 6022 def _parse_set_item_assignment( 6023 self, kind: t.Optional[str] = None 6024 ) -> t.Optional[exp.Expression]: 6025 index = self._index 6026 6027 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6028 return self._parse_set_transaction(global_=kind == "GLOBAL") 6029 6030 left = self._parse_primary() or self._parse_column() 6031 assignment_delimiter = self._match_texts(("=", "TO")) 6032 6033 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6034 self._retreat(index) 6035 return None 6036 6037 right = self._parse_statement() or self._parse_id_var() 6038 this = self.expression(exp.EQ, this=left, expression=right) 6039 6040 return self.expression(exp.SetItem, this=this, kind=kind) 6041 6042 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6043 self._match_text_seq("TRANSACTION") 6044 characteristics = self._parse_csv( 6045 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6046 ) 6047 return self.expression( 6048 exp.SetItem, 6049 expressions=characteristics, 6050 kind="TRANSACTION", 6051 **{"global": global_}, # type: ignore 6052 ) 6053 6054 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6055 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6056 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6057 6058 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6059 index = self._index 6060 set_ = self.expression( 6061 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6062 ) 6063 6064 if self._curr: 6065 self._retreat(index) 6066 return self._parse_as_command(self._prev) 6067 6068 return set_ 6069 6070 def _parse_var_from_options( 6071 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6072 ) -> t.Optional[exp.Var]: 6073 start = self._curr 6074 if not start: 6075 return None 6076 6077 option = start.text.upper() 6078 continuations = options.get(option) 6079 6080 index = self._index 6081 self._advance() 6082 for keywords in continuations or []: 6083 if isinstance(keywords, str): 6084 keywords = (keywords,) 6085 6086 if self._match_text_seq(*keywords): 6087 option = f"{option} {' '.join(keywords)}" 6088 break 6089 else: 6090 if continuations or continuations is None: 6091 if raise_unmatched: 6092 self.raise_error(f"Unknown option {option}") 6093 6094 self._retreat(index) 6095 return None 6096 6097 return exp.var(option) 6098 6099 def _parse_as_command(self, start: Token) -> exp.Command: 6100 while self._curr: 6101 self._advance() 6102 text = self._find_sql(start, self._prev) 6103 size = len(start.text) 6104 self._warn_unsupported() 6105 return exp.Command(this=text[:size], expression=text[size:]) 6106 6107 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6108 settings = [] 6109 6110 self._match_l_paren() 6111 kind = self._parse_id_var() 6112 6113 if self._match(TokenType.L_PAREN): 6114 while True: 6115 key = self._parse_id_var() 6116 value = self._parse_primary() 6117 6118 if not key and value is None: 6119 break 6120 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6121 self._match(TokenType.R_PAREN) 6122 6123 self._match_r_paren() 6124 6125 return self.expression( 6126 exp.DictProperty, 6127 this=this, 6128 kind=kind.this if kind else None, 6129 settings=settings, 6130 ) 6131 6132 def _parse_dict_range(self, this: str) -> exp.DictRange: 6133 self._match_l_paren() 6134 has_min = self._match_text_seq("MIN") 6135 if has_min: 6136 min = self._parse_var() or self._parse_primary() 6137 self._match_text_seq("MAX") 6138 max = self._parse_var() or self._parse_primary() 6139 else: 6140 max = self._parse_var() or self._parse_primary() 6141 min = exp.Literal.number(0) 6142 self._match_r_paren() 6143 return self.expression(exp.DictRange, this=this, min=min, max=max) 6144 6145 def _parse_comprehension( 6146 self, this: t.Optional[exp.Expression] 6147 ) -> t.Optional[exp.Comprehension]: 6148 index = self._index 6149 expression = self._parse_column() 6150 if not self._match(TokenType.IN): 6151 self._retreat(index - 1) 6152 return None 6153 iterator = self._parse_column() 6154 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6155 return self.expression( 6156 exp.Comprehension, 6157 this=this, 6158 expression=expression, 6159 iterator=iterator, 6160 condition=condition, 6161 ) 6162 6163 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6164 if self._match(TokenType.HEREDOC_STRING): 6165 return self.expression(exp.Heredoc, this=self._prev.text) 6166 6167 if not self._match_text_seq("$"): 6168 return None 6169 6170 tags = ["$"] 6171 tag_text = None 6172 6173 if self._is_connected(): 6174 self._advance() 6175 tags.append(self._prev.text.upper()) 6176 else: 6177 self.raise_error("No closing $ found") 6178 6179 if tags[-1] != "$": 6180 if self._is_connected() and self._match_text_seq("$"): 6181 tag_text = tags[-1] 6182 tags.append("$") 6183 else: 6184 self.raise_error("No closing $ found") 6185 6186 heredoc_start = self._curr 6187 6188 while self._curr: 6189 if self._match_text_seq(*tags, advance=False): 6190 this = self._find_sql(heredoc_start, self._prev) 6191 self._advance(len(tags)) 6192 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6193 6194 self._advance() 6195 6196 self.raise_error(f"No closing {''.join(tags)} found") 6197 return None 6198 6199 def _find_parser( 6200 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6201 ) -> t.Optional[t.Callable]: 6202 if not self._curr: 6203 return None 6204 6205 index = self._index 6206 this = [] 6207 while True: 6208 # The current token might be multiple words 6209 curr = self._curr.text.upper() 6210 key = curr.split(" ") 6211 this.append(curr) 6212 6213 self._advance() 6214 result, trie = in_trie(trie, key) 6215 if result == TrieResult.FAILED: 6216 break 6217 6218 if result == TrieResult.EXISTS: 6219 subparser = parsers[" ".join(this)] 6220 return subparser 6221 6222 self._retreat(index) 6223 return None 6224 6225 def _match(self, token_type, advance=True, expression=None): 6226 if not self._curr: 6227 return None 6228 6229 if self._curr.token_type == token_type: 6230 if advance: 6231 self._advance() 6232 self._add_comments(expression) 6233 return True 6234 6235 return None 6236 6237 def _match_set(self, types, advance=True): 6238 if not self._curr: 6239 return None 6240 6241 if self._curr.token_type in types: 6242 if advance: 6243 self._advance() 6244 return True 6245 6246 return None 6247 6248 def _match_pair(self, token_type_a, token_type_b, advance=True): 6249 if not self._curr or not self._next: 6250 return None 6251 6252 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6253 if advance: 6254 self._advance(2) 6255 return True 6256 6257 return None 6258 6259 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6260 if not self._match(TokenType.L_PAREN, expression=expression): 6261 self.raise_error("Expecting (") 6262 6263 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6264 if not self._match(TokenType.R_PAREN, expression=expression): 6265 self.raise_error("Expecting )") 6266 6267 def _match_texts(self, texts, advance=True): 6268 if self._curr and self._curr.text.upper() in texts: 6269 if advance: 6270 self._advance() 6271 return True 6272 return None 6273 6274 def _match_text_seq(self, *texts, advance=True): 6275 index = self._index 6276 for text in texts: 6277 if self._curr and self._curr.text.upper() == text: 6278 self._advance() 6279 else: 6280 self._retreat(index) 6281 return None 6282 6283 if not advance: 6284 self._retreat(index) 6285 6286 return True 6287 6288 def _replace_lambda( 6289 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6290 ) -> t.Optional[exp.Expression]: 6291 if not node: 6292 return node 6293 6294 for column in node.find_all(exp.Column): 6295 if column.parts[0].name in lambda_variables: 6296 dot_or_id = column.to_dot() if column.table else column.this 6297 parent = column.parent 6298 6299 while isinstance(parent, exp.Dot): 6300 if not isinstance(parent.parent, exp.Dot): 6301 parent.replace(dot_or_id) 6302 break 6303 parent = parent.parent 6304 else: 6305 if column is node: 6306 node = dot_or_id 6307 else: 6308 column.replace(dot_or_id) 6309 return node 6310 6311 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6312 start = self._prev 6313 6314 # Not to be confused with TRUNCATE(number, decimals) function call 6315 if self._match(TokenType.L_PAREN): 6316 self._retreat(self._index - 2) 6317 return self._parse_function() 6318 6319 # Clickhouse supports TRUNCATE DATABASE as well 6320 is_database = self._match(TokenType.DATABASE) 6321 6322 self._match(TokenType.TABLE) 6323 6324 exists = self._parse_exists(not_=False) 6325 6326 expressions = self._parse_csv( 6327 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6328 ) 6329 6330 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6331 6332 if self._match_text_seq("RESTART", "IDENTITY"): 6333 identity = "RESTART" 6334 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6335 identity = "CONTINUE" 6336 else: 6337 identity = None 6338 6339 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6340 option = self._prev.text 6341 else: 6342 option = None 6343 6344 partition = self._parse_partition() 6345 6346 # Fallback case 6347 if self._curr: 6348 return self._parse_as_command(start) 6349 6350 return self.expression( 6351 exp.TruncateTable, 6352 expressions=expressions, 6353 is_database=is_database, 6354 exists=exists, 6355 cluster=cluster, 6356 identity=identity, 6357 option=option, 6358 partition=partition, 6359 ) 6360 6361 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6362 this = self._parse_ordered(self._parse_opclass) 6363 6364 if not self._match(TokenType.WITH): 6365 return this 6366 6367 op = self._parse_var(any_token=True) 6368 6369 return self.expression(exp.WithOperator, this=this, op=op) 6370 6371 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6372 opts = [] 6373 self._match(TokenType.EQ) 6374 self._match(TokenType.L_PAREN) 6375 while self._curr and not self._match(TokenType.R_PAREN): 6376 opts.append(self._parse_conjunction()) 6377 self._match(TokenType.COMMA) 6378 return opts 6379 6380 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6381 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6382 6383 options = [] 6384 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6385 option = self._parse_unquoted_field() 6386 value = None 6387 6388 # Some options are defined as functions with the values as params 6389 if not isinstance(option, exp.Func): 6390 prev = self._prev.text.upper() 6391 # Different dialects might separate options and values by white space, "=" and "AS" 6392 self._match(TokenType.EQ) 6393 self._match(TokenType.ALIAS) 6394 6395 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6396 # Snowflake FILE_FORMAT case 6397 value = self._parse_wrapped_options() 6398 else: 6399 value = self._parse_unquoted_field() 6400 6401 param = self.expression(exp.CopyParameter, this=option, expression=value) 6402 options.append(param) 6403 6404 if sep: 6405 self._match(sep) 6406 6407 return options 6408 6409 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6410 expr = self.expression(exp.Credentials) 6411 6412 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6413 expr.set("storage", self._parse_conjunction()) 6414 if self._match_text_seq("CREDENTIALS"): 6415 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6416 creds = ( 6417 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6418 ) 6419 expr.set("credentials", creds) 6420 if self._match_text_seq("ENCRYPTION"): 6421 expr.set("encryption", self._parse_wrapped_options()) 6422 if self._match_text_seq("IAM_ROLE"): 6423 expr.set("iam_role", self._parse_field()) 6424 if self._match_text_seq("REGION"): 6425 expr.set("region", self._parse_field()) 6426 6427 return expr 6428 6429 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6430 return self._parse_field() 6431 6432 def _parse_copy(self) -> exp.Copy | exp.Command: 6433 start = self._prev 6434 6435 self._match(TokenType.INTO) 6436 6437 this = ( 6438 self._parse_conjunction() 6439 if self._match(TokenType.L_PAREN, advance=False) 6440 else self._parse_table(schema=True) 6441 ) 6442 6443 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6444 6445 files = self._parse_csv(self._parse_file_location) 6446 credentials = self._parse_credentials() 6447 6448 self._match_text_seq("WITH") 6449 6450 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6451 6452 # Fallback case 6453 if self._curr: 6454 return self._parse_as_command(start) 6455 6456 return self.expression( 6457 exp.Copy, 6458 this=this, 6459 kind=kind, 6460 credentials=credentials, 6461 files=files, 6462 params=params, 6463 )
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
52def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 53 # Default argument order is base, expression 54 this = seq_get(args, 0) 55 expression = seq_get(args, 1) 56 57 if expression: 58 if not dialect.LOG_BASE_FIRST: 59 this, expression = expression, this 60 return exp.Log(this=this, expression=expression) 61 62 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
65def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 66 def _builder(args: t.List, dialect: Dialect) -> E: 67 expression = expr_type( 68 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 69 ) 70 if len(args) > 2 and expr_type is exp.JSONExtract: 71 expression.set("expressions", args[2:]) 72 73 return expression 74 75 return _builder
78def build_mod(args: t.List) -> exp.Mod: 79 this = seq_get(args, 0) 80 expression = seq_get(args, 1) 81 82 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 83 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 84 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 85 86 return exp.Mod(this=this, expression=expression)
99class Parser(metaclass=_Parser): 100 """ 101 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 102 103 Args: 104 error_level: The desired error level. 105 Default: ErrorLevel.IMMEDIATE 106 error_message_context: The amount of context to capture from a query string when displaying 107 the error message (in number of characters). 108 Default: 100 109 max_errors: Maximum number of error messages to include in a raised ParseError. 110 This is only relevant if error_level is ErrorLevel.RAISE. 111 Default: 3 112 """ 113 114 FUNCTIONS: t.Dict[str, t.Callable] = { 115 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 116 "CONCAT": lambda args, dialect: exp.Concat( 117 expressions=args, 118 safe=not dialect.STRICT_STRING_CONCAT, 119 coalesce=dialect.CONCAT_COALESCE, 120 ), 121 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 122 expressions=args, 123 safe=not dialect.STRICT_STRING_CONCAT, 124 coalesce=dialect.CONCAT_COALESCE, 125 ), 126 "DATE_TO_DATE_STR": lambda args: exp.Cast( 127 this=seq_get(args, 0), 128 to=exp.DataType(this=exp.DataType.Type.TEXT), 129 ), 130 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 131 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 132 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 133 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 134 "LIKE": build_like, 135 "LOG": build_logarithm, 136 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 137 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 138 "MOD": build_mod, 139 "TIME_TO_TIME_STR": lambda args: exp.Cast( 140 this=seq_get(args, 0), 141 to=exp.DataType(this=exp.DataType.Type.TEXT), 142 ), 143 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 144 this=exp.Cast( 145 this=seq_get(args, 0), 146 to=exp.DataType(this=exp.DataType.Type.TEXT), 147 ), 148 start=exp.Literal.number(1), 149 length=exp.Literal.number(10), 150 ), 151 "VAR_MAP": build_var_map, 152 } 153 154 NO_PAREN_FUNCTIONS = { 155 TokenType.CURRENT_DATE: exp.CurrentDate, 156 TokenType.CURRENT_DATETIME: exp.CurrentDate, 157 TokenType.CURRENT_TIME: exp.CurrentTime, 158 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 159 TokenType.CURRENT_USER: exp.CurrentUser, 160 } 161 162 STRUCT_TYPE_TOKENS = { 163 TokenType.NESTED, 164 TokenType.OBJECT, 165 TokenType.STRUCT, 166 } 167 168 NESTED_TYPE_TOKENS = { 169 TokenType.ARRAY, 170 TokenType.LOWCARDINALITY, 171 TokenType.MAP, 172 TokenType.NULLABLE, 173 *STRUCT_TYPE_TOKENS, 174 } 175 176 ENUM_TYPE_TOKENS = { 177 TokenType.ENUM, 178 TokenType.ENUM8, 179 TokenType.ENUM16, 180 } 181 182 AGGREGATE_TYPE_TOKENS = { 183 TokenType.AGGREGATEFUNCTION, 184 TokenType.SIMPLEAGGREGATEFUNCTION, 185 } 186 187 TYPE_TOKENS = { 188 TokenType.BIT, 189 TokenType.BOOLEAN, 190 TokenType.TINYINT, 191 TokenType.UTINYINT, 192 TokenType.SMALLINT, 193 TokenType.USMALLINT, 194 TokenType.INT, 195 TokenType.UINT, 196 TokenType.BIGINT, 197 TokenType.UBIGINT, 198 TokenType.INT128, 199 TokenType.UINT128, 200 TokenType.INT256, 201 TokenType.UINT256, 202 TokenType.MEDIUMINT, 203 TokenType.UMEDIUMINT, 204 TokenType.FIXEDSTRING, 205 TokenType.FLOAT, 206 TokenType.DOUBLE, 207 TokenType.CHAR, 208 TokenType.NCHAR, 209 TokenType.VARCHAR, 210 TokenType.NVARCHAR, 211 TokenType.BPCHAR, 212 TokenType.TEXT, 213 TokenType.MEDIUMTEXT, 214 TokenType.LONGTEXT, 215 TokenType.MEDIUMBLOB, 216 TokenType.LONGBLOB, 217 TokenType.BINARY, 218 TokenType.VARBINARY, 219 TokenType.JSON, 220 TokenType.JSONB, 221 TokenType.INTERVAL, 222 TokenType.TINYBLOB, 223 TokenType.TINYTEXT, 224 TokenType.TIME, 225 TokenType.TIMETZ, 226 TokenType.TIMESTAMP, 227 TokenType.TIMESTAMP_S, 228 TokenType.TIMESTAMP_MS, 229 TokenType.TIMESTAMP_NS, 230 TokenType.TIMESTAMPTZ, 231 TokenType.TIMESTAMPLTZ, 232 TokenType.TIMESTAMPNTZ, 233 TokenType.DATETIME, 234 TokenType.DATETIME64, 235 TokenType.DATE, 236 TokenType.DATE32, 237 TokenType.INT4RANGE, 238 TokenType.INT4MULTIRANGE, 239 TokenType.INT8RANGE, 240 TokenType.INT8MULTIRANGE, 241 TokenType.NUMRANGE, 242 TokenType.NUMMULTIRANGE, 243 TokenType.TSRANGE, 244 TokenType.TSMULTIRANGE, 245 TokenType.TSTZRANGE, 246 TokenType.TSTZMULTIRANGE, 247 TokenType.DATERANGE, 248 TokenType.DATEMULTIRANGE, 249 TokenType.DECIMAL, 250 TokenType.UDECIMAL, 251 TokenType.BIGDECIMAL, 252 TokenType.UUID, 253 TokenType.GEOGRAPHY, 254 TokenType.GEOMETRY, 255 TokenType.HLLSKETCH, 256 TokenType.HSTORE, 257 TokenType.PSEUDO_TYPE, 258 TokenType.SUPER, 259 TokenType.SERIAL, 260 TokenType.SMALLSERIAL, 261 TokenType.BIGSERIAL, 262 TokenType.XML, 263 TokenType.YEAR, 264 TokenType.UNIQUEIDENTIFIER, 265 TokenType.USERDEFINED, 266 TokenType.MONEY, 267 TokenType.SMALLMONEY, 268 TokenType.ROWVERSION, 269 TokenType.IMAGE, 270 TokenType.VARIANT, 271 TokenType.OBJECT, 272 TokenType.OBJECT_IDENTIFIER, 273 TokenType.INET, 274 TokenType.IPADDRESS, 275 TokenType.IPPREFIX, 276 TokenType.IPV4, 277 TokenType.IPV6, 278 TokenType.UNKNOWN, 279 TokenType.NULL, 280 TokenType.NAME, 281 TokenType.TDIGEST, 282 *ENUM_TYPE_TOKENS, 283 *NESTED_TYPE_TOKENS, 284 *AGGREGATE_TYPE_TOKENS, 285 } 286 287 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 288 TokenType.BIGINT: TokenType.UBIGINT, 289 TokenType.INT: TokenType.UINT, 290 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 291 TokenType.SMALLINT: TokenType.USMALLINT, 292 TokenType.TINYINT: TokenType.UTINYINT, 293 TokenType.DECIMAL: TokenType.UDECIMAL, 294 } 295 296 SUBQUERY_PREDICATES = { 297 TokenType.ANY: exp.Any, 298 TokenType.ALL: exp.All, 299 TokenType.EXISTS: exp.Exists, 300 TokenType.SOME: exp.Any, 301 } 302 303 RESERVED_TOKENS = { 304 *Tokenizer.SINGLE_TOKENS.values(), 305 TokenType.SELECT, 306 } - {TokenType.IDENTIFIER} 307 308 DB_CREATABLES = { 309 TokenType.DATABASE, 310 TokenType.SCHEMA, 311 TokenType.TABLE, 312 TokenType.VIEW, 313 TokenType.MODEL, 314 TokenType.DICTIONARY, 315 TokenType.SEQUENCE, 316 TokenType.STORAGE_INTEGRATION, 317 } 318 319 CREATABLES = { 320 TokenType.COLUMN, 321 TokenType.CONSTRAINT, 322 TokenType.FUNCTION, 323 TokenType.INDEX, 324 TokenType.PROCEDURE, 325 TokenType.FOREIGN_KEY, 326 *DB_CREATABLES, 327 } 328 329 # Tokens that can represent identifiers 330 ID_VAR_TOKENS = { 331 TokenType.VAR, 332 TokenType.ANTI, 333 TokenType.APPLY, 334 TokenType.ASC, 335 TokenType.ASOF, 336 TokenType.AUTO_INCREMENT, 337 TokenType.BEGIN, 338 TokenType.BPCHAR, 339 TokenType.CACHE, 340 TokenType.CASE, 341 TokenType.COLLATE, 342 TokenType.COMMAND, 343 TokenType.COMMENT, 344 TokenType.COMMIT, 345 TokenType.CONSTRAINT, 346 TokenType.COPY, 347 TokenType.DEFAULT, 348 TokenType.DELETE, 349 TokenType.DESC, 350 TokenType.DESCRIBE, 351 TokenType.DICTIONARY, 352 TokenType.DIV, 353 TokenType.END, 354 TokenType.EXECUTE, 355 TokenType.ESCAPE, 356 TokenType.FALSE, 357 TokenType.FIRST, 358 TokenType.FILTER, 359 TokenType.FINAL, 360 TokenType.FORMAT, 361 TokenType.FULL, 362 TokenType.IDENTIFIER, 363 TokenType.IS, 364 TokenType.ISNULL, 365 TokenType.INTERVAL, 366 TokenType.KEEP, 367 TokenType.KILL, 368 TokenType.LEFT, 369 TokenType.LOAD, 370 TokenType.MERGE, 371 TokenType.NATURAL, 372 TokenType.NEXT, 373 TokenType.OFFSET, 374 TokenType.OPERATOR, 375 TokenType.ORDINALITY, 376 TokenType.OVERLAPS, 377 TokenType.OVERWRITE, 378 TokenType.PARTITION, 379 TokenType.PERCENT, 380 TokenType.PIVOT, 381 TokenType.PRAGMA, 382 TokenType.RANGE, 383 TokenType.RECURSIVE, 384 TokenType.REFERENCES, 385 TokenType.REFRESH, 386 TokenType.REPLACE, 387 TokenType.RIGHT, 388 TokenType.ROW, 389 TokenType.ROWS, 390 TokenType.SEMI, 391 TokenType.SET, 392 TokenType.SETTINGS, 393 TokenType.SHOW, 394 TokenType.TEMPORARY, 395 TokenType.TOP, 396 TokenType.TRUE, 397 TokenType.TRUNCATE, 398 TokenType.UNIQUE, 399 TokenType.UNPIVOT, 400 TokenType.UPDATE, 401 TokenType.USE, 402 TokenType.VOLATILE, 403 TokenType.WINDOW, 404 *CREATABLES, 405 *SUBQUERY_PREDICATES, 406 *TYPE_TOKENS, 407 *NO_PAREN_FUNCTIONS, 408 } 409 410 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 411 412 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 413 TokenType.ANTI, 414 TokenType.APPLY, 415 TokenType.ASOF, 416 TokenType.FULL, 417 TokenType.LEFT, 418 TokenType.LOCK, 419 TokenType.NATURAL, 420 TokenType.OFFSET, 421 TokenType.RIGHT, 422 TokenType.SEMI, 423 TokenType.WINDOW, 424 } 425 426 ALIAS_TOKENS = ID_VAR_TOKENS 427 428 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 429 430 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 431 432 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 433 434 FUNC_TOKENS = { 435 TokenType.COLLATE, 436 TokenType.COMMAND, 437 TokenType.CURRENT_DATE, 438 TokenType.CURRENT_DATETIME, 439 TokenType.CURRENT_TIMESTAMP, 440 TokenType.CURRENT_TIME, 441 TokenType.CURRENT_USER, 442 TokenType.FILTER, 443 TokenType.FIRST, 444 TokenType.FORMAT, 445 TokenType.GLOB, 446 TokenType.IDENTIFIER, 447 TokenType.INDEX, 448 TokenType.ISNULL, 449 TokenType.ILIKE, 450 TokenType.INSERT, 451 TokenType.LIKE, 452 TokenType.MERGE, 453 TokenType.OFFSET, 454 TokenType.PRIMARY_KEY, 455 TokenType.RANGE, 456 TokenType.REPLACE, 457 TokenType.RLIKE, 458 TokenType.ROW, 459 TokenType.UNNEST, 460 TokenType.VAR, 461 TokenType.LEFT, 462 TokenType.RIGHT, 463 TokenType.SEQUENCE, 464 TokenType.DATE, 465 TokenType.DATETIME, 466 TokenType.TABLE, 467 TokenType.TIMESTAMP, 468 TokenType.TIMESTAMPTZ, 469 TokenType.TRUNCATE, 470 TokenType.WINDOW, 471 TokenType.XOR, 472 *TYPE_TOKENS, 473 *SUBQUERY_PREDICATES, 474 } 475 476 CONJUNCTION = { 477 TokenType.AND: exp.And, 478 TokenType.OR: exp.Or, 479 } 480 481 EQUALITY = { 482 TokenType.COLON_EQ: exp.PropertyEQ, 483 TokenType.EQ: exp.EQ, 484 TokenType.NEQ: exp.NEQ, 485 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 486 } 487 488 COMPARISON = { 489 TokenType.GT: exp.GT, 490 TokenType.GTE: exp.GTE, 491 TokenType.LT: exp.LT, 492 TokenType.LTE: exp.LTE, 493 } 494 495 BITWISE = { 496 TokenType.AMP: exp.BitwiseAnd, 497 TokenType.CARET: exp.BitwiseXor, 498 TokenType.PIPE: exp.BitwiseOr, 499 } 500 501 TERM = { 502 TokenType.DASH: exp.Sub, 503 TokenType.PLUS: exp.Add, 504 TokenType.MOD: exp.Mod, 505 TokenType.COLLATE: exp.Collate, 506 } 507 508 FACTOR = { 509 TokenType.DIV: exp.IntDiv, 510 TokenType.LR_ARROW: exp.Distance, 511 TokenType.SLASH: exp.Div, 512 TokenType.STAR: exp.Mul, 513 } 514 515 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 516 517 TIMES = { 518 TokenType.TIME, 519 TokenType.TIMETZ, 520 } 521 522 TIMESTAMPS = { 523 TokenType.TIMESTAMP, 524 TokenType.TIMESTAMPTZ, 525 TokenType.TIMESTAMPLTZ, 526 *TIMES, 527 } 528 529 SET_OPERATIONS = { 530 TokenType.UNION, 531 TokenType.INTERSECT, 532 TokenType.EXCEPT, 533 } 534 535 JOIN_METHODS = { 536 TokenType.ASOF, 537 TokenType.NATURAL, 538 TokenType.POSITIONAL, 539 } 540 541 JOIN_SIDES = { 542 TokenType.LEFT, 543 TokenType.RIGHT, 544 TokenType.FULL, 545 } 546 547 JOIN_KINDS = { 548 TokenType.INNER, 549 TokenType.OUTER, 550 TokenType.CROSS, 551 TokenType.SEMI, 552 TokenType.ANTI, 553 } 554 555 JOIN_HINTS: t.Set[str] = set() 556 557 LAMBDAS = { 558 TokenType.ARROW: lambda self, expressions: self.expression( 559 exp.Lambda, 560 this=self._replace_lambda( 561 self._parse_conjunction(), 562 {node.name for node in expressions}, 563 ), 564 expressions=expressions, 565 ), 566 TokenType.FARROW: lambda self, expressions: self.expression( 567 exp.Kwarg, 568 this=exp.var(expressions[0].name), 569 expression=self._parse_conjunction(), 570 ), 571 } 572 573 COLUMN_OPERATORS = { 574 TokenType.DOT: None, 575 TokenType.DCOLON: lambda self, this, to: self.expression( 576 exp.Cast if self.STRICT_CAST else exp.TryCast, 577 this=this, 578 to=to, 579 ), 580 TokenType.ARROW: lambda self, this, path: self.expression( 581 exp.JSONExtract, 582 this=this, 583 expression=self.dialect.to_json_path(path), 584 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 585 ), 586 TokenType.DARROW: lambda self, this, path: self.expression( 587 exp.JSONExtractScalar, 588 this=this, 589 expression=self.dialect.to_json_path(path), 590 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 591 ), 592 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 593 exp.JSONBExtract, 594 this=this, 595 expression=path, 596 ), 597 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 598 exp.JSONBExtractScalar, 599 this=this, 600 expression=path, 601 ), 602 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 603 exp.JSONBContains, 604 this=this, 605 expression=key, 606 ), 607 } 608 609 EXPRESSION_PARSERS = { 610 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 611 exp.Column: lambda self: self._parse_column(), 612 exp.Condition: lambda self: self._parse_conjunction(), 613 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 614 exp.Expression: lambda self: self._parse_expression(), 615 exp.From: lambda self: self._parse_from(joins=True), 616 exp.Group: lambda self: self._parse_group(), 617 exp.Having: lambda self: self._parse_having(), 618 exp.Identifier: lambda self: self._parse_id_var(), 619 exp.Join: lambda self: self._parse_join(), 620 exp.Lambda: lambda self: self._parse_lambda(), 621 exp.Lateral: lambda self: self._parse_lateral(), 622 exp.Limit: lambda self: self._parse_limit(), 623 exp.Offset: lambda self: self._parse_offset(), 624 exp.Order: lambda self: self._parse_order(), 625 exp.Ordered: lambda self: self._parse_ordered(), 626 exp.Properties: lambda self: self._parse_properties(), 627 exp.Qualify: lambda self: self._parse_qualify(), 628 exp.Returning: lambda self: self._parse_returning(), 629 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 630 exp.Table: lambda self: self._parse_table_parts(), 631 exp.TableAlias: lambda self: self._parse_table_alias(), 632 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 633 exp.Where: lambda self: self._parse_where(), 634 exp.Window: lambda self: self._parse_named_window(), 635 exp.With: lambda self: self._parse_with(), 636 "JOIN_TYPE": lambda self: self._parse_join_parts(), 637 } 638 639 STATEMENT_PARSERS = { 640 TokenType.ALTER: lambda self: self._parse_alter(), 641 TokenType.BEGIN: lambda self: self._parse_transaction(), 642 TokenType.CACHE: lambda self: self._parse_cache(), 643 TokenType.COMMENT: lambda self: self._parse_comment(), 644 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 645 TokenType.COPY: lambda self: self._parse_copy(), 646 TokenType.CREATE: lambda self: self._parse_create(), 647 TokenType.DELETE: lambda self: self._parse_delete(), 648 TokenType.DESC: lambda self: self._parse_describe(), 649 TokenType.DESCRIBE: lambda self: self._parse_describe(), 650 TokenType.DROP: lambda self: self._parse_drop(), 651 TokenType.INSERT: lambda self: self._parse_insert(), 652 TokenType.KILL: lambda self: self._parse_kill(), 653 TokenType.LOAD: lambda self: self._parse_load(), 654 TokenType.MERGE: lambda self: self._parse_merge(), 655 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 656 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 657 TokenType.REFRESH: lambda self: self._parse_refresh(), 658 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 659 TokenType.SET: lambda self: self._parse_set(), 660 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 661 TokenType.UNCACHE: lambda self: self._parse_uncache(), 662 TokenType.UPDATE: lambda self: self._parse_update(), 663 TokenType.USE: lambda self: self.expression( 664 exp.Use, 665 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 666 this=self._parse_table(schema=False), 667 ), 668 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 669 } 670 671 UNARY_PARSERS = { 672 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 673 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 674 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 675 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 676 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 677 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 678 } 679 680 STRING_PARSERS = { 681 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 682 exp.RawString, this=token.text 683 ), 684 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 685 exp.National, this=token.text 686 ), 687 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 688 TokenType.STRING: lambda self, token: self.expression( 689 exp.Literal, this=token.text, is_string=True 690 ), 691 TokenType.UNICODE_STRING: lambda self, token: self.expression( 692 exp.UnicodeString, 693 this=token.text, 694 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 695 ), 696 } 697 698 NUMERIC_PARSERS = { 699 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 700 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 701 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 702 TokenType.NUMBER: lambda self, token: self.expression( 703 exp.Literal, this=token.text, is_string=False 704 ), 705 } 706 707 PRIMARY_PARSERS = { 708 **STRING_PARSERS, 709 **NUMERIC_PARSERS, 710 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 711 TokenType.NULL: lambda self, _: self.expression(exp.Null), 712 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 713 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 714 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 715 TokenType.STAR: lambda self, _: self.expression( 716 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 717 ), 718 } 719 720 PLACEHOLDER_PARSERS = { 721 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 722 TokenType.PARAMETER: lambda self: self._parse_parameter(), 723 TokenType.COLON: lambda self: ( 724 self.expression(exp.Placeholder, this=self._prev.text) 725 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 726 else None 727 ), 728 } 729 730 RANGE_PARSERS = { 731 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 732 TokenType.GLOB: binary_range_parser(exp.Glob), 733 TokenType.ILIKE: binary_range_parser(exp.ILike), 734 TokenType.IN: lambda self, this: self._parse_in(this), 735 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 736 TokenType.IS: lambda self, this: self._parse_is(this), 737 TokenType.LIKE: binary_range_parser(exp.Like), 738 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 739 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 740 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 741 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 742 } 743 744 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 745 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 746 "AUTO": lambda self: self._parse_auto_property(), 747 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 748 "BACKUP": lambda self: self.expression( 749 exp.BackupProperty, this=self._parse_var(any_token=True) 750 ), 751 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 752 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 753 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 754 "CHECKSUM": lambda self: self._parse_checksum(), 755 "CLUSTER BY": lambda self: self._parse_cluster(), 756 "CLUSTERED": lambda self: self._parse_clustered_by(), 757 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 758 exp.CollateProperty, **kwargs 759 ), 760 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 761 "CONTAINS": lambda self: self._parse_contains_property(), 762 "COPY": lambda self: self._parse_copy_property(), 763 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 764 "DEFINER": lambda self: self._parse_definer(), 765 "DETERMINISTIC": lambda self: self.expression( 766 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 767 ), 768 "DISTKEY": lambda self: self._parse_distkey(), 769 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 770 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 771 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 772 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 773 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 774 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 775 "FREESPACE": lambda self: self._parse_freespace(), 776 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 777 "HEAP": lambda self: self.expression(exp.HeapProperty), 778 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 779 "IMMUTABLE": lambda self: self.expression( 780 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 781 ), 782 "INHERITS": lambda self: self.expression( 783 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 784 ), 785 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 786 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 787 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 788 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 789 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 790 "LIKE": lambda self: self._parse_create_like(), 791 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 792 "LOCK": lambda self: self._parse_locking(), 793 "LOCKING": lambda self: self._parse_locking(), 794 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 795 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 796 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 797 "MODIFIES": lambda self: self._parse_modifies_property(), 798 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 799 "NO": lambda self: self._parse_no_property(), 800 "ON": lambda self: self._parse_on_property(), 801 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 802 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 803 "PARTITION": lambda self: self._parse_partitioned_of(), 804 "PARTITION BY": lambda self: self._parse_partitioned_by(), 805 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 806 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 807 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 808 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 809 "READS": lambda self: self._parse_reads_property(), 810 "REMOTE": lambda self: self._parse_remote_with_connection(), 811 "RETURNS": lambda self: self._parse_returns(), 812 "ROW": lambda self: self._parse_row(), 813 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 814 "SAMPLE": lambda self: self.expression( 815 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 816 ), 817 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 818 "SETTINGS": lambda self: self.expression( 819 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 820 ), 821 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 822 "SORTKEY": lambda self: self._parse_sortkey(), 823 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 824 "STABLE": lambda self: self.expression( 825 exp.StabilityProperty, this=exp.Literal.string("STABLE") 826 ), 827 "STORED": lambda self: self._parse_stored(), 828 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 829 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 830 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 831 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 832 "TO": lambda self: self._parse_to_table(), 833 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 834 "TRANSFORM": lambda self: self.expression( 835 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 836 ), 837 "TTL": lambda self: self._parse_ttl(), 838 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 839 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 840 "VOLATILE": lambda self: self._parse_volatile_property(), 841 "WITH": lambda self: self._parse_with_property(), 842 } 843 844 CONSTRAINT_PARSERS = { 845 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 846 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 847 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 848 "CHARACTER SET": lambda self: self.expression( 849 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 850 ), 851 "CHECK": lambda self: self.expression( 852 exp.CheckColumnConstraint, 853 this=self._parse_wrapped(self._parse_conjunction), 854 enforced=self._match_text_seq("ENFORCED"), 855 ), 856 "COLLATE": lambda self: self.expression( 857 exp.CollateColumnConstraint, this=self._parse_var() 858 ), 859 "COMMENT": lambda self: self.expression( 860 exp.CommentColumnConstraint, this=self._parse_string() 861 ), 862 "COMPRESS": lambda self: self._parse_compress(), 863 "CLUSTERED": lambda self: self.expression( 864 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 865 ), 866 "NONCLUSTERED": lambda self: self.expression( 867 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 868 ), 869 "DEFAULT": lambda self: self.expression( 870 exp.DefaultColumnConstraint, this=self._parse_bitwise() 871 ), 872 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 873 "EPHEMERAL": lambda self: self.expression( 874 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 875 ), 876 "EXCLUDE": lambda self: self.expression( 877 exp.ExcludeColumnConstraint, this=self._parse_index_params() 878 ), 879 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 880 "FORMAT": lambda self: self.expression( 881 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 882 ), 883 "GENERATED": lambda self: self._parse_generated_as_identity(), 884 "IDENTITY": lambda self: self._parse_auto_increment(), 885 "INLINE": lambda self: self._parse_inline(), 886 "LIKE": lambda self: self._parse_create_like(), 887 "NOT": lambda self: self._parse_not_constraint(), 888 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 889 "ON": lambda self: ( 890 self._match(TokenType.UPDATE) 891 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 892 ) 893 or self.expression(exp.OnProperty, this=self._parse_id_var()), 894 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 895 "PERIOD": lambda self: self._parse_period_for_system_time(), 896 "PRIMARY KEY": lambda self: self._parse_primary_key(), 897 "REFERENCES": lambda self: self._parse_references(match=False), 898 "TITLE": lambda self: self.expression( 899 exp.TitleColumnConstraint, this=self._parse_var_or_string() 900 ), 901 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 902 "UNIQUE": lambda self: self._parse_unique(), 903 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 904 "WITH": lambda self: self.expression( 905 exp.Properties, expressions=self._parse_wrapped_properties() 906 ), 907 } 908 909 ALTER_PARSERS = { 910 "ADD": lambda self: self._parse_alter_table_add(), 911 "ALTER": lambda self: self._parse_alter_table_alter(), 912 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 913 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 914 "DROP": lambda self: self._parse_alter_table_drop(), 915 "RENAME": lambda self: self._parse_alter_table_rename(), 916 } 917 918 ALTER_ALTER_PARSERS = { 919 "DISTKEY": lambda self: self._parse_alter_diststyle(), 920 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 921 "SORTKEY": lambda self: self._parse_alter_sortkey(), 922 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 923 } 924 925 SCHEMA_UNNAMED_CONSTRAINTS = { 926 "CHECK", 927 "EXCLUDE", 928 "FOREIGN KEY", 929 "LIKE", 930 "PERIOD", 931 "PRIMARY KEY", 932 "UNIQUE", 933 } 934 935 NO_PAREN_FUNCTION_PARSERS = { 936 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 937 "CASE": lambda self: self._parse_case(), 938 "IF": lambda self: self._parse_if(), 939 "NEXT": lambda self: self._parse_next_value_for(), 940 } 941 942 INVALID_FUNC_NAME_TOKENS = { 943 TokenType.IDENTIFIER, 944 TokenType.STRING, 945 } 946 947 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 948 949 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 950 951 FUNCTION_PARSERS = { 952 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 953 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 954 "DECODE": lambda self: self._parse_decode(), 955 "EXTRACT": lambda self: self._parse_extract(), 956 "JSON_OBJECT": lambda self: self._parse_json_object(), 957 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 958 "JSON_TABLE": lambda self: self._parse_json_table(), 959 "MATCH": lambda self: self._parse_match_against(), 960 "OPENJSON": lambda self: self._parse_open_json(), 961 "POSITION": lambda self: self._parse_position(), 962 "PREDICT": lambda self: self._parse_predict(), 963 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 964 "STRING_AGG": lambda self: self._parse_string_agg(), 965 "SUBSTRING": lambda self: self._parse_substring(), 966 "TRIM": lambda self: self._parse_trim(), 967 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 968 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 969 } 970 971 QUERY_MODIFIER_PARSERS = { 972 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 973 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 974 TokenType.WHERE: lambda self: ("where", self._parse_where()), 975 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 976 TokenType.HAVING: lambda self: ("having", self._parse_having()), 977 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 978 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 979 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 980 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 981 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 982 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 983 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 984 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 985 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 986 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 987 TokenType.CLUSTER_BY: lambda self: ( 988 "cluster", 989 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 990 ), 991 TokenType.DISTRIBUTE_BY: lambda self: ( 992 "distribute", 993 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 994 ), 995 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 996 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 997 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 998 } 999 1000 SET_PARSERS = { 1001 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1002 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1003 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1004 "TRANSACTION": lambda self: self._parse_set_transaction(), 1005 } 1006 1007 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1008 1009 TYPE_LITERAL_PARSERS = { 1010 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1011 } 1012 1013 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1014 1015 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1016 1017 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1018 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1019 "ISOLATION": ( 1020 ("LEVEL", "REPEATABLE", "READ"), 1021 ("LEVEL", "READ", "COMMITTED"), 1022 ("LEVEL", "READ", "UNCOMITTED"), 1023 ("LEVEL", "SERIALIZABLE"), 1024 ), 1025 "READ": ("WRITE", "ONLY"), 1026 } 1027 1028 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1029 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1030 ) 1031 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1032 1033 CREATE_SEQUENCE: OPTIONS_TYPE = { 1034 "SCALE": ("EXTEND", "NOEXTEND"), 1035 "SHARD": ("EXTEND", "NOEXTEND"), 1036 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1037 **dict.fromkeys( 1038 ( 1039 "SESSION", 1040 "GLOBAL", 1041 "KEEP", 1042 "NOKEEP", 1043 "ORDER", 1044 "NOORDER", 1045 "NOCACHE", 1046 "CYCLE", 1047 "NOCYCLE", 1048 "NOMINVALUE", 1049 "NOMAXVALUE", 1050 "NOSCALE", 1051 "NOSHARD", 1052 ), 1053 tuple(), 1054 ), 1055 } 1056 1057 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1058 1059 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1060 1061 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1062 1063 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1064 1065 CLONE_KEYWORDS = {"CLONE", "COPY"} 1066 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1067 1068 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1069 1070 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1071 1072 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1073 1074 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1075 1076 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1077 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1078 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1079 1080 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1081 1082 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1083 1084 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1085 1086 DISTINCT_TOKENS = {TokenType.DISTINCT} 1087 1088 NULL_TOKENS = {TokenType.NULL} 1089 1090 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1091 1092 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1093 1094 STRICT_CAST = True 1095 1096 PREFIXED_PIVOT_COLUMNS = False 1097 IDENTIFY_PIVOT_STRINGS = False 1098 1099 LOG_DEFAULTS_TO_LN = False 1100 1101 # Whether ADD is present for each column added by ALTER TABLE 1102 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1103 1104 # Whether the table sample clause expects CSV syntax 1105 TABLESAMPLE_CSV = False 1106 1107 # The default method used for table sampling 1108 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1109 1110 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1111 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1112 1113 # Whether the TRIM function expects the characters to trim as its first argument 1114 TRIM_PATTERN_FIRST = False 1115 1116 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1117 STRING_ALIASES = False 1118 1119 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1120 MODIFIERS_ATTACHED_TO_UNION = True 1121 UNION_MODIFIERS = {"order", "limit", "offset"} 1122 1123 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1124 NO_PAREN_IF_COMMANDS = True 1125 1126 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1127 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1128 1129 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1130 # If this is True and '(' is not found, the keyword will be treated as an identifier 1131 VALUES_FOLLOWED_BY_PAREN = True 1132 1133 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1134 SUPPORTS_IMPLICIT_UNNEST = False 1135 1136 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1137 INTERVAL_SPANS = True 1138 1139 # Whether a PARTITION clause can follow a table reference 1140 SUPPORTS_PARTITION_SELECTION = False 1141 1142 __slots__ = ( 1143 "error_level", 1144 "error_message_context", 1145 "max_errors", 1146 "dialect", 1147 "sql", 1148 "errors", 1149 "_tokens", 1150 "_index", 1151 "_curr", 1152 "_next", 1153 "_prev", 1154 "_prev_comments", 1155 ) 1156 1157 # Autofilled 1158 SHOW_TRIE: t.Dict = {} 1159 SET_TRIE: t.Dict = {} 1160 1161 def __init__( 1162 self, 1163 error_level: t.Optional[ErrorLevel] = None, 1164 error_message_context: int = 100, 1165 max_errors: int = 3, 1166 dialect: DialectType = None, 1167 ): 1168 from sqlglot.dialects import Dialect 1169 1170 self.error_level = error_level or ErrorLevel.IMMEDIATE 1171 self.error_message_context = error_message_context 1172 self.max_errors = max_errors 1173 self.dialect = Dialect.get_or_raise(dialect) 1174 self.reset() 1175 1176 def reset(self): 1177 self.sql = "" 1178 self.errors = [] 1179 self._tokens = [] 1180 self._index = 0 1181 self._curr = None 1182 self._next = None 1183 self._prev = None 1184 self._prev_comments = None 1185 1186 def parse( 1187 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1188 ) -> t.List[t.Optional[exp.Expression]]: 1189 """ 1190 Parses a list of tokens and returns a list of syntax trees, one tree 1191 per parsed SQL statement. 1192 1193 Args: 1194 raw_tokens: The list of tokens. 1195 sql: The original SQL string, used to produce helpful debug messages. 1196 1197 Returns: 1198 The list of the produced syntax trees. 1199 """ 1200 return self._parse( 1201 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1202 ) 1203 1204 def parse_into( 1205 self, 1206 expression_types: exp.IntoType, 1207 raw_tokens: t.List[Token], 1208 sql: t.Optional[str] = None, 1209 ) -> t.List[t.Optional[exp.Expression]]: 1210 """ 1211 Parses a list of tokens into a given Expression type. If a collection of Expression 1212 types is given instead, this method will try to parse the token list into each one 1213 of them, stopping at the first for which the parsing succeeds. 1214 1215 Args: 1216 expression_types: The expression type(s) to try and parse the token list into. 1217 raw_tokens: The list of tokens. 1218 sql: The original SQL string, used to produce helpful debug messages. 1219 1220 Returns: 1221 The target Expression. 1222 """ 1223 errors = [] 1224 for expression_type in ensure_list(expression_types): 1225 parser = self.EXPRESSION_PARSERS.get(expression_type) 1226 if not parser: 1227 raise TypeError(f"No parser registered for {expression_type}") 1228 1229 try: 1230 return self._parse(parser, raw_tokens, sql) 1231 except ParseError as e: 1232 e.errors[0]["into_expression"] = expression_type 1233 errors.append(e) 1234 1235 raise ParseError( 1236 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1237 errors=merge_errors(errors), 1238 ) from errors[-1] 1239 1240 def _parse( 1241 self, 1242 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1243 raw_tokens: t.List[Token], 1244 sql: t.Optional[str] = None, 1245 ) -> t.List[t.Optional[exp.Expression]]: 1246 self.reset() 1247 self.sql = sql or "" 1248 1249 total = len(raw_tokens) 1250 chunks: t.List[t.List[Token]] = [[]] 1251 1252 for i, token in enumerate(raw_tokens): 1253 if token.token_type == TokenType.SEMICOLON: 1254 if token.comments: 1255 chunks.append([token]) 1256 1257 if i < total - 1: 1258 chunks.append([]) 1259 else: 1260 chunks[-1].append(token) 1261 1262 expressions = [] 1263 1264 for tokens in chunks: 1265 self._index = -1 1266 self._tokens = tokens 1267 self._advance() 1268 1269 expressions.append(parse_method(self)) 1270 1271 if self._index < len(self._tokens): 1272 self.raise_error("Invalid expression / Unexpected token") 1273 1274 self.check_errors() 1275 1276 return expressions 1277 1278 def check_errors(self) -> None: 1279 """Logs or raises any found errors, depending on the chosen error level setting.""" 1280 if self.error_level == ErrorLevel.WARN: 1281 for error in self.errors: 1282 logger.error(str(error)) 1283 elif self.error_level == ErrorLevel.RAISE and self.errors: 1284 raise ParseError( 1285 concat_messages(self.errors, self.max_errors), 1286 errors=merge_errors(self.errors), 1287 ) 1288 1289 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1290 """ 1291 Appends an error in the list of recorded errors or raises it, depending on the chosen 1292 error level setting. 1293 """ 1294 token = token or self._curr or self._prev or Token.string("") 1295 start = token.start 1296 end = token.end + 1 1297 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1298 highlight = self.sql[start:end] 1299 end_context = self.sql[end : end + self.error_message_context] 1300 1301 error = ParseError.new( 1302 f"{message}. Line {token.line}, Col: {token.col}.\n" 1303 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1304 description=message, 1305 line=token.line, 1306 col=token.col, 1307 start_context=start_context, 1308 highlight=highlight, 1309 end_context=end_context, 1310 ) 1311 1312 if self.error_level == ErrorLevel.IMMEDIATE: 1313 raise error 1314 1315 self.errors.append(error) 1316 1317 def expression( 1318 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1319 ) -> E: 1320 """ 1321 Creates a new, validated Expression. 1322 1323 Args: 1324 exp_class: The expression class to instantiate. 1325 comments: An optional list of comments to attach to the expression. 1326 kwargs: The arguments to set for the expression along with their respective values. 1327 1328 Returns: 1329 The target expression. 1330 """ 1331 instance = exp_class(**kwargs) 1332 instance.add_comments(comments) if comments else self._add_comments(instance) 1333 return self.validate_expression(instance) 1334 1335 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1336 if expression and self._prev_comments: 1337 expression.add_comments(self._prev_comments) 1338 self._prev_comments = None 1339 1340 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1341 """ 1342 Validates an Expression, making sure that all its mandatory arguments are set. 1343 1344 Args: 1345 expression: The expression to validate. 1346 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1347 1348 Returns: 1349 The validated expression. 1350 """ 1351 if self.error_level != ErrorLevel.IGNORE: 1352 for error_message in expression.error_messages(args): 1353 self.raise_error(error_message) 1354 1355 return expression 1356 1357 def _find_sql(self, start: Token, end: Token) -> str: 1358 return self.sql[start.start : end.end + 1] 1359 1360 def _is_connected(self) -> bool: 1361 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1362 1363 def _advance(self, times: int = 1) -> None: 1364 self._index += times 1365 self._curr = seq_get(self._tokens, self._index) 1366 self._next = seq_get(self._tokens, self._index + 1) 1367 1368 if self._index > 0: 1369 self._prev = self._tokens[self._index - 1] 1370 self._prev_comments = self._prev.comments 1371 else: 1372 self._prev = None 1373 self._prev_comments = None 1374 1375 def _retreat(self, index: int) -> None: 1376 if index != self._index: 1377 self._advance(index - self._index) 1378 1379 def _warn_unsupported(self) -> None: 1380 if len(self._tokens) <= 1: 1381 return 1382 1383 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1384 # interested in emitting a warning for the one being currently processed. 1385 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1386 1387 logger.warning( 1388 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1389 ) 1390 1391 def _parse_command(self) -> exp.Command: 1392 self._warn_unsupported() 1393 return self.expression( 1394 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1395 ) 1396 1397 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1398 """ 1399 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1400 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1401 the parser state accordingly 1402 """ 1403 index = self._index 1404 error_level = self.error_level 1405 1406 self.error_level = ErrorLevel.IMMEDIATE 1407 try: 1408 this = parse_method() 1409 except ParseError: 1410 this = None 1411 finally: 1412 if not this or retreat: 1413 self._retreat(index) 1414 self.error_level = error_level 1415 1416 return this 1417 1418 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1419 start = self._prev 1420 exists = self._parse_exists() if allow_exists else None 1421 1422 self._match(TokenType.ON) 1423 1424 materialized = self._match_text_seq("MATERIALIZED") 1425 kind = self._match_set(self.CREATABLES) and self._prev 1426 if not kind: 1427 return self._parse_as_command(start) 1428 1429 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1430 this = self._parse_user_defined_function(kind=kind.token_type) 1431 elif kind.token_type == TokenType.TABLE: 1432 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1433 elif kind.token_type == TokenType.COLUMN: 1434 this = self._parse_column() 1435 else: 1436 this = self._parse_id_var() 1437 1438 self._match(TokenType.IS) 1439 1440 return self.expression( 1441 exp.Comment, 1442 this=this, 1443 kind=kind.text, 1444 expression=self._parse_string(), 1445 exists=exists, 1446 materialized=materialized, 1447 ) 1448 1449 def _parse_to_table( 1450 self, 1451 ) -> exp.ToTableProperty: 1452 table = self._parse_table_parts(schema=True) 1453 return self.expression(exp.ToTableProperty, this=table) 1454 1455 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1456 def _parse_ttl(self) -> exp.Expression: 1457 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1458 this = self._parse_bitwise() 1459 1460 if self._match_text_seq("DELETE"): 1461 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1462 if self._match_text_seq("RECOMPRESS"): 1463 return self.expression( 1464 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1465 ) 1466 if self._match_text_seq("TO", "DISK"): 1467 return self.expression( 1468 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1469 ) 1470 if self._match_text_seq("TO", "VOLUME"): 1471 return self.expression( 1472 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1473 ) 1474 1475 return this 1476 1477 expressions = self._parse_csv(_parse_ttl_action) 1478 where = self._parse_where() 1479 group = self._parse_group() 1480 1481 aggregates = None 1482 if group and self._match(TokenType.SET): 1483 aggregates = self._parse_csv(self._parse_set_item) 1484 1485 return self.expression( 1486 exp.MergeTreeTTL, 1487 expressions=expressions, 1488 where=where, 1489 group=group, 1490 aggregates=aggregates, 1491 ) 1492 1493 def _parse_statement(self) -> t.Optional[exp.Expression]: 1494 if self._curr is None: 1495 return None 1496 1497 if self._match_set(self.STATEMENT_PARSERS): 1498 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1499 1500 if self._match_set(self.dialect.tokenizer.COMMANDS): 1501 return self._parse_command() 1502 1503 expression = self._parse_expression() 1504 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1505 return self._parse_query_modifiers(expression) 1506 1507 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1508 start = self._prev 1509 temporary = self._match(TokenType.TEMPORARY) 1510 materialized = self._match_text_seq("MATERIALIZED") 1511 1512 kind = self._match_set(self.CREATABLES) and self._prev.text 1513 if not kind: 1514 return self._parse_as_command(start) 1515 1516 if_exists = exists or self._parse_exists() 1517 table = self._parse_table_parts( 1518 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1519 ) 1520 1521 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1522 1523 if self._match(TokenType.L_PAREN, advance=False): 1524 expressions = self._parse_wrapped_csv(self._parse_types) 1525 else: 1526 expressions = None 1527 1528 return self.expression( 1529 exp.Drop, 1530 comments=start.comments, 1531 exists=if_exists, 1532 this=table, 1533 expressions=expressions, 1534 kind=kind.upper(), 1535 temporary=temporary, 1536 materialized=materialized, 1537 cascade=self._match_text_seq("CASCADE"), 1538 constraints=self._match_text_seq("CONSTRAINTS"), 1539 purge=self._match_text_seq("PURGE"), 1540 cluster=cluster, 1541 ) 1542 1543 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1544 return ( 1545 self._match_text_seq("IF") 1546 and (not not_ or self._match(TokenType.NOT)) 1547 and self._match(TokenType.EXISTS) 1548 ) 1549 1550 def _parse_create(self) -> exp.Create | exp.Command: 1551 # Note: this can't be None because we've matched a statement parser 1552 start = self._prev 1553 comments = self._prev_comments 1554 1555 replace = ( 1556 start.token_type == TokenType.REPLACE 1557 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1558 or self._match_pair(TokenType.OR, TokenType.ALTER) 1559 ) 1560 1561 unique = self._match(TokenType.UNIQUE) 1562 1563 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1564 self._advance() 1565 1566 properties = None 1567 create_token = self._match_set(self.CREATABLES) and self._prev 1568 1569 if not create_token: 1570 # exp.Properties.Location.POST_CREATE 1571 properties = self._parse_properties() 1572 create_token = self._match_set(self.CREATABLES) and self._prev 1573 1574 if not properties or not create_token: 1575 return self._parse_as_command(start) 1576 1577 exists = self._parse_exists(not_=True) 1578 this = None 1579 expression: t.Optional[exp.Expression] = None 1580 indexes = None 1581 no_schema_binding = None 1582 begin = None 1583 end = None 1584 clone = None 1585 1586 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1587 nonlocal properties 1588 if properties and temp_props: 1589 properties.expressions.extend(temp_props.expressions) 1590 elif temp_props: 1591 properties = temp_props 1592 1593 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1594 this = self._parse_user_defined_function(kind=create_token.token_type) 1595 1596 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1597 extend_props(self._parse_properties()) 1598 1599 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1600 1601 if not expression: 1602 if self._match(TokenType.COMMAND): 1603 expression = self._parse_as_command(self._prev) 1604 else: 1605 begin = self._match(TokenType.BEGIN) 1606 return_ = self._match_text_seq("RETURN") 1607 1608 if self._match(TokenType.STRING, advance=False): 1609 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1610 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1611 expression = self._parse_string() 1612 extend_props(self._parse_properties()) 1613 else: 1614 expression = self._parse_statement() 1615 1616 end = self._match_text_seq("END") 1617 1618 if return_: 1619 expression = self.expression(exp.Return, this=expression) 1620 elif create_token.token_type == TokenType.INDEX: 1621 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1622 if not self._match(TokenType.ON): 1623 index = self._parse_id_var() 1624 anonymous = False 1625 else: 1626 index = None 1627 anonymous = True 1628 1629 this = self._parse_index(index=index, anonymous=anonymous) 1630 elif create_token.token_type in self.DB_CREATABLES: 1631 table_parts = self._parse_table_parts( 1632 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1633 ) 1634 1635 # exp.Properties.Location.POST_NAME 1636 self._match(TokenType.COMMA) 1637 extend_props(self._parse_properties(before=True)) 1638 1639 this = self._parse_schema(this=table_parts) 1640 1641 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1642 extend_props(self._parse_properties()) 1643 1644 self._match(TokenType.ALIAS) 1645 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1646 # exp.Properties.Location.POST_ALIAS 1647 extend_props(self._parse_properties()) 1648 1649 if create_token.token_type == TokenType.SEQUENCE: 1650 expression = self._parse_types() 1651 extend_props(self._parse_properties()) 1652 else: 1653 expression = self._parse_ddl_select() 1654 1655 if create_token.token_type == TokenType.TABLE: 1656 # exp.Properties.Location.POST_EXPRESSION 1657 extend_props(self._parse_properties()) 1658 1659 indexes = [] 1660 while True: 1661 index = self._parse_index() 1662 1663 # exp.Properties.Location.POST_INDEX 1664 extend_props(self._parse_properties()) 1665 1666 if not index: 1667 break 1668 else: 1669 self._match(TokenType.COMMA) 1670 indexes.append(index) 1671 elif create_token.token_type == TokenType.VIEW: 1672 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1673 no_schema_binding = True 1674 1675 shallow = self._match_text_seq("SHALLOW") 1676 1677 if self._match_texts(self.CLONE_KEYWORDS): 1678 copy = self._prev.text.lower() == "copy" 1679 clone = self.expression( 1680 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1681 ) 1682 1683 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1684 return self._parse_as_command(start) 1685 1686 return self.expression( 1687 exp.Create, 1688 comments=comments, 1689 this=this, 1690 kind=create_token.text.upper(), 1691 replace=replace, 1692 unique=unique, 1693 expression=expression, 1694 exists=exists, 1695 properties=properties, 1696 indexes=indexes, 1697 no_schema_binding=no_schema_binding, 1698 begin=begin, 1699 end=end, 1700 clone=clone, 1701 ) 1702 1703 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1704 seq = exp.SequenceProperties() 1705 1706 options = [] 1707 index = self._index 1708 1709 while self._curr: 1710 self._match(TokenType.COMMA) 1711 if self._match_text_seq("INCREMENT"): 1712 self._match_text_seq("BY") 1713 self._match_text_seq("=") 1714 seq.set("increment", self._parse_term()) 1715 elif self._match_text_seq("MINVALUE"): 1716 seq.set("minvalue", self._parse_term()) 1717 elif self._match_text_seq("MAXVALUE"): 1718 seq.set("maxvalue", self._parse_term()) 1719 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1720 self._match_text_seq("=") 1721 seq.set("start", self._parse_term()) 1722 elif self._match_text_seq("CACHE"): 1723 # T-SQL allows empty CACHE which is initialized dynamically 1724 seq.set("cache", self._parse_number() or True) 1725 elif self._match_text_seq("OWNED", "BY"): 1726 # "OWNED BY NONE" is the default 1727 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1728 else: 1729 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1730 if opt: 1731 options.append(opt) 1732 else: 1733 break 1734 1735 seq.set("options", options if options else None) 1736 return None if self._index == index else seq 1737 1738 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1739 # only used for teradata currently 1740 self._match(TokenType.COMMA) 1741 1742 kwargs = { 1743 "no": self._match_text_seq("NO"), 1744 "dual": self._match_text_seq("DUAL"), 1745 "before": self._match_text_seq("BEFORE"), 1746 "default": self._match_text_seq("DEFAULT"), 1747 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1748 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1749 "after": self._match_text_seq("AFTER"), 1750 "minimum": self._match_texts(("MIN", "MINIMUM")), 1751 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1752 } 1753 1754 if self._match_texts(self.PROPERTY_PARSERS): 1755 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1756 try: 1757 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1758 except TypeError: 1759 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1760 1761 return None 1762 1763 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1764 return self._parse_wrapped_csv(self._parse_property) 1765 1766 def _parse_property(self) -> t.Optional[exp.Expression]: 1767 if self._match_texts(self.PROPERTY_PARSERS): 1768 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1769 1770 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1771 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1772 1773 if self._match_text_seq("COMPOUND", "SORTKEY"): 1774 return self._parse_sortkey(compound=True) 1775 1776 if self._match_text_seq("SQL", "SECURITY"): 1777 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1778 1779 index = self._index 1780 key = self._parse_column() 1781 1782 if not self._match(TokenType.EQ): 1783 self._retreat(index) 1784 return self._parse_sequence_properties() 1785 1786 return self.expression( 1787 exp.Property, 1788 this=key.to_dot() if isinstance(key, exp.Column) else key, 1789 value=self._parse_bitwise() or self._parse_var(any_token=True), 1790 ) 1791 1792 def _parse_stored(self) -> exp.FileFormatProperty: 1793 self._match(TokenType.ALIAS) 1794 1795 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1796 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1797 1798 return self.expression( 1799 exp.FileFormatProperty, 1800 this=( 1801 self.expression( 1802 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1803 ) 1804 if input_format or output_format 1805 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1806 ), 1807 ) 1808 1809 def _parse_unquoted_field(self): 1810 field = self._parse_field() 1811 if isinstance(field, exp.Identifier) and not field.quoted: 1812 field = exp.var(field) 1813 1814 return field 1815 1816 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1817 self._match(TokenType.EQ) 1818 self._match(TokenType.ALIAS) 1819 1820 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1821 1822 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1823 properties = [] 1824 while True: 1825 if before: 1826 prop = self._parse_property_before() 1827 else: 1828 prop = self._parse_property() 1829 if not prop: 1830 break 1831 for p in ensure_list(prop): 1832 properties.append(p) 1833 1834 if properties: 1835 return self.expression(exp.Properties, expressions=properties) 1836 1837 return None 1838 1839 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1840 return self.expression( 1841 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1842 ) 1843 1844 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1845 if self._index >= 2: 1846 pre_volatile_token = self._tokens[self._index - 2] 1847 else: 1848 pre_volatile_token = None 1849 1850 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1851 return exp.VolatileProperty() 1852 1853 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1854 1855 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1856 self._match_pair(TokenType.EQ, TokenType.ON) 1857 1858 prop = self.expression(exp.WithSystemVersioningProperty) 1859 if self._match(TokenType.L_PAREN): 1860 self._match_text_seq("HISTORY_TABLE", "=") 1861 prop.set("this", self._parse_table_parts()) 1862 1863 if self._match(TokenType.COMMA): 1864 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1865 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1866 1867 self._match_r_paren() 1868 1869 return prop 1870 1871 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1872 if self._match(TokenType.L_PAREN, advance=False): 1873 return self._parse_wrapped_properties() 1874 1875 if self._match_text_seq("JOURNAL"): 1876 return self._parse_withjournaltable() 1877 1878 if self._match_texts(self.VIEW_ATTRIBUTES): 1879 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1880 1881 if self._match_text_seq("DATA"): 1882 return self._parse_withdata(no=False) 1883 elif self._match_text_seq("NO", "DATA"): 1884 return self._parse_withdata(no=True) 1885 1886 if not self._next: 1887 return None 1888 1889 return self._parse_withisolatedloading() 1890 1891 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1892 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1893 self._match(TokenType.EQ) 1894 1895 user = self._parse_id_var() 1896 self._match(TokenType.PARAMETER) 1897 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1898 1899 if not user or not host: 1900 return None 1901 1902 return exp.DefinerProperty(this=f"{user}@{host}") 1903 1904 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1905 self._match(TokenType.TABLE) 1906 self._match(TokenType.EQ) 1907 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1908 1909 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1910 return self.expression(exp.LogProperty, no=no) 1911 1912 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1913 return self.expression(exp.JournalProperty, **kwargs) 1914 1915 def _parse_checksum(self) -> exp.ChecksumProperty: 1916 self._match(TokenType.EQ) 1917 1918 on = None 1919 if self._match(TokenType.ON): 1920 on = True 1921 elif self._match_text_seq("OFF"): 1922 on = False 1923 1924 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1925 1926 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1927 return self.expression( 1928 exp.Cluster, 1929 expressions=( 1930 self._parse_wrapped_csv(self._parse_ordered) 1931 if wrapped 1932 else self._parse_csv(self._parse_ordered) 1933 ), 1934 ) 1935 1936 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1937 self._match_text_seq("BY") 1938 1939 self._match_l_paren() 1940 expressions = self._parse_csv(self._parse_column) 1941 self._match_r_paren() 1942 1943 if self._match_text_seq("SORTED", "BY"): 1944 self._match_l_paren() 1945 sorted_by = self._parse_csv(self._parse_ordered) 1946 self._match_r_paren() 1947 else: 1948 sorted_by = None 1949 1950 self._match(TokenType.INTO) 1951 buckets = self._parse_number() 1952 self._match_text_seq("BUCKETS") 1953 1954 return self.expression( 1955 exp.ClusteredByProperty, 1956 expressions=expressions, 1957 sorted_by=sorted_by, 1958 buckets=buckets, 1959 ) 1960 1961 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1962 if not self._match_text_seq("GRANTS"): 1963 self._retreat(self._index - 1) 1964 return None 1965 1966 return self.expression(exp.CopyGrantsProperty) 1967 1968 def _parse_freespace(self) -> exp.FreespaceProperty: 1969 self._match(TokenType.EQ) 1970 return self.expression( 1971 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1972 ) 1973 1974 def _parse_mergeblockratio( 1975 self, no: bool = False, default: bool = False 1976 ) -> exp.MergeBlockRatioProperty: 1977 if self._match(TokenType.EQ): 1978 return self.expression( 1979 exp.MergeBlockRatioProperty, 1980 this=self._parse_number(), 1981 percent=self._match(TokenType.PERCENT), 1982 ) 1983 1984 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1985 1986 def _parse_datablocksize( 1987 self, 1988 default: t.Optional[bool] = None, 1989 minimum: t.Optional[bool] = None, 1990 maximum: t.Optional[bool] = None, 1991 ) -> exp.DataBlocksizeProperty: 1992 self._match(TokenType.EQ) 1993 size = self._parse_number() 1994 1995 units = None 1996 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1997 units = self._prev.text 1998 1999 return self.expression( 2000 exp.DataBlocksizeProperty, 2001 size=size, 2002 units=units, 2003 default=default, 2004 minimum=minimum, 2005 maximum=maximum, 2006 ) 2007 2008 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2009 self._match(TokenType.EQ) 2010 always = self._match_text_seq("ALWAYS") 2011 manual = self._match_text_seq("MANUAL") 2012 never = self._match_text_seq("NEVER") 2013 default = self._match_text_seq("DEFAULT") 2014 2015 autotemp = None 2016 if self._match_text_seq("AUTOTEMP"): 2017 autotemp = self._parse_schema() 2018 2019 return self.expression( 2020 exp.BlockCompressionProperty, 2021 always=always, 2022 manual=manual, 2023 never=never, 2024 default=default, 2025 autotemp=autotemp, 2026 ) 2027 2028 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2029 index = self._index 2030 no = self._match_text_seq("NO") 2031 concurrent = self._match_text_seq("CONCURRENT") 2032 2033 if not self._match_text_seq("ISOLATED", "LOADING"): 2034 self._retreat(index) 2035 return None 2036 2037 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2038 return self.expression( 2039 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2040 ) 2041 2042 def _parse_locking(self) -> exp.LockingProperty: 2043 if self._match(TokenType.TABLE): 2044 kind = "TABLE" 2045 elif self._match(TokenType.VIEW): 2046 kind = "VIEW" 2047 elif self._match(TokenType.ROW): 2048 kind = "ROW" 2049 elif self._match_text_seq("DATABASE"): 2050 kind = "DATABASE" 2051 else: 2052 kind = None 2053 2054 if kind in ("DATABASE", "TABLE", "VIEW"): 2055 this = self._parse_table_parts() 2056 else: 2057 this = None 2058 2059 if self._match(TokenType.FOR): 2060 for_or_in = "FOR" 2061 elif self._match(TokenType.IN): 2062 for_or_in = "IN" 2063 else: 2064 for_or_in = None 2065 2066 if self._match_text_seq("ACCESS"): 2067 lock_type = "ACCESS" 2068 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2069 lock_type = "EXCLUSIVE" 2070 elif self._match_text_seq("SHARE"): 2071 lock_type = "SHARE" 2072 elif self._match_text_seq("READ"): 2073 lock_type = "READ" 2074 elif self._match_text_seq("WRITE"): 2075 lock_type = "WRITE" 2076 elif self._match_text_seq("CHECKSUM"): 2077 lock_type = "CHECKSUM" 2078 else: 2079 lock_type = None 2080 2081 override = self._match_text_seq("OVERRIDE") 2082 2083 return self.expression( 2084 exp.LockingProperty, 2085 this=this, 2086 kind=kind, 2087 for_or_in=for_or_in, 2088 lock_type=lock_type, 2089 override=override, 2090 ) 2091 2092 def _parse_partition_by(self) -> t.List[exp.Expression]: 2093 if self._match(TokenType.PARTITION_BY): 2094 return self._parse_csv(self._parse_conjunction) 2095 return [] 2096 2097 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2098 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2099 if self._match_text_seq("MINVALUE"): 2100 return exp.var("MINVALUE") 2101 if self._match_text_seq("MAXVALUE"): 2102 return exp.var("MAXVALUE") 2103 return self._parse_bitwise() 2104 2105 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2106 expression = None 2107 from_expressions = None 2108 to_expressions = None 2109 2110 if self._match(TokenType.IN): 2111 this = self._parse_wrapped_csv(self._parse_bitwise) 2112 elif self._match(TokenType.FROM): 2113 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2114 self._match_text_seq("TO") 2115 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2116 elif self._match_text_seq("WITH", "(", "MODULUS"): 2117 this = self._parse_number() 2118 self._match_text_seq(",", "REMAINDER") 2119 expression = self._parse_number() 2120 self._match_r_paren() 2121 else: 2122 self.raise_error("Failed to parse partition bound spec.") 2123 2124 return self.expression( 2125 exp.PartitionBoundSpec, 2126 this=this, 2127 expression=expression, 2128 from_expressions=from_expressions, 2129 to_expressions=to_expressions, 2130 ) 2131 2132 # https://www.postgresql.org/docs/current/sql-createtable.html 2133 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2134 if not self._match_text_seq("OF"): 2135 self._retreat(self._index - 1) 2136 return None 2137 2138 this = self._parse_table(schema=True) 2139 2140 if self._match(TokenType.DEFAULT): 2141 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2142 elif self._match_text_seq("FOR", "VALUES"): 2143 expression = self._parse_partition_bound_spec() 2144 else: 2145 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2146 2147 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2148 2149 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2150 self._match(TokenType.EQ) 2151 return self.expression( 2152 exp.PartitionedByProperty, 2153 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2154 ) 2155 2156 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2157 if self._match_text_seq("AND", "STATISTICS"): 2158 statistics = True 2159 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2160 statistics = False 2161 else: 2162 statistics = None 2163 2164 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2165 2166 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2167 if self._match_text_seq("SQL"): 2168 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2169 return None 2170 2171 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2172 if self._match_text_seq("SQL", "DATA"): 2173 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2174 return None 2175 2176 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2177 if self._match_text_seq("PRIMARY", "INDEX"): 2178 return exp.NoPrimaryIndexProperty() 2179 if self._match_text_seq("SQL"): 2180 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2181 return None 2182 2183 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2184 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2185 return exp.OnCommitProperty() 2186 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2187 return exp.OnCommitProperty(delete=True) 2188 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2189 2190 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2191 if self._match_text_seq("SQL", "DATA"): 2192 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2193 return None 2194 2195 def _parse_distkey(self) -> exp.DistKeyProperty: 2196 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2197 2198 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2199 table = self._parse_table(schema=True) 2200 2201 options = [] 2202 while self._match_texts(("INCLUDING", "EXCLUDING")): 2203 this = self._prev.text.upper() 2204 2205 id_var = self._parse_id_var() 2206 if not id_var: 2207 return None 2208 2209 options.append( 2210 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2211 ) 2212 2213 return self.expression(exp.LikeProperty, this=table, expressions=options) 2214 2215 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2216 return self.expression( 2217 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2218 ) 2219 2220 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2221 self._match(TokenType.EQ) 2222 return self.expression( 2223 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2224 ) 2225 2226 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2227 self._match_text_seq("WITH", "CONNECTION") 2228 return self.expression( 2229 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2230 ) 2231 2232 def _parse_returns(self) -> exp.ReturnsProperty: 2233 value: t.Optional[exp.Expression] 2234 is_table = self._match(TokenType.TABLE) 2235 2236 if is_table: 2237 if self._match(TokenType.LT): 2238 value = self.expression( 2239 exp.Schema, 2240 this="TABLE", 2241 expressions=self._parse_csv(self._parse_struct_types), 2242 ) 2243 if not self._match(TokenType.GT): 2244 self.raise_error("Expecting >") 2245 else: 2246 value = self._parse_schema(exp.var("TABLE")) 2247 else: 2248 value = self._parse_types() 2249 2250 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2251 2252 def _parse_describe(self) -> exp.Describe: 2253 kind = self._match_set(self.CREATABLES) and self._prev.text 2254 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2255 if self._match(TokenType.DOT): 2256 style = None 2257 self._retreat(self._index - 2) 2258 this = self._parse_table(schema=True) 2259 properties = self._parse_properties() 2260 expressions = properties.expressions if properties else None 2261 return self.expression( 2262 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2263 ) 2264 2265 def _parse_insert(self) -> exp.Insert: 2266 comments = ensure_list(self._prev_comments) 2267 hint = self._parse_hint() 2268 overwrite = self._match(TokenType.OVERWRITE) 2269 ignore = self._match(TokenType.IGNORE) 2270 local = self._match_text_seq("LOCAL") 2271 alternative = None 2272 is_function = None 2273 2274 if self._match_text_seq("DIRECTORY"): 2275 this: t.Optional[exp.Expression] = self.expression( 2276 exp.Directory, 2277 this=self._parse_var_or_string(), 2278 local=local, 2279 row_format=self._parse_row_format(match_row=True), 2280 ) 2281 else: 2282 if self._match(TokenType.OR): 2283 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2284 2285 self._match(TokenType.INTO) 2286 comments += ensure_list(self._prev_comments) 2287 self._match(TokenType.TABLE) 2288 is_function = self._match(TokenType.FUNCTION) 2289 2290 this = ( 2291 self._parse_table(schema=True, parse_partition=True) 2292 if not is_function 2293 else self._parse_function() 2294 ) 2295 2296 returning = self._parse_returning() 2297 2298 return self.expression( 2299 exp.Insert, 2300 comments=comments, 2301 hint=hint, 2302 is_function=is_function, 2303 this=this, 2304 stored=self._match_text_seq("STORED") and self._parse_stored(), 2305 by_name=self._match_text_seq("BY", "NAME"), 2306 exists=self._parse_exists(), 2307 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2308 and self._parse_conjunction(), 2309 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2310 conflict=self._parse_on_conflict(), 2311 returning=returning or self._parse_returning(), 2312 overwrite=overwrite, 2313 alternative=alternative, 2314 ignore=ignore, 2315 ) 2316 2317 def _parse_kill(self) -> exp.Kill: 2318 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2319 2320 return self.expression( 2321 exp.Kill, 2322 this=self._parse_primary(), 2323 kind=kind, 2324 ) 2325 2326 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2327 conflict = self._match_text_seq("ON", "CONFLICT") 2328 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2329 2330 if not conflict and not duplicate: 2331 return None 2332 2333 conflict_keys = None 2334 constraint = None 2335 2336 if conflict: 2337 if self._match_text_seq("ON", "CONSTRAINT"): 2338 constraint = self._parse_id_var() 2339 elif self._match(TokenType.L_PAREN): 2340 conflict_keys = self._parse_csv(self._parse_id_var) 2341 self._match_r_paren() 2342 2343 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2344 if self._prev.token_type == TokenType.UPDATE: 2345 self._match(TokenType.SET) 2346 expressions = self._parse_csv(self._parse_equality) 2347 else: 2348 expressions = None 2349 2350 return self.expression( 2351 exp.OnConflict, 2352 duplicate=duplicate, 2353 expressions=expressions, 2354 action=action, 2355 conflict_keys=conflict_keys, 2356 constraint=constraint, 2357 ) 2358 2359 def _parse_returning(self) -> t.Optional[exp.Returning]: 2360 if not self._match(TokenType.RETURNING): 2361 return None 2362 return self.expression( 2363 exp.Returning, 2364 expressions=self._parse_csv(self._parse_expression), 2365 into=self._match(TokenType.INTO) and self._parse_table_part(), 2366 ) 2367 2368 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2369 if not self._match(TokenType.FORMAT): 2370 return None 2371 return self._parse_row_format() 2372 2373 def _parse_row_format( 2374 self, match_row: bool = False 2375 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2376 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2377 return None 2378 2379 if self._match_text_seq("SERDE"): 2380 this = self._parse_string() 2381 2382 serde_properties = None 2383 if self._match(TokenType.SERDE_PROPERTIES): 2384 serde_properties = self.expression( 2385 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2386 ) 2387 2388 return self.expression( 2389 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2390 ) 2391 2392 self._match_text_seq("DELIMITED") 2393 2394 kwargs = {} 2395 2396 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2397 kwargs["fields"] = self._parse_string() 2398 if self._match_text_seq("ESCAPED", "BY"): 2399 kwargs["escaped"] = self._parse_string() 2400 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2401 kwargs["collection_items"] = self._parse_string() 2402 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2403 kwargs["map_keys"] = self._parse_string() 2404 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2405 kwargs["lines"] = self._parse_string() 2406 if self._match_text_seq("NULL", "DEFINED", "AS"): 2407 kwargs["null"] = self._parse_string() 2408 2409 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2410 2411 def _parse_load(self) -> exp.LoadData | exp.Command: 2412 if self._match_text_seq("DATA"): 2413 local = self._match_text_seq("LOCAL") 2414 self._match_text_seq("INPATH") 2415 inpath = self._parse_string() 2416 overwrite = self._match(TokenType.OVERWRITE) 2417 self._match_pair(TokenType.INTO, TokenType.TABLE) 2418 2419 return self.expression( 2420 exp.LoadData, 2421 this=self._parse_table(schema=True), 2422 local=local, 2423 overwrite=overwrite, 2424 inpath=inpath, 2425 partition=self._parse_partition(), 2426 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2427 serde=self._match_text_seq("SERDE") and self._parse_string(), 2428 ) 2429 return self._parse_as_command(self._prev) 2430 2431 def _parse_delete(self) -> exp.Delete: 2432 # This handles MySQL's "Multiple-Table Syntax" 2433 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2434 tables = None 2435 comments = self._prev_comments 2436 if not self._match(TokenType.FROM, advance=False): 2437 tables = self._parse_csv(self._parse_table) or None 2438 2439 returning = self._parse_returning() 2440 2441 return self.expression( 2442 exp.Delete, 2443 comments=comments, 2444 tables=tables, 2445 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2446 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2447 where=self._parse_where(), 2448 returning=returning or self._parse_returning(), 2449 limit=self._parse_limit(), 2450 ) 2451 2452 def _parse_update(self) -> exp.Update: 2453 comments = self._prev_comments 2454 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2455 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2456 returning = self._parse_returning() 2457 return self.expression( 2458 exp.Update, 2459 comments=comments, 2460 **{ # type: ignore 2461 "this": this, 2462 "expressions": expressions, 2463 "from": self._parse_from(joins=True), 2464 "where": self._parse_where(), 2465 "returning": returning or self._parse_returning(), 2466 "order": self._parse_order(), 2467 "limit": self._parse_limit(), 2468 }, 2469 ) 2470 2471 def _parse_uncache(self) -> exp.Uncache: 2472 if not self._match(TokenType.TABLE): 2473 self.raise_error("Expecting TABLE after UNCACHE") 2474 2475 return self.expression( 2476 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2477 ) 2478 2479 def _parse_cache(self) -> exp.Cache: 2480 lazy = self._match_text_seq("LAZY") 2481 self._match(TokenType.TABLE) 2482 table = self._parse_table(schema=True) 2483 2484 options = [] 2485 if self._match_text_seq("OPTIONS"): 2486 self._match_l_paren() 2487 k = self._parse_string() 2488 self._match(TokenType.EQ) 2489 v = self._parse_string() 2490 options = [k, v] 2491 self._match_r_paren() 2492 2493 self._match(TokenType.ALIAS) 2494 return self.expression( 2495 exp.Cache, 2496 this=table, 2497 lazy=lazy, 2498 options=options, 2499 expression=self._parse_select(nested=True), 2500 ) 2501 2502 def _parse_partition(self) -> t.Optional[exp.Partition]: 2503 if not self._match(TokenType.PARTITION): 2504 return None 2505 2506 return self.expression( 2507 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2508 ) 2509 2510 def _parse_value(self) -> t.Optional[exp.Tuple]: 2511 if self._match(TokenType.L_PAREN): 2512 expressions = self._parse_csv(self._parse_expression) 2513 self._match_r_paren() 2514 return self.expression(exp.Tuple, expressions=expressions) 2515 2516 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2517 expression = self._parse_expression() 2518 if expression: 2519 return self.expression(exp.Tuple, expressions=[expression]) 2520 return None 2521 2522 def _parse_projections(self) -> t.List[exp.Expression]: 2523 return self._parse_expressions() 2524 2525 def _parse_select( 2526 self, 2527 nested: bool = False, 2528 table: bool = False, 2529 parse_subquery_alias: bool = True, 2530 parse_set_operation: bool = True, 2531 ) -> t.Optional[exp.Expression]: 2532 cte = self._parse_with() 2533 2534 if cte: 2535 this = self._parse_statement() 2536 2537 if not this: 2538 self.raise_error("Failed to parse any statement following CTE") 2539 return cte 2540 2541 if "with" in this.arg_types: 2542 this.set("with", cte) 2543 else: 2544 self.raise_error(f"{this.key} does not support CTE") 2545 this = cte 2546 2547 return this 2548 2549 # duckdb supports leading with FROM x 2550 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2551 2552 if self._match(TokenType.SELECT): 2553 comments = self._prev_comments 2554 2555 hint = self._parse_hint() 2556 all_ = self._match(TokenType.ALL) 2557 distinct = self._match_set(self.DISTINCT_TOKENS) 2558 2559 kind = ( 2560 self._match(TokenType.ALIAS) 2561 and self._match_texts(("STRUCT", "VALUE")) 2562 and self._prev.text.upper() 2563 ) 2564 2565 if distinct: 2566 distinct = self.expression( 2567 exp.Distinct, 2568 on=self._parse_value() if self._match(TokenType.ON) else None, 2569 ) 2570 2571 if all_ and distinct: 2572 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2573 2574 limit = self._parse_limit(top=True) 2575 projections = self._parse_projections() 2576 2577 this = self.expression( 2578 exp.Select, 2579 kind=kind, 2580 hint=hint, 2581 distinct=distinct, 2582 expressions=projections, 2583 limit=limit, 2584 ) 2585 this.comments = comments 2586 2587 into = self._parse_into() 2588 if into: 2589 this.set("into", into) 2590 2591 if not from_: 2592 from_ = self._parse_from() 2593 2594 if from_: 2595 this.set("from", from_) 2596 2597 this = self._parse_query_modifiers(this) 2598 elif (table or nested) and self._match(TokenType.L_PAREN): 2599 if self._match(TokenType.PIVOT): 2600 this = self._parse_simplified_pivot() 2601 elif self._match(TokenType.FROM): 2602 this = exp.select("*").from_( 2603 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2604 ) 2605 else: 2606 this = ( 2607 self._parse_table() 2608 if table 2609 else self._parse_select(nested=True, parse_set_operation=False) 2610 ) 2611 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2612 2613 self._match_r_paren() 2614 2615 # We return early here so that the UNION isn't attached to the subquery by the 2616 # following call to _parse_set_operations, but instead becomes the parent node 2617 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2618 elif self._match(TokenType.VALUES, advance=False): 2619 this = self._parse_derived_table_values() 2620 elif from_: 2621 this = exp.select("*").from_(from_.this, copy=False) 2622 else: 2623 this = None 2624 2625 if parse_set_operation: 2626 return self._parse_set_operations(this) 2627 return this 2628 2629 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2630 if not skip_with_token and not self._match(TokenType.WITH): 2631 return None 2632 2633 comments = self._prev_comments 2634 recursive = self._match(TokenType.RECURSIVE) 2635 2636 expressions = [] 2637 while True: 2638 expressions.append(self._parse_cte()) 2639 2640 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2641 break 2642 else: 2643 self._match(TokenType.WITH) 2644 2645 return self.expression( 2646 exp.With, comments=comments, expressions=expressions, recursive=recursive 2647 ) 2648 2649 def _parse_cte(self) -> exp.CTE: 2650 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2651 if not alias or not alias.this: 2652 self.raise_error("Expected CTE to have alias") 2653 2654 self._match(TokenType.ALIAS) 2655 2656 if self._match_text_seq("NOT", "MATERIALIZED"): 2657 materialized = False 2658 elif self._match_text_seq("MATERIALIZED"): 2659 materialized = True 2660 else: 2661 materialized = None 2662 2663 return self.expression( 2664 exp.CTE, 2665 this=self._parse_wrapped(self._parse_statement), 2666 alias=alias, 2667 materialized=materialized, 2668 ) 2669 2670 def _parse_table_alias( 2671 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2672 ) -> t.Optional[exp.TableAlias]: 2673 any_token = self._match(TokenType.ALIAS) 2674 alias = ( 2675 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2676 or self._parse_string_as_identifier() 2677 ) 2678 2679 index = self._index 2680 if self._match(TokenType.L_PAREN): 2681 columns = self._parse_csv(self._parse_function_parameter) 2682 self._match_r_paren() if columns else self._retreat(index) 2683 else: 2684 columns = None 2685 2686 if not alias and not columns: 2687 return None 2688 2689 return self.expression(exp.TableAlias, this=alias, columns=columns) 2690 2691 def _parse_subquery( 2692 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2693 ) -> t.Optional[exp.Subquery]: 2694 if not this: 2695 return None 2696 2697 return self.expression( 2698 exp.Subquery, 2699 this=this, 2700 pivots=self._parse_pivots(), 2701 alias=self._parse_table_alias() if parse_alias else None, 2702 ) 2703 2704 def _implicit_unnests_to_explicit(self, this: E) -> E: 2705 from sqlglot.optimizer.normalize_identifiers import ( 2706 normalize_identifiers as _norm, 2707 ) 2708 2709 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2710 for i, join in enumerate(this.args.get("joins") or []): 2711 table = join.this 2712 normalized_table = table.copy() 2713 normalized_table.meta["maybe_column"] = True 2714 normalized_table = _norm(normalized_table, dialect=self.dialect) 2715 2716 if isinstance(table, exp.Table) and not join.args.get("on"): 2717 if normalized_table.parts[0].name in refs: 2718 table_as_column = table.to_column() 2719 unnest = exp.Unnest(expressions=[table_as_column]) 2720 2721 # Table.to_column creates a parent Alias node that we want to convert to 2722 # a TableAlias and attach to the Unnest, so it matches the parser's output 2723 if isinstance(table.args.get("alias"), exp.TableAlias): 2724 table_as_column.replace(table_as_column.this) 2725 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2726 2727 table.replace(unnest) 2728 2729 refs.add(normalized_table.alias_or_name) 2730 2731 return this 2732 2733 def _parse_query_modifiers( 2734 self, this: t.Optional[exp.Expression] 2735 ) -> t.Optional[exp.Expression]: 2736 if isinstance(this, (exp.Query, exp.Table)): 2737 for join in self._parse_joins(): 2738 this.append("joins", join) 2739 for lateral in iter(self._parse_lateral, None): 2740 this.append("laterals", lateral) 2741 2742 while True: 2743 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2744 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2745 key, expression = parser(self) 2746 2747 if expression: 2748 this.set(key, expression) 2749 if key == "limit": 2750 offset = expression.args.pop("offset", None) 2751 2752 if offset: 2753 offset = exp.Offset(expression=offset) 2754 this.set("offset", offset) 2755 2756 limit_by_expressions = expression.expressions 2757 expression.set("expressions", None) 2758 offset.set("expressions", limit_by_expressions) 2759 continue 2760 break 2761 2762 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2763 this = self._implicit_unnests_to_explicit(this) 2764 2765 return this 2766 2767 def _parse_hint(self) -> t.Optional[exp.Hint]: 2768 if self._match(TokenType.HINT): 2769 hints = [] 2770 for hint in iter( 2771 lambda: self._parse_csv( 2772 lambda: self._parse_function() or self._parse_var(upper=True) 2773 ), 2774 [], 2775 ): 2776 hints.extend(hint) 2777 2778 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2779 self.raise_error("Expected */ after HINT") 2780 2781 return self.expression(exp.Hint, expressions=hints) 2782 2783 return None 2784 2785 def _parse_into(self) -> t.Optional[exp.Into]: 2786 if not self._match(TokenType.INTO): 2787 return None 2788 2789 temp = self._match(TokenType.TEMPORARY) 2790 unlogged = self._match_text_seq("UNLOGGED") 2791 self._match(TokenType.TABLE) 2792 2793 return self.expression( 2794 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2795 ) 2796 2797 def _parse_from( 2798 self, joins: bool = False, skip_from_token: bool = False 2799 ) -> t.Optional[exp.From]: 2800 if not skip_from_token and not self._match(TokenType.FROM): 2801 return None 2802 2803 return self.expression( 2804 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2805 ) 2806 2807 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2808 return self.expression( 2809 exp.MatchRecognizeMeasure, 2810 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2811 this=self._parse_expression(), 2812 ) 2813 2814 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2815 if not self._match(TokenType.MATCH_RECOGNIZE): 2816 return None 2817 2818 self._match_l_paren() 2819 2820 partition = self._parse_partition_by() 2821 order = self._parse_order() 2822 2823 measures = ( 2824 self._parse_csv(self._parse_match_recognize_measure) 2825 if self._match_text_seq("MEASURES") 2826 else None 2827 ) 2828 2829 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2830 rows = exp.var("ONE ROW PER MATCH") 2831 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2832 text = "ALL ROWS PER MATCH" 2833 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2834 text += " SHOW EMPTY MATCHES" 2835 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2836 text += " OMIT EMPTY MATCHES" 2837 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2838 text += " WITH UNMATCHED ROWS" 2839 rows = exp.var(text) 2840 else: 2841 rows = None 2842 2843 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2844 text = "AFTER MATCH SKIP" 2845 if self._match_text_seq("PAST", "LAST", "ROW"): 2846 text += " PAST LAST ROW" 2847 elif self._match_text_seq("TO", "NEXT", "ROW"): 2848 text += " TO NEXT ROW" 2849 elif self._match_text_seq("TO", "FIRST"): 2850 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2851 elif self._match_text_seq("TO", "LAST"): 2852 text += f" TO LAST {self._advance_any().text}" # type: ignore 2853 after = exp.var(text) 2854 else: 2855 after = None 2856 2857 if self._match_text_seq("PATTERN"): 2858 self._match_l_paren() 2859 2860 if not self._curr: 2861 self.raise_error("Expecting )", self._curr) 2862 2863 paren = 1 2864 start = self._curr 2865 2866 while self._curr and paren > 0: 2867 if self._curr.token_type == TokenType.L_PAREN: 2868 paren += 1 2869 if self._curr.token_type == TokenType.R_PAREN: 2870 paren -= 1 2871 2872 end = self._prev 2873 self._advance() 2874 2875 if paren > 0: 2876 self.raise_error("Expecting )", self._curr) 2877 2878 pattern = exp.var(self._find_sql(start, end)) 2879 else: 2880 pattern = None 2881 2882 define = ( 2883 self._parse_csv(self._parse_name_as_expression) 2884 if self._match_text_seq("DEFINE") 2885 else None 2886 ) 2887 2888 self._match_r_paren() 2889 2890 return self.expression( 2891 exp.MatchRecognize, 2892 partition_by=partition, 2893 order=order, 2894 measures=measures, 2895 rows=rows, 2896 after=after, 2897 pattern=pattern, 2898 define=define, 2899 alias=self._parse_table_alias(), 2900 ) 2901 2902 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2903 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2904 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2905 cross_apply = False 2906 2907 if cross_apply is not None: 2908 this = self._parse_select(table=True) 2909 view = None 2910 outer = None 2911 elif self._match(TokenType.LATERAL): 2912 this = self._parse_select(table=True) 2913 view = self._match(TokenType.VIEW) 2914 outer = self._match(TokenType.OUTER) 2915 else: 2916 return None 2917 2918 if not this: 2919 this = ( 2920 self._parse_unnest() 2921 or self._parse_function() 2922 or self._parse_id_var(any_token=False) 2923 ) 2924 2925 while self._match(TokenType.DOT): 2926 this = exp.Dot( 2927 this=this, 2928 expression=self._parse_function() or self._parse_id_var(any_token=False), 2929 ) 2930 2931 if view: 2932 table = self._parse_id_var(any_token=False) 2933 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2934 table_alias: t.Optional[exp.TableAlias] = self.expression( 2935 exp.TableAlias, this=table, columns=columns 2936 ) 2937 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2938 # We move the alias from the lateral's child node to the lateral itself 2939 table_alias = this.args["alias"].pop() 2940 else: 2941 table_alias = self._parse_table_alias() 2942 2943 return self.expression( 2944 exp.Lateral, 2945 this=this, 2946 view=view, 2947 outer=outer, 2948 alias=table_alias, 2949 cross_apply=cross_apply, 2950 ) 2951 2952 def _parse_join_parts( 2953 self, 2954 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2955 return ( 2956 self._match_set(self.JOIN_METHODS) and self._prev, 2957 self._match_set(self.JOIN_SIDES) and self._prev, 2958 self._match_set(self.JOIN_KINDS) and self._prev, 2959 ) 2960 2961 def _parse_join( 2962 self, skip_join_token: bool = False, parse_bracket: bool = False 2963 ) -> t.Optional[exp.Join]: 2964 if self._match(TokenType.COMMA): 2965 return self.expression(exp.Join, this=self._parse_table()) 2966 2967 index = self._index 2968 method, side, kind = self._parse_join_parts() 2969 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2970 join = self._match(TokenType.JOIN) 2971 2972 if not skip_join_token and not join: 2973 self._retreat(index) 2974 kind = None 2975 method = None 2976 side = None 2977 2978 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2979 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2980 2981 if not skip_join_token and not join and not outer_apply and not cross_apply: 2982 return None 2983 2984 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2985 2986 if method: 2987 kwargs["method"] = method.text 2988 if side: 2989 kwargs["side"] = side.text 2990 if kind: 2991 kwargs["kind"] = kind.text 2992 if hint: 2993 kwargs["hint"] = hint 2994 2995 if self._match(TokenType.MATCH_CONDITION): 2996 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 2997 2998 if self._match(TokenType.ON): 2999 kwargs["on"] = self._parse_conjunction() 3000 elif self._match(TokenType.USING): 3001 kwargs["using"] = self._parse_wrapped_id_vars() 3002 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3003 kind and kind.token_type == TokenType.CROSS 3004 ): 3005 index = self._index 3006 joins: t.Optional[list] = list(self._parse_joins()) 3007 3008 if joins and self._match(TokenType.ON): 3009 kwargs["on"] = self._parse_conjunction() 3010 elif joins and self._match(TokenType.USING): 3011 kwargs["using"] = self._parse_wrapped_id_vars() 3012 else: 3013 joins = None 3014 self._retreat(index) 3015 3016 kwargs["this"].set("joins", joins if joins else None) 3017 3018 comments = [c for token in (method, side, kind) if token for c in token.comments] 3019 return self.expression(exp.Join, comments=comments, **kwargs) 3020 3021 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3022 this = self._parse_conjunction() 3023 3024 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3025 return this 3026 3027 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3028 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3029 3030 return this 3031 3032 def _parse_index_params(self) -> exp.IndexParameters: 3033 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3034 3035 if self._match(TokenType.L_PAREN, advance=False): 3036 columns = self._parse_wrapped_csv(self._parse_with_operator) 3037 else: 3038 columns = None 3039 3040 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3041 partition_by = self._parse_partition_by() 3042 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3043 tablespace = ( 3044 self._parse_var(any_token=True) 3045 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3046 else None 3047 ) 3048 where = self._parse_where() 3049 3050 return self.expression( 3051 exp.IndexParameters, 3052 using=using, 3053 columns=columns, 3054 include=include, 3055 partition_by=partition_by, 3056 where=where, 3057 with_storage=with_storage, 3058 tablespace=tablespace, 3059 ) 3060 3061 def _parse_index( 3062 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3063 ) -> t.Optional[exp.Index]: 3064 if index or anonymous: 3065 unique = None 3066 primary = None 3067 amp = None 3068 3069 self._match(TokenType.ON) 3070 self._match(TokenType.TABLE) # hive 3071 table = self._parse_table_parts(schema=True) 3072 else: 3073 unique = self._match(TokenType.UNIQUE) 3074 primary = self._match_text_seq("PRIMARY") 3075 amp = self._match_text_seq("AMP") 3076 3077 if not self._match(TokenType.INDEX): 3078 return None 3079 3080 index = self._parse_id_var() 3081 table = None 3082 3083 params = self._parse_index_params() 3084 3085 return self.expression( 3086 exp.Index, 3087 this=index, 3088 table=table, 3089 unique=unique, 3090 primary=primary, 3091 amp=amp, 3092 params=params, 3093 ) 3094 3095 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3096 hints: t.List[exp.Expression] = [] 3097 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3098 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3099 hints.append( 3100 self.expression( 3101 exp.WithTableHint, 3102 expressions=self._parse_csv( 3103 lambda: self._parse_function() or self._parse_var(any_token=True) 3104 ), 3105 ) 3106 ) 3107 self._match_r_paren() 3108 else: 3109 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3110 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3111 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3112 3113 self._match_texts(("INDEX", "KEY")) 3114 if self._match(TokenType.FOR): 3115 hint.set("target", self._advance_any() and self._prev.text.upper()) 3116 3117 hint.set("expressions", self._parse_wrapped_id_vars()) 3118 hints.append(hint) 3119 3120 return hints or None 3121 3122 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3123 return ( 3124 (not schema and self._parse_function(optional_parens=False)) 3125 or self._parse_id_var(any_token=False) 3126 or self._parse_string_as_identifier() 3127 or self._parse_placeholder() 3128 ) 3129 3130 def _parse_table_parts( 3131 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3132 ) -> exp.Table: 3133 catalog = None 3134 db = None 3135 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3136 3137 while self._match(TokenType.DOT): 3138 if catalog: 3139 # This allows nesting the table in arbitrarily many dot expressions if needed 3140 table = self.expression( 3141 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3142 ) 3143 else: 3144 catalog = db 3145 db = table 3146 # "" used for tsql FROM a..b case 3147 table = self._parse_table_part(schema=schema) or "" 3148 3149 if ( 3150 wildcard 3151 and self._is_connected() 3152 and (isinstance(table, exp.Identifier) or not table) 3153 and self._match(TokenType.STAR) 3154 ): 3155 if isinstance(table, exp.Identifier): 3156 table.args["this"] += "*" 3157 else: 3158 table = exp.Identifier(this="*") 3159 3160 # We bubble up comments from the Identifier to the Table 3161 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3162 3163 if is_db_reference: 3164 catalog = db 3165 db = table 3166 table = None 3167 3168 if not table and not is_db_reference: 3169 self.raise_error(f"Expected table name but got {self._curr}") 3170 if not db and is_db_reference: 3171 self.raise_error(f"Expected database name but got {self._curr}") 3172 3173 return self.expression( 3174 exp.Table, 3175 comments=comments, 3176 this=table, 3177 db=db, 3178 catalog=catalog, 3179 pivots=self._parse_pivots(), 3180 ) 3181 3182 def _parse_table( 3183 self, 3184 schema: bool = False, 3185 joins: bool = False, 3186 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3187 parse_bracket: bool = False, 3188 is_db_reference: bool = False, 3189 parse_partition: bool = False, 3190 ) -> t.Optional[exp.Expression]: 3191 lateral = self._parse_lateral() 3192 if lateral: 3193 return lateral 3194 3195 unnest = self._parse_unnest() 3196 if unnest: 3197 return unnest 3198 3199 values = self._parse_derived_table_values() 3200 if values: 3201 return values 3202 3203 subquery = self._parse_select(table=True) 3204 if subquery: 3205 if not subquery.args.get("pivots"): 3206 subquery.set("pivots", self._parse_pivots()) 3207 return subquery 3208 3209 bracket = parse_bracket and self._parse_bracket(None) 3210 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3211 3212 only = self._match(TokenType.ONLY) 3213 3214 this = t.cast( 3215 exp.Expression, 3216 bracket 3217 or self._parse_bracket( 3218 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3219 ), 3220 ) 3221 3222 if only: 3223 this.set("only", only) 3224 3225 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3226 self._match_text_seq("*") 3227 3228 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3229 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3230 this.set("partition", self._parse_partition()) 3231 3232 if schema: 3233 return self._parse_schema(this=this) 3234 3235 version = self._parse_version() 3236 3237 if version: 3238 this.set("version", version) 3239 3240 if self.dialect.ALIAS_POST_TABLESAMPLE: 3241 table_sample = self._parse_table_sample() 3242 3243 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3244 if alias: 3245 this.set("alias", alias) 3246 3247 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3248 return self.expression( 3249 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3250 ) 3251 3252 this.set("hints", self._parse_table_hints()) 3253 3254 if not this.args.get("pivots"): 3255 this.set("pivots", self._parse_pivots()) 3256 3257 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3258 table_sample = self._parse_table_sample() 3259 3260 if table_sample: 3261 table_sample.set("this", this) 3262 this = table_sample 3263 3264 if joins: 3265 for join in self._parse_joins(): 3266 this.append("joins", join) 3267 3268 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3269 this.set("ordinality", True) 3270 this.set("alias", self._parse_table_alias()) 3271 3272 return this 3273 3274 def _parse_version(self) -> t.Optional[exp.Version]: 3275 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3276 this = "TIMESTAMP" 3277 elif self._match(TokenType.VERSION_SNAPSHOT): 3278 this = "VERSION" 3279 else: 3280 return None 3281 3282 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3283 kind = self._prev.text.upper() 3284 start = self._parse_bitwise() 3285 self._match_texts(("TO", "AND")) 3286 end = self._parse_bitwise() 3287 expression: t.Optional[exp.Expression] = self.expression( 3288 exp.Tuple, expressions=[start, end] 3289 ) 3290 elif self._match_text_seq("CONTAINED", "IN"): 3291 kind = "CONTAINED IN" 3292 expression = self.expression( 3293 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3294 ) 3295 elif self._match(TokenType.ALL): 3296 kind = "ALL" 3297 expression = None 3298 else: 3299 self._match_text_seq("AS", "OF") 3300 kind = "AS OF" 3301 expression = self._parse_type() 3302 3303 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3304 3305 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3306 if not self._match(TokenType.UNNEST): 3307 return None 3308 3309 expressions = self._parse_wrapped_csv(self._parse_equality) 3310 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3311 3312 alias = self._parse_table_alias() if with_alias else None 3313 3314 if alias: 3315 if self.dialect.UNNEST_COLUMN_ONLY: 3316 if alias.args.get("columns"): 3317 self.raise_error("Unexpected extra column alias in unnest.") 3318 3319 alias.set("columns", [alias.this]) 3320 alias.set("this", None) 3321 3322 columns = alias.args.get("columns") or [] 3323 if offset and len(expressions) < len(columns): 3324 offset = columns.pop() 3325 3326 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3327 self._match(TokenType.ALIAS) 3328 offset = self._parse_id_var( 3329 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3330 ) or exp.to_identifier("offset") 3331 3332 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3333 3334 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3335 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3336 if not is_derived and not self._match_text_seq("VALUES"): 3337 return None 3338 3339 expressions = self._parse_csv(self._parse_value) 3340 alias = self._parse_table_alias() 3341 3342 if is_derived: 3343 self._match_r_paren() 3344 3345 return self.expression( 3346 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3347 ) 3348 3349 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3350 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3351 as_modifier and self._match_text_seq("USING", "SAMPLE") 3352 ): 3353 return None 3354 3355 bucket_numerator = None 3356 bucket_denominator = None 3357 bucket_field = None 3358 percent = None 3359 size = None 3360 seed = None 3361 3362 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3363 matched_l_paren = self._match(TokenType.L_PAREN) 3364 3365 if self.TABLESAMPLE_CSV: 3366 num = None 3367 expressions = self._parse_csv(self._parse_primary) 3368 else: 3369 expressions = None 3370 num = ( 3371 self._parse_factor() 3372 if self._match(TokenType.NUMBER, advance=False) 3373 else self._parse_primary() or self._parse_placeholder() 3374 ) 3375 3376 if self._match_text_seq("BUCKET"): 3377 bucket_numerator = self._parse_number() 3378 self._match_text_seq("OUT", "OF") 3379 bucket_denominator = bucket_denominator = self._parse_number() 3380 self._match(TokenType.ON) 3381 bucket_field = self._parse_field() 3382 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3383 percent = num 3384 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3385 size = num 3386 else: 3387 percent = num 3388 3389 if matched_l_paren: 3390 self._match_r_paren() 3391 3392 if self._match(TokenType.L_PAREN): 3393 method = self._parse_var(upper=True) 3394 seed = self._match(TokenType.COMMA) and self._parse_number() 3395 self._match_r_paren() 3396 elif self._match_texts(("SEED", "REPEATABLE")): 3397 seed = self._parse_wrapped(self._parse_number) 3398 3399 if not method and self.DEFAULT_SAMPLING_METHOD: 3400 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3401 3402 return self.expression( 3403 exp.TableSample, 3404 expressions=expressions, 3405 method=method, 3406 bucket_numerator=bucket_numerator, 3407 bucket_denominator=bucket_denominator, 3408 bucket_field=bucket_field, 3409 percent=percent, 3410 size=size, 3411 seed=seed, 3412 ) 3413 3414 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3415 return list(iter(self._parse_pivot, None)) or None 3416 3417 def _parse_joins(self) -> t.Iterator[exp.Join]: 3418 return iter(self._parse_join, None) 3419 3420 # https://duckdb.org/docs/sql/statements/pivot 3421 def _parse_simplified_pivot(self) -> exp.Pivot: 3422 def _parse_on() -> t.Optional[exp.Expression]: 3423 this = self._parse_bitwise() 3424 return self._parse_in(this) if self._match(TokenType.IN) else this 3425 3426 this = self._parse_table() 3427 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3428 using = self._match(TokenType.USING) and self._parse_csv( 3429 lambda: self._parse_alias(self._parse_function()) 3430 ) 3431 group = self._parse_group() 3432 return self.expression( 3433 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3434 ) 3435 3436 def _parse_pivot_in(self) -> exp.In: 3437 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3438 this = self._parse_conjunction() 3439 3440 self._match(TokenType.ALIAS) 3441 alias = self._parse_field() 3442 if alias: 3443 return self.expression(exp.PivotAlias, this=this, alias=alias) 3444 3445 return this 3446 3447 value = self._parse_column() 3448 3449 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3450 self.raise_error("Expecting IN (") 3451 3452 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3453 3454 self._match_r_paren() 3455 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3456 3457 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3458 index = self._index 3459 include_nulls = None 3460 3461 if self._match(TokenType.PIVOT): 3462 unpivot = False 3463 elif self._match(TokenType.UNPIVOT): 3464 unpivot = True 3465 3466 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3467 if self._match_text_seq("INCLUDE", "NULLS"): 3468 include_nulls = True 3469 elif self._match_text_seq("EXCLUDE", "NULLS"): 3470 include_nulls = False 3471 else: 3472 return None 3473 3474 expressions = [] 3475 3476 if not self._match(TokenType.L_PAREN): 3477 self._retreat(index) 3478 return None 3479 3480 if unpivot: 3481 expressions = self._parse_csv(self._parse_column) 3482 else: 3483 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3484 3485 if not expressions: 3486 self.raise_error("Failed to parse PIVOT's aggregation list") 3487 3488 if not self._match(TokenType.FOR): 3489 self.raise_error("Expecting FOR") 3490 3491 field = self._parse_pivot_in() 3492 3493 self._match_r_paren() 3494 3495 pivot = self.expression( 3496 exp.Pivot, 3497 expressions=expressions, 3498 field=field, 3499 unpivot=unpivot, 3500 include_nulls=include_nulls, 3501 ) 3502 3503 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3504 pivot.set("alias", self._parse_table_alias()) 3505 3506 if not unpivot: 3507 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3508 3509 columns: t.List[exp.Expression] = [] 3510 for fld in pivot.args["field"].expressions: 3511 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3512 for name in names: 3513 if self.PREFIXED_PIVOT_COLUMNS: 3514 name = f"{name}_{field_name}" if name else field_name 3515 else: 3516 name = f"{field_name}_{name}" if name else field_name 3517 3518 columns.append(exp.to_identifier(name)) 3519 3520 pivot.set("columns", columns) 3521 3522 return pivot 3523 3524 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3525 return [agg.alias for agg in aggregations] 3526 3527 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3528 if not skip_where_token and not self._match(TokenType.PREWHERE): 3529 return None 3530 3531 return self.expression( 3532 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3533 ) 3534 3535 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3536 if not skip_where_token and not self._match(TokenType.WHERE): 3537 return None 3538 3539 return self.expression( 3540 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3541 ) 3542 3543 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3544 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3545 return None 3546 3547 elements: t.Dict[str, t.Any] = defaultdict(list) 3548 3549 if self._match(TokenType.ALL): 3550 elements["all"] = True 3551 elif self._match(TokenType.DISTINCT): 3552 elements["all"] = False 3553 3554 while True: 3555 expressions = self._parse_csv(self._parse_conjunction) 3556 if expressions: 3557 elements["expressions"].extend(expressions) 3558 3559 grouping_sets = self._parse_grouping_sets() 3560 if grouping_sets: 3561 elements["grouping_sets"].extend(grouping_sets) 3562 3563 rollup = None 3564 cube = None 3565 totals = None 3566 3567 index = self._index 3568 with_ = self._match(TokenType.WITH) 3569 if self._match(TokenType.ROLLUP): 3570 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3571 elements["rollup"].extend(ensure_list(rollup)) 3572 3573 if self._match(TokenType.CUBE): 3574 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3575 elements["cube"].extend(ensure_list(cube)) 3576 3577 if self._match_text_seq("TOTALS"): 3578 totals = True 3579 elements["totals"] = True # type: ignore 3580 3581 if not (grouping_sets or rollup or cube or totals): 3582 if with_: 3583 self._retreat(index) 3584 break 3585 3586 return self.expression(exp.Group, **elements) # type: ignore 3587 3588 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3589 if not self._match(TokenType.GROUPING_SETS): 3590 return None 3591 3592 return self._parse_wrapped_csv(self._parse_grouping_set) 3593 3594 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3595 if self._match(TokenType.L_PAREN): 3596 grouping_set = self._parse_csv(self._parse_column) 3597 self._match_r_paren() 3598 return self.expression(exp.Tuple, expressions=grouping_set) 3599 3600 return self._parse_column() 3601 3602 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3603 if not skip_having_token and not self._match(TokenType.HAVING): 3604 return None 3605 return self.expression(exp.Having, this=self._parse_conjunction()) 3606 3607 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3608 if not self._match(TokenType.QUALIFY): 3609 return None 3610 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3611 3612 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3613 if skip_start_token: 3614 start = None 3615 elif self._match(TokenType.START_WITH): 3616 start = self._parse_conjunction() 3617 else: 3618 return None 3619 3620 self._match(TokenType.CONNECT_BY) 3621 nocycle = self._match_text_seq("NOCYCLE") 3622 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3623 exp.Prior, this=self._parse_bitwise() 3624 ) 3625 connect = self._parse_conjunction() 3626 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3627 3628 if not start and self._match(TokenType.START_WITH): 3629 start = self._parse_conjunction() 3630 3631 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3632 3633 def _parse_name_as_expression(self) -> exp.Alias: 3634 return self.expression( 3635 exp.Alias, 3636 alias=self._parse_id_var(any_token=True), 3637 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3638 ) 3639 3640 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3641 if self._match_text_seq("INTERPOLATE"): 3642 return self._parse_wrapped_csv(self._parse_name_as_expression) 3643 return None 3644 3645 def _parse_order( 3646 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3647 ) -> t.Optional[exp.Expression]: 3648 siblings = None 3649 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3650 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3651 return this 3652 3653 siblings = True 3654 3655 return self.expression( 3656 exp.Order, 3657 this=this, 3658 expressions=self._parse_csv(self._parse_ordered), 3659 interpolate=self._parse_interpolate(), 3660 siblings=siblings, 3661 ) 3662 3663 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3664 if not self._match(token): 3665 return None 3666 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3667 3668 def _parse_ordered( 3669 self, parse_method: t.Optional[t.Callable] = None 3670 ) -> t.Optional[exp.Ordered]: 3671 this = parse_method() if parse_method else self._parse_conjunction() 3672 if not this: 3673 return None 3674 3675 asc = self._match(TokenType.ASC) 3676 desc = self._match(TokenType.DESC) or (asc and False) 3677 3678 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3679 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3680 3681 nulls_first = is_nulls_first or False 3682 explicitly_null_ordered = is_nulls_first or is_nulls_last 3683 3684 if ( 3685 not explicitly_null_ordered 3686 and ( 3687 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3688 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3689 ) 3690 and self.dialect.NULL_ORDERING != "nulls_are_last" 3691 ): 3692 nulls_first = True 3693 3694 if self._match_text_seq("WITH", "FILL"): 3695 with_fill = self.expression( 3696 exp.WithFill, 3697 **{ # type: ignore 3698 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3699 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3700 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3701 }, 3702 ) 3703 else: 3704 with_fill = None 3705 3706 return self.expression( 3707 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3708 ) 3709 3710 def _parse_limit( 3711 self, 3712 this: t.Optional[exp.Expression] = None, 3713 top: bool = False, 3714 skip_limit_token: bool = False, 3715 ) -> t.Optional[exp.Expression]: 3716 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3717 comments = self._prev_comments 3718 if top: 3719 limit_paren = self._match(TokenType.L_PAREN) 3720 expression = self._parse_term() if limit_paren else self._parse_number() 3721 3722 if limit_paren: 3723 self._match_r_paren() 3724 else: 3725 expression = self._parse_term() 3726 3727 if self._match(TokenType.COMMA): 3728 offset = expression 3729 expression = self._parse_term() 3730 else: 3731 offset = None 3732 3733 limit_exp = self.expression( 3734 exp.Limit, 3735 this=this, 3736 expression=expression, 3737 offset=offset, 3738 comments=comments, 3739 expressions=self._parse_limit_by(), 3740 ) 3741 3742 return limit_exp 3743 3744 if self._match(TokenType.FETCH): 3745 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3746 direction = self._prev.text.upper() if direction else "FIRST" 3747 3748 count = self._parse_field(tokens=self.FETCH_TOKENS) 3749 percent = self._match(TokenType.PERCENT) 3750 3751 self._match_set((TokenType.ROW, TokenType.ROWS)) 3752 3753 only = self._match_text_seq("ONLY") 3754 with_ties = self._match_text_seq("WITH", "TIES") 3755 3756 if only and with_ties: 3757 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3758 3759 return self.expression( 3760 exp.Fetch, 3761 direction=direction, 3762 count=count, 3763 percent=percent, 3764 with_ties=with_ties, 3765 ) 3766 3767 return this 3768 3769 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3770 if not self._match(TokenType.OFFSET): 3771 return this 3772 3773 count = self._parse_term() 3774 self._match_set((TokenType.ROW, TokenType.ROWS)) 3775 3776 return self.expression( 3777 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3778 ) 3779 3780 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3781 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3782 3783 def _parse_locks(self) -> t.List[exp.Lock]: 3784 locks = [] 3785 while True: 3786 if self._match_text_seq("FOR", "UPDATE"): 3787 update = True 3788 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3789 "LOCK", "IN", "SHARE", "MODE" 3790 ): 3791 update = False 3792 else: 3793 break 3794 3795 expressions = None 3796 if self._match_text_seq("OF"): 3797 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3798 3799 wait: t.Optional[bool | exp.Expression] = None 3800 if self._match_text_seq("NOWAIT"): 3801 wait = True 3802 elif self._match_text_seq("WAIT"): 3803 wait = self._parse_primary() 3804 elif self._match_text_seq("SKIP", "LOCKED"): 3805 wait = False 3806 3807 locks.append( 3808 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3809 ) 3810 3811 return locks 3812 3813 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3814 while this and self._match_set(self.SET_OPERATIONS): 3815 token_type = self._prev.token_type 3816 3817 if token_type == TokenType.UNION: 3818 operation = exp.Union 3819 elif token_type == TokenType.EXCEPT: 3820 operation = exp.Except 3821 else: 3822 operation = exp.Intersect 3823 3824 comments = self._prev.comments 3825 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3826 by_name = self._match_text_seq("BY", "NAME") 3827 expression = self._parse_select(nested=True, parse_set_operation=False) 3828 3829 this = self.expression( 3830 operation, 3831 comments=comments, 3832 this=this, 3833 distinct=distinct, 3834 by_name=by_name, 3835 expression=expression, 3836 ) 3837 3838 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3839 expression = this.expression 3840 3841 if expression: 3842 for arg in self.UNION_MODIFIERS: 3843 expr = expression.args.get(arg) 3844 if expr: 3845 this.set(arg, expr.pop()) 3846 3847 return this 3848 3849 def _parse_expression(self) -> t.Optional[exp.Expression]: 3850 return self._parse_alias(self._parse_conjunction()) 3851 3852 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3853 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3854 3855 def _parse_equality(self) -> t.Optional[exp.Expression]: 3856 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3857 3858 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3859 return self._parse_tokens(self._parse_range, self.COMPARISON) 3860 3861 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3862 this = this or self._parse_bitwise() 3863 negate = self._match(TokenType.NOT) 3864 3865 if self._match_set(self.RANGE_PARSERS): 3866 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3867 if not expression: 3868 return this 3869 3870 this = expression 3871 elif self._match(TokenType.ISNULL): 3872 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3873 3874 # Postgres supports ISNULL and NOTNULL for conditions. 3875 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3876 if self._match(TokenType.NOTNULL): 3877 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3878 this = self.expression(exp.Not, this=this) 3879 3880 if negate: 3881 this = self.expression(exp.Not, this=this) 3882 3883 if self._match(TokenType.IS): 3884 this = self._parse_is(this) 3885 3886 return this 3887 3888 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3889 index = self._index - 1 3890 negate = self._match(TokenType.NOT) 3891 3892 if self._match_text_seq("DISTINCT", "FROM"): 3893 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3894 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3895 3896 expression = self._parse_null() or self._parse_boolean() 3897 if not expression: 3898 self._retreat(index) 3899 return None 3900 3901 this = self.expression(exp.Is, this=this, expression=expression) 3902 return self.expression(exp.Not, this=this) if negate else this 3903 3904 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3905 unnest = self._parse_unnest(with_alias=False) 3906 if unnest: 3907 this = self.expression(exp.In, this=this, unnest=unnest) 3908 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3909 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3910 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3911 3912 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3913 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3914 else: 3915 this = self.expression(exp.In, this=this, expressions=expressions) 3916 3917 if matched_l_paren: 3918 self._match_r_paren(this) 3919 elif not self._match(TokenType.R_BRACKET, expression=this): 3920 self.raise_error("Expecting ]") 3921 else: 3922 this = self.expression(exp.In, this=this, field=self._parse_field()) 3923 3924 return this 3925 3926 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3927 low = self._parse_bitwise() 3928 self._match(TokenType.AND) 3929 high = self._parse_bitwise() 3930 return self.expression(exp.Between, this=this, low=low, high=high) 3931 3932 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3933 if not self._match(TokenType.ESCAPE): 3934 return this 3935 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3936 3937 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3938 index = self._index 3939 3940 if not self._match(TokenType.INTERVAL) and match_interval: 3941 return None 3942 3943 if self._match(TokenType.STRING, advance=False): 3944 this = self._parse_primary() 3945 else: 3946 this = self._parse_term() 3947 3948 if not this or ( 3949 isinstance(this, exp.Column) 3950 and not this.table 3951 and not this.this.quoted 3952 and this.name.upper() == "IS" 3953 ): 3954 self._retreat(index) 3955 return None 3956 3957 unit = self._parse_function() or ( 3958 not self._match(TokenType.ALIAS, advance=False) 3959 and self._parse_var(any_token=True, upper=True) 3960 ) 3961 3962 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3963 # each INTERVAL expression into this canonical form so it's easy to transpile 3964 if this and this.is_number: 3965 this = exp.Literal.string(this.name) 3966 elif this and this.is_string: 3967 parts = this.name.split() 3968 3969 if len(parts) == 2: 3970 if unit: 3971 # This is not actually a unit, it's something else (e.g. a "window side") 3972 unit = None 3973 self._retreat(self._index - 1) 3974 3975 this = exp.Literal.string(parts[0]) 3976 unit = self.expression(exp.Var, this=parts[1].upper()) 3977 3978 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3979 unit = self.expression( 3980 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3981 ) 3982 3983 return self.expression(exp.Interval, this=this, unit=unit) 3984 3985 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3986 this = self._parse_term() 3987 3988 while True: 3989 if self._match_set(self.BITWISE): 3990 this = self.expression( 3991 self.BITWISE[self._prev.token_type], 3992 this=this, 3993 expression=self._parse_term(), 3994 ) 3995 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3996 this = self.expression( 3997 exp.DPipe, 3998 this=this, 3999 expression=self._parse_term(), 4000 safe=not self.dialect.STRICT_STRING_CONCAT, 4001 ) 4002 elif self._match(TokenType.DQMARK): 4003 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4004 elif self._match_pair(TokenType.LT, TokenType.LT): 4005 this = self.expression( 4006 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4007 ) 4008 elif self._match_pair(TokenType.GT, TokenType.GT): 4009 this = self.expression( 4010 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4011 ) 4012 else: 4013 break 4014 4015 return this 4016 4017 def _parse_term(self) -> t.Optional[exp.Expression]: 4018 return self._parse_tokens(self._parse_factor, self.TERM) 4019 4020 def _parse_factor(self) -> t.Optional[exp.Expression]: 4021 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4022 this = parse_method() 4023 4024 while self._match_set(self.FACTOR): 4025 this = self.expression( 4026 self.FACTOR[self._prev.token_type], 4027 this=this, 4028 comments=self._prev_comments, 4029 expression=parse_method(), 4030 ) 4031 if isinstance(this, exp.Div): 4032 this.args["typed"] = self.dialect.TYPED_DIVISION 4033 this.args["safe"] = self.dialect.SAFE_DIVISION 4034 4035 return this 4036 4037 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4038 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4039 4040 def _parse_unary(self) -> t.Optional[exp.Expression]: 4041 if self._match_set(self.UNARY_PARSERS): 4042 return self.UNARY_PARSERS[self._prev.token_type](self) 4043 return self._parse_at_time_zone(self._parse_type()) 4044 4045 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4046 interval = parse_interval and self._parse_interval() 4047 if interval: 4048 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4049 while True: 4050 index = self._index 4051 self._match(TokenType.PLUS) 4052 4053 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4054 self._retreat(index) 4055 break 4056 4057 interval = self.expression( # type: ignore 4058 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4059 ) 4060 4061 return interval 4062 4063 index = self._index 4064 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4065 this = self._parse_column() 4066 4067 if data_type: 4068 if isinstance(this, exp.Literal): 4069 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4070 if parser: 4071 return parser(self, this, data_type) 4072 return self.expression(exp.Cast, this=this, to=data_type) 4073 if not data_type.expressions: 4074 self._retreat(index) 4075 return self._parse_column() 4076 return self._parse_column_ops(data_type) 4077 4078 return this and self._parse_column_ops(this) 4079 4080 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4081 this = self._parse_type() 4082 if not this: 4083 return None 4084 4085 if isinstance(this, exp.Column) and not this.table: 4086 this = exp.var(this.name.upper()) 4087 4088 return self.expression( 4089 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4090 ) 4091 4092 def _parse_types( 4093 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4094 ) -> t.Optional[exp.Expression]: 4095 index = self._index 4096 4097 prefix = self._match_text_seq("SYSUDTLIB", ".") 4098 4099 if not self._match_set(self.TYPE_TOKENS): 4100 identifier = allow_identifiers and self._parse_id_var( 4101 any_token=False, tokens=(TokenType.VAR,) 4102 ) 4103 if identifier: 4104 tokens = self.dialect.tokenize(identifier.name) 4105 4106 if len(tokens) != 1: 4107 self.raise_error("Unexpected identifier", self._prev) 4108 4109 if tokens[0].token_type in self.TYPE_TOKENS: 4110 self._prev = tokens[0] 4111 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4112 type_name = identifier.name 4113 4114 while self._match(TokenType.DOT): 4115 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4116 4117 return exp.DataType.build(type_name, udt=True) 4118 else: 4119 self._retreat(self._index - 1) 4120 return None 4121 else: 4122 return None 4123 4124 type_token = self._prev.token_type 4125 4126 if type_token == TokenType.PSEUDO_TYPE: 4127 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4128 4129 if type_token == TokenType.OBJECT_IDENTIFIER: 4130 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4131 4132 nested = type_token in self.NESTED_TYPE_TOKENS 4133 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4134 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4135 expressions = None 4136 maybe_func = False 4137 4138 if self._match(TokenType.L_PAREN): 4139 if is_struct: 4140 expressions = self._parse_csv(self._parse_struct_types) 4141 elif nested: 4142 expressions = self._parse_csv( 4143 lambda: self._parse_types( 4144 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4145 ) 4146 ) 4147 elif type_token in self.ENUM_TYPE_TOKENS: 4148 expressions = self._parse_csv(self._parse_equality) 4149 elif is_aggregate: 4150 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4151 any_token=False, tokens=(TokenType.VAR,) 4152 ) 4153 if not func_or_ident or not self._match(TokenType.COMMA): 4154 return None 4155 expressions = self._parse_csv( 4156 lambda: self._parse_types( 4157 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4158 ) 4159 ) 4160 expressions.insert(0, func_or_ident) 4161 else: 4162 expressions = self._parse_csv(self._parse_type_size) 4163 4164 if not expressions or not self._match(TokenType.R_PAREN): 4165 self._retreat(index) 4166 return None 4167 4168 maybe_func = True 4169 4170 this: t.Optional[exp.Expression] = None 4171 values: t.Optional[t.List[exp.Expression]] = None 4172 4173 if nested and self._match(TokenType.LT): 4174 if is_struct: 4175 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4176 else: 4177 expressions = self._parse_csv( 4178 lambda: self._parse_types( 4179 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4180 ) 4181 ) 4182 4183 if not self._match(TokenType.GT): 4184 self.raise_error("Expecting >") 4185 4186 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4187 values = self._parse_csv(self._parse_conjunction) 4188 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4189 4190 if type_token in self.TIMESTAMPS: 4191 if self._match_text_seq("WITH", "TIME", "ZONE"): 4192 maybe_func = False 4193 tz_type = ( 4194 exp.DataType.Type.TIMETZ 4195 if type_token in self.TIMES 4196 else exp.DataType.Type.TIMESTAMPTZ 4197 ) 4198 this = exp.DataType(this=tz_type, expressions=expressions) 4199 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4200 maybe_func = False 4201 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4202 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4203 maybe_func = False 4204 elif type_token == TokenType.INTERVAL: 4205 unit = self._parse_var(upper=True) 4206 if unit: 4207 if self._match_text_seq("TO"): 4208 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4209 4210 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4211 else: 4212 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4213 4214 if maybe_func and check_func: 4215 index2 = self._index 4216 peek = self._parse_string() 4217 4218 if not peek: 4219 self._retreat(index) 4220 return None 4221 4222 self._retreat(index2) 4223 4224 if not this: 4225 if self._match_text_seq("UNSIGNED"): 4226 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4227 if not unsigned_type_token: 4228 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4229 4230 type_token = unsigned_type_token or type_token 4231 4232 this = exp.DataType( 4233 this=exp.DataType.Type[type_token.value], 4234 expressions=expressions, 4235 nested=nested, 4236 values=values, 4237 prefix=prefix, 4238 ) 4239 4240 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4241 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4242 4243 return this 4244 4245 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4246 index = self._index 4247 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4248 self._match(TokenType.COLON) 4249 column_def = self._parse_column_def(this) 4250 4251 if type_required and ( 4252 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4253 ): 4254 self._retreat(index) 4255 return self._parse_types() 4256 4257 return column_def 4258 4259 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4260 if not self._match_text_seq("AT", "TIME", "ZONE"): 4261 return this 4262 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4263 4264 def _parse_column(self) -> t.Optional[exp.Expression]: 4265 this = self._parse_column_reference() 4266 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4267 4268 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4269 this = self._parse_field() 4270 if ( 4271 not this 4272 and self._match(TokenType.VALUES, advance=False) 4273 and self.VALUES_FOLLOWED_BY_PAREN 4274 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4275 ): 4276 this = self._parse_id_var() 4277 4278 if isinstance(this, exp.Identifier): 4279 # We bubble up comments from the Identifier to the Column 4280 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4281 4282 return this 4283 4284 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4285 this = self._parse_bracket(this) 4286 4287 while self._match_set(self.COLUMN_OPERATORS): 4288 op_token = self._prev.token_type 4289 op = self.COLUMN_OPERATORS.get(op_token) 4290 4291 if op_token == TokenType.DCOLON: 4292 field = self._parse_types() 4293 if not field: 4294 self.raise_error("Expected type") 4295 elif op and self._curr: 4296 field = self._parse_column_reference() 4297 else: 4298 field = self._parse_field(any_token=True, anonymous_func=True) 4299 4300 if isinstance(field, exp.Func) and this: 4301 # bigquery allows function calls like x.y.count(...) 4302 # SAFE.SUBSTR(...) 4303 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4304 this = exp.replace_tree( 4305 this, 4306 lambda n: ( 4307 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4308 if n.table 4309 else n.this 4310 ) 4311 if isinstance(n, exp.Column) 4312 else n, 4313 ) 4314 4315 if op: 4316 this = op(self, this, field) 4317 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4318 this = self.expression( 4319 exp.Column, 4320 this=field, 4321 table=this.this, 4322 db=this.args.get("table"), 4323 catalog=this.args.get("db"), 4324 ) 4325 else: 4326 this = self.expression(exp.Dot, this=this, expression=field) 4327 this = self._parse_bracket(this) 4328 return this 4329 4330 def _parse_primary(self) -> t.Optional[exp.Expression]: 4331 if self._match_set(self.PRIMARY_PARSERS): 4332 token_type = self._prev.token_type 4333 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4334 4335 if token_type == TokenType.STRING: 4336 expressions = [primary] 4337 while self._match(TokenType.STRING): 4338 expressions.append(exp.Literal.string(self._prev.text)) 4339 4340 if len(expressions) > 1: 4341 return self.expression(exp.Concat, expressions=expressions) 4342 4343 return primary 4344 4345 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4346 return exp.Literal.number(f"0.{self._prev.text}") 4347 4348 if self._match(TokenType.L_PAREN): 4349 comments = self._prev_comments 4350 query = self._parse_select() 4351 4352 if query: 4353 expressions = [query] 4354 else: 4355 expressions = self._parse_expressions() 4356 4357 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4358 4359 if not this and self._match(TokenType.R_PAREN, advance=False): 4360 this = self.expression(exp.Tuple) 4361 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4362 this = self._parse_subquery(this=this, parse_alias=False) 4363 elif isinstance(this, exp.Subquery): 4364 this = self._parse_subquery( 4365 this=self._parse_set_operations(this), parse_alias=False 4366 ) 4367 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4368 this = self.expression(exp.Tuple, expressions=expressions) 4369 else: 4370 this = self.expression(exp.Paren, this=this) 4371 4372 if this: 4373 this.add_comments(comments) 4374 4375 self._match_r_paren(expression=this) 4376 return this 4377 4378 return None 4379 4380 def _parse_field( 4381 self, 4382 any_token: bool = False, 4383 tokens: t.Optional[t.Collection[TokenType]] = None, 4384 anonymous_func: bool = False, 4385 ) -> t.Optional[exp.Expression]: 4386 if anonymous_func: 4387 field = ( 4388 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4389 or self._parse_primary() 4390 ) 4391 else: 4392 field = self._parse_primary() or self._parse_function( 4393 anonymous=anonymous_func, any_token=any_token 4394 ) 4395 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4396 4397 def _parse_function( 4398 self, 4399 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4400 anonymous: bool = False, 4401 optional_parens: bool = True, 4402 any_token: bool = False, 4403 ) -> t.Optional[exp.Expression]: 4404 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4405 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4406 fn_syntax = False 4407 if ( 4408 self._match(TokenType.L_BRACE, advance=False) 4409 and self._next 4410 and self._next.text.upper() == "FN" 4411 ): 4412 self._advance(2) 4413 fn_syntax = True 4414 4415 func = self._parse_function_call( 4416 functions=functions, 4417 anonymous=anonymous, 4418 optional_parens=optional_parens, 4419 any_token=any_token, 4420 ) 4421 4422 if fn_syntax: 4423 self._match(TokenType.R_BRACE) 4424 4425 return func 4426 4427 def _parse_function_call( 4428 self, 4429 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4430 anonymous: bool = False, 4431 optional_parens: bool = True, 4432 any_token: bool = False, 4433 ) -> t.Optional[exp.Expression]: 4434 if not self._curr: 4435 return None 4436 4437 comments = self._curr.comments 4438 token_type = self._curr.token_type 4439 this = self._curr.text 4440 upper = this.upper() 4441 4442 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4443 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4444 self._advance() 4445 return self._parse_window(parser(self)) 4446 4447 if not self._next or self._next.token_type != TokenType.L_PAREN: 4448 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4449 self._advance() 4450 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4451 4452 return None 4453 4454 if any_token: 4455 if token_type in self.RESERVED_TOKENS: 4456 return None 4457 elif token_type not in self.FUNC_TOKENS: 4458 return None 4459 4460 self._advance(2) 4461 4462 parser = self.FUNCTION_PARSERS.get(upper) 4463 if parser and not anonymous: 4464 this = parser(self) 4465 else: 4466 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4467 4468 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4469 this = self.expression(subquery_predicate, this=self._parse_select()) 4470 self._match_r_paren() 4471 return this 4472 4473 if functions is None: 4474 functions = self.FUNCTIONS 4475 4476 function = functions.get(upper) 4477 4478 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4479 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4480 4481 if alias: 4482 args = self._kv_to_prop_eq(args) 4483 4484 if function and not anonymous: 4485 if "dialect" in function.__code__.co_varnames: 4486 func = function(args, dialect=self.dialect) 4487 else: 4488 func = function(args) 4489 4490 func = self.validate_expression(func, args) 4491 if not self.dialect.NORMALIZE_FUNCTIONS: 4492 func.meta["name"] = this 4493 4494 this = func 4495 else: 4496 if token_type == TokenType.IDENTIFIER: 4497 this = exp.Identifier(this=this, quoted=True) 4498 this = self.expression(exp.Anonymous, this=this, expressions=args) 4499 4500 if isinstance(this, exp.Expression): 4501 this.add_comments(comments) 4502 4503 self._match_r_paren(this) 4504 return self._parse_window(this) 4505 4506 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4507 transformed = [] 4508 4509 for e in expressions: 4510 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4511 if isinstance(e, exp.Alias): 4512 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4513 4514 if not isinstance(e, exp.PropertyEQ): 4515 e = self.expression( 4516 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4517 ) 4518 4519 if isinstance(e.this, exp.Column): 4520 e.this.replace(e.this.this) 4521 4522 transformed.append(e) 4523 4524 return transformed 4525 4526 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4527 return self._parse_column_def(self._parse_id_var()) 4528 4529 def _parse_user_defined_function( 4530 self, kind: t.Optional[TokenType] = None 4531 ) -> t.Optional[exp.Expression]: 4532 this = self._parse_id_var() 4533 4534 while self._match(TokenType.DOT): 4535 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4536 4537 if not self._match(TokenType.L_PAREN): 4538 return this 4539 4540 expressions = self._parse_csv(self._parse_function_parameter) 4541 self._match_r_paren() 4542 return self.expression( 4543 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4544 ) 4545 4546 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4547 literal = self._parse_primary() 4548 if literal: 4549 return self.expression(exp.Introducer, this=token.text, expression=literal) 4550 4551 return self.expression(exp.Identifier, this=token.text) 4552 4553 def _parse_session_parameter(self) -> exp.SessionParameter: 4554 kind = None 4555 this = self._parse_id_var() or self._parse_primary() 4556 4557 if this and self._match(TokenType.DOT): 4558 kind = this.name 4559 this = self._parse_var() or self._parse_primary() 4560 4561 return self.expression(exp.SessionParameter, this=this, kind=kind) 4562 4563 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4564 index = self._index 4565 4566 if self._match(TokenType.L_PAREN): 4567 expressions = t.cast( 4568 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4569 ) 4570 4571 if not self._match(TokenType.R_PAREN): 4572 self._retreat(index) 4573 else: 4574 expressions = [self._parse_id_var()] 4575 4576 if self._match_set(self.LAMBDAS): 4577 return self.LAMBDAS[self._prev.token_type](self, expressions) 4578 4579 self._retreat(index) 4580 4581 this: t.Optional[exp.Expression] 4582 4583 if self._match(TokenType.DISTINCT): 4584 this = self.expression( 4585 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4586 ) 4587 else: 4588 this = self._parse_select_or_expression(alias=alias) 4589 4590 return self._parse_limit( 4591 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4592 ) 4593 4594 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4595 index = self._index 4596 if not self._match(TokenType.L_PAREN): 4597 return this 4598 4599 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4600 # expr can be of both types 4601 if self._match_set(self.SELECT_START_TOKENS): 4602 self._retreat(index) 4603 return this 4604 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4605 self._match_r_paren() 4606 return self.expression(exp.Schema, this=this, expressions=args) 4607 4608 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4609 return self._parse_column_def(self._parse_field(any_token=True)) 4610 4611 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4612 # column defs are not really columns, they're identifiers 4613 if isinstance(this, exp.Column): 4614 this = this.this 4615 4616 kind = self._parse_types(schema=True) 4617 4618 if self._match_text_seq("FOR", "ORDINALITY"): 4619 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4620 4621 constraints: t.List[exp.Expression] = [] 4622 4623 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4624 ("ALIAS", "MATERIALIZED") 4625 ): 4626 persisted = self._prev.text.upper() == "MATERIALIZED" 4627 constraints.append( 4628 self.expression( 4629 exp.ComputedColumnConstraint, 4630 this=self._parse_conjunction(), 4631 persisted=persisted or self._match_text_seq("PERSISTED"), 4632 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4633 ) 4634 ) 4635 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4636 self._match(TokenType.ALIAS) 4637 constraints.append( 4638 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4639 ) 4640 4641 while True: 4642 constraint = self._parse_column_constraint() 4643 if not constraint: 4644 break 4645 constraints.append(constraint) 4646 4647 if not kind and not constraints: 4648 return this 4649 4650 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4651 4652 def _parse_auto_increment( 4653 self, 4654 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4655 start = None 4656 increment = None 4657 4658 if self._match(TokenType.L_PAREN, advance=False): 4659 args = self._parse_wrapped_csv(self._parse_bitwise) 4660 start = seq_get(args, 0) 4661 increment = seq_get(args, 1) 4662 elif self._match_text_seq("START"): 4663 start = self._parse_bitwise() 4664 self._match_text_seq("INCREMENT") 4665 increment = self._parse_bitwise() 4666 4667 if start and increment: 4668 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4669 4670 return exp.AutoIncrementColumnConstraint() 4671 4672 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4673 if not self._match_text_seq("REFRESH"): 4674 self._retreat(self._index - 1) 4675 return None 4676 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4677 4678 def _parse_compress(self) -> exp.CompressColumnConstraint: 4679 if self._match(TokenType.L_PAREN, advance=False): 4680 return self.expression( 4681 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4682 ) 4683 4684 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4685 4686 def _parse_generated_as_identity( 4687 self, 4688 ) -> ( 4689 exp.GeneratedAsIdentityColumnConstraint 4690 | exp.ComputedColumnConstraint 4691 | exp.GeneratedAsRowColumnConstraint 4692 ): 4693 if self._match_text_seq("BY", "DEFAULT"): 4694 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4695 this = self.expression( 4696 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4697 ) 4698 else: 4699 self._match_text_seq("ALWAYS") 4700 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4701 4702 self._match(TokenType.ALIAS) 4703 4704 if self._match_text_seq("ROW"): 4705 start = self._match_text_seq("START") 4706 if not start: 4707 self._match(TokenType.END) 4708 hidden = self._match_text_seq("HIDDEN") 4709 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4710 4711 identity = self._match_text_seq("IDENTITY") 4712 4713 if self._match(TokenType.L_PAREN): 4714 if self._match(TokenType.START_WITH): 4715 this.set("start", self._parse_bitwise()) 4716 if self._match_text_seq("INCREMENT", "BY"): 4717 this.set("increment", self._parse_bitwise()) 4718 if self._match_text_seq("MINVALUE"): 4719 this.set("minvalue", self._parse_bitwise()) 4720 if self._match_text_seq("MAXVALUE"): 4721 this.set("maxvalue", self._parse_bitwise()) 4722 4723 if self._match_text_seq("CYCLE"): 4724 this.set("cycle", True) 4725 elif self._match_text_seq("NO", "CYCLE"): 4726 this.set("cycle", False) 4727 4728 if not identity: 4729 this.set("expression", self._parse_range()) 4730 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4731 args = self._parse_csv(self._parse_bitwise) 4732 this.set("start", seq_get(args, 0)) 4733 this.set("increment", seq_get(args, 1)) 4734 4735 self._match_r_paren() 4736 4737 return this 4738 4739 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4740 self._match_text_seq("LENGTH") 4741 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4742 4743 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4744 if self._match_text_seq("NULL"): 4745 return self.expression(exp.NotNullColumnConstraint) 4746 if self._match_text_seq("CASESPECIFIC"): 4747 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4748 if self._match_text_seq("FOR", "REPLICATION"): 4749 return self.expression(exp.NotForReplicationColumnConstraint) 4750 return None 4751 4752 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4753 if self._match(TokenType.CONSTRAINT): 4754 this = self._parse_id_var() 4755 else: 4756 this = None 4757 4758 if self._match_texts(self.CONSTRAINT_PARSERS): 4759 return self.expression( 4760 exp.ColumnConstraint, 4761 this=this, 4762 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4763 ) 4764 4765 return this 4766 4767 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4768 if not self._match(TokenType.CONSTRAINT): 4769 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4770 4771 return self.expression( 4772 exp.Constraint, 4773 this=self._parse_id_var(), 4774 expressions=self._parse_unnamed_constraints(), 4775 ) 4776 4777 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4778 constraints = [] 4779 while True: 4780 constraint = self._parse_unnamed_constraint() or self._parse_function() 4781 if not constraint: 4782 break 4783 constraints.append(constraint) 4784 4785 return constraints 4786 4787 def _parse_unnamed_constraint( 4788 self, constraints: t.Optional[t.Collection[str]] = None 4789 ) -> t.Optional[exp.Expression]: 4790 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4791 constraints or self.CONSTRAINT_PARSERS 4792 ): 4793 return None 4794 4795 constraint = self._prev.text.upper() 4796 if constraint not in self.CONSTRAINT_PARSERS: 4797 self.raise_error(f"No parser found for schema constraint {constraint}.") 4798 4799 return self.CONSTRAINT_PARSERS[constraint](self) 4800 4801 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4802 self._match_text_seq("KEY") 4803 return self.expression( 4804 exp.UniqueColumnConstraint, 4805 this=self._parse_schema(self._parse_id_var(any_token=False)), 4806 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4807 on_conflict=self._parse_on_conflict(), 4808 ) 4809 4810 def _parse_key_constraint_options(self) -> t.List[str]: 4811 options = [] 4812 while True: 4813 if not self._curr: 4814 break 4815 4816 if self._match(TokenType.ON): 4817 action = None 4818 on = self._advance_any() and self._prev.text 4819 4820 if self._match_text_seq("NO", "ACTION"): 4821 action = "NO ACTION" 4822 elif self._match_text_seq("CASCADE"): 4823 action = "CASCADE" 4824 elif self._match_text_seq("RESTRICT"): 4825 action = "RESTRICT" 4826 elif self._match_pair(TokenType.SET, TokenType.NULL): 4827 action = "SET NULL" 4828 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4829 action = "SET DEFAULT" 4830 else: 4831 self.raise_error("Invalid key constraint") 4832 4833 options.append(f"ON {on} {action}") 4834 elif self._match_text_seq("NOT", "ENFORCED"): 4835 options.append("NOT ENFORCED") 4836 elif self._match_text_seq("DEFERRABLE"): 4837 options.append("DEFERRABLE") 4838 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4839 options.append("INITIALLY DEFERRED") 4840 elif self._match_text_seq("NORELY"): 4841 options.append("NORELY") 4842 elif self._match_text_seq("MATCH", "FULL"): 4843 options.append("MATCH FULL") 4844 else: 4845 break 4846 4847 return options 4848 4849 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4850 if match and not self._match(TokenType.REFERENCES): 4851 return None 4852 4853 expressions = None 4854 this = self._parse_table(schema=True) 4855 options = self._parse_key_constraint_options() 4856 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4857 4858 def _parse_foreign_key(self) -> exp.ForeignKey: 4859 expressions = self._parse_wrapped_id_vars() 4860 reference = self._parse_references() 4861 options = {} 4862 4863 while self._match(TokenType.ON): 4864 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4865 self.raise_error("Expected DELETE or UPDATE") 4866 4867 kind = self._prev.text.lower() 4868 4869 if self._match_text_seq("NO", "ACTION"): 4870 action = "NO ACTION" 4871 elif self._match(TokenType.SET): 4872 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4873 action = "SET " + self._prev.text.upper() 4874 else: 4875 self._advance() 4876 action = self._prev.text.upper() 4877 4878 options[kind] = action 4879 4880 return self.expression( 4881 exp.ForeignKey, 4882 expressions=expressions, 4883 reference=reference, 4884 **options, # type: ignore 4885 ) 4886 4887 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4888 return self._parse_field() 4889 4890 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4891 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4892 self._retreat(self._index - 1) 4893 return None 4894 4895 id_vars = self._parse_wrapped_id_vars() 4896 return self.expression( 4897 exp.PeriodForSystemTimeConstraint, 4898 this=seq_get(id_vars, 0), 4899 expression=seq_get(id_vars, 1), 4900 ) 4901 4902 def _parse_primary_key( 4903 self, wrapped_optional: bool = False, in_props: bool = False 4904 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4905 desc = ( 4906 self._match_set((TokenType.ASC, TokenType.DESC)) 4907 and self._prev.token_type == TokenType.DESC 4908 ) 4909 4910 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4911 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4912 4913 expressions = self._parse_wrapped_csv( 4914 self._parse_primary_key_part, optional=wrapped_optional 4915 ) 4916 options = self._parse_key_constraint_options() 4917 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4918 4919 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4920 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4921 4922 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4923 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4924 return this 4925 4926 bracket_kind = self._prev.token_type 4927 expressions = self._parse_csv( 4928 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4929 ) 4930 4931 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4932 self.raise_error("Expected ]") 4933 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4934 self.raise_error("Expected }") 4935 4936 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4937 if bracket_kind == TokenType.L_BRACE: 4938 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4939 elif not this or this.name.upper() == "ARRAY": 4940 this = self.expression(exp.Array, expressions=expressions) 4941 else: 4942 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4943 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4944 4945 self._add_comments(this) 4946 return self._parse_bracket(this) 4947 4948 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4949 if self._match(TokenType.COLON): 4950 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4951 return this 4952 4953 def _parse_case(self) -> t.Optional[exp.Expression]: 4954 ifs = [] 4955 default = None 4956 4957 comments = self._prev_comments 4958 expression = self._parse_conjunction() 4959 4960 while self._match(TokenType.WHEN): 4961 this = self._parse_conjunction() 4962 self._match(TokenType.THEN) 4963 then = self._parse_conjunction() 4964 ifs.append(self.expression(exp.If, this=this, true=then)) 4965 4966 if self._match(TokenType.ELSE): 4967 default = self._parse_conjunction() 4968 4969 if not self._match(TokenType.END): 4970 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4971 default = exp.column("interval") 4972 else: 4973 self.raise_error("Expected END after CASE", self._prev) 4974 4975 return self.expression( 4976 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4977 ) 4978 4979 def _parse_if(self) -> t.Optional[exp.Expression]: 4980 if self._match(TokenType.L_PAREN): 4981 args = self._parse_csv(self._parse_conjunction) 4982 this = self.validate_expression(exp.If.from_arg_list(args), args) 4983 self._match_r_paren() 4984 else: 4985 index = self._index - 1 4986 4987 if self.NO_PAREN_IF_COMMANDS and index == 0: 4988 return self._parse_as_command(self._prev) 4989 4990 condition = self._parse_conjunction() 4991 4992 if not condition: 4993 self._retreat(index) 4994 return None 4995 4996 self._match(TokenType.THEN) 4997 true = self._parse_conjunction() 4998 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4999 self._match(TokenType.END) 5000 this = self.expression(exp.If, this=condition, true=true, false=false) 5001 5002 return this 5003 5004 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5005 if not self._match_text_seq("VALUE", "FOR"): 5006 self._retreat(self._index - 1) 5007 return None 5008 5009 return self.expression( 5010 exp.NextValueFor, 5011 this=self._parse_column(), 5012 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5013 ) 5014 5015 def _parse_extract(self) -> exp.Extract: 5016 this = self._parse_function() or self._parse_var() or self._parse_type() 5017 5018 if self._match(TokenType.FROM): 5019 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5020 5021 if not self._match(TokenType.COMMA): 5022 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5023 5024 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5025 5026 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5027 this = self._parse_conjunction() 5028 5029 if not self._match(TokenType.ALIAS): 5030 if self._match(TokenType.COMMA): 5031 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5032 5033 self.raise_error("Expected AS after CAST") 5034 5035 fmt = None 5036 to = self._parse_types() 5037 5038 if self._match(TokenType.FORMAT): 5039 fmt_string = self._parse_string() 5040 fmt = self._parse_at_time_zone(fmt_string) 5041 5042 if not to: 5043 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5044 if to.this in exp.DataType.TEMPORAL_TYPES: 5045 this = self.expression( 5046 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5047 this=this, 5048 format=exp.Literal.string( 5049 format_time( 5050 fmt_string.this if fmt_string else "", 5051 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5052 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5053 ) 5054 ), 5055 ) 5056 5057 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5058 this.set("zone", fmt.args["zone"]) 5059 return this 5060 elif not to: 5061 self.raise_error("Expected TYPE after CAST") 5062 elif isinstance(to, exp.Identifier): 5063 to = exp.DataType.build(to.name, udt=True) 5064 elif to.this == exp.DataType.Type.CHAR: 5065 if self._match(TokenType.CHARACTER_SET): 5066 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5067 5068 return self.expression( 5069 exp.Cast if strict else exp.TryCast, 5070 this=this, 5071 to=to, 5072 format=fmt, 5073 safe=safe, 5074 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5075 ) 5076 5077 def _parse_string_agg(self) -> exp.Expression: 5078 if self._match(TokenType.DISTINCT): 5079 args: t.List[t.Optional[exp.Expression]] = [ 5080 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5081 ] 5082 if self._match(TokenType.COMMA): 5083 args.extend(self._parse_csv(self._parse_conjunction)) 5084 else: 5085 args = self._parse_csv(self._parse_conjunction) # type: ignore 5086 5087 index = self._index 5088 if not self._match(TokenType.R_PAREN) and args: 5089 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5090 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5091 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5092 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5093 5094 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5095 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5096 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5097 if not self._match_text_seq("WITHIN", "GROUP"): 5098 self._retreat(index) 5099 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5100 5101 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5102 order = self._parse_order(this=seq_get(args, 0)) 5103 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5104 5105 def _parse_convert( 5106 self, strict: bool, safe: t.Optional[bool] = None 5107 ) -> t.Optional[exp.Expression]: 5108 this = self._parse_bitwise() 5109 5110 if self._match(TokenType.USING): 5111 to: t.Optional[exp.Expression] = self.expression( 5112 exp.CharacterSet, this=self._parse_var() 5113 ) 5114 elif self._match(TokenType.COMMA): 5115 to = self._parse_types() 5116 else: 5117 to = None 5118 5119 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5120 5121 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5122 """ 5123 There are generally two variants of the DECODE function: 5124 5125 - DECODE(bin, charset) 5126 - DECODE(expression, search, result [, search, result] ... [, default]) 5127 5128 The second variant will always be parsed into a CASE expression. Note that NULL 5129 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5130 instead of relying on pattern matching. 5131 """ 5132 args = self._parse_csv(self._parse_conjunction) 5133 5134 if len(args) < 3: 5135 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5136 5137 expression, *expressions = args 5138 if not expression: 5139 return None 5140 5141 ifs = [] 5142 for search, result in zip(expressions[::2], expressions[1::2]): 5143 if not search or not result: 5144 return None 5145 5146 if isinstance(search, exp.Literal): 5147 ifs.append( 5148 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5149 ) 5150 elif isinstance(search, exp.Null): 5151 ifs.append( 5152 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5153 ) 5154 else: 5155 cond = exp.or_( 5156 exp.EQ(this=expression.copy(), expression=search), 5157 exp.and_( 5158 exp.Is(this=expression.copy(), expression=exp.Null()), 5159 exp.Is(this=search.copy(), expression=exp.Null()), 5160 copy=False, 5161 ), 5162 copy=False, 5163 ) 5164 ifs.append(exp.If(this=cond, true=result)) 5165 5166 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5167 5168 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5169 self._match_text_seq("KEY") 5170 key = self._parse_column() 5171 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5172 self._match_text_seq("VALUE") 5173 value = self._parse_bitwise() 5174 5175 if not key and not value: 5176 return None 5177 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5178 5179 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5180 if not this or not self._match_text_seq("FORMAT", "JSON"): 5181 return this 5182 5183 return self.expression(exp.FormatJson, this=this) 5184 5185 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5186 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5187 for value in values: 5188 if self._match_text_seq(value, "ON", on): 5189 return f"{value} ON {on}" 5190 5191 return None 5192 5193 @t.overload 5194 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5195 5196 @t.overload 5197 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5198 5199 def _parse_json_object(self, agg=False): 5200 star = self._parse_star() 5201 expressions = ( 5202 [star] 5203 if star 5204 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5205 ) 5206 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5207 5208 unique_keys = None 5209 if self._match_text_seq("WITH", "UNIQUE"): 5210 unique_keys = True 5211 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5212 unique_keys = False 5213 5214 self._match_text_seq("KEYS") 5215 5216 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5217 self._parse_type() 5218 ) 5219 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5220 5221 return self.expression( 5222 exp.JSONObjectAgg if agg else exp.JSONObject, 5223 expressions=expressions, 5224 null_handling=null_handling, 5225 unique_keys=unique_keys, 5226 return_type=return_type, 5227 encoding=encoding, 5228 ) 5229 5230 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5231 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5232 if not self._match_text_seq("NESTED"): 5233 this = self._parse_id_var() 5234 kind = self._parse_types(allow_identifiers=False) 5235 nested = None 5236 else: 5237 this = None 5238 kind = None 5239 nested = True 5240 5241 path = self._match_text_seq("PATH") and self._parse_string() 5242 nested_schema = nested and self._parse_json_schema() 5243 5244 return self.expression( 5245 exp.JSONColumnDef, 5246 this=this, 5247 kind=kind, 5248 path=path, 5249 nested_schema=nested_schema, 5250 ) 5251 5252 def _parse_json_schema(self) -> exp.JSONSchema: 5253 self._match_text_seq("COLUMNS") 5254 return self.expression( 5255 exp.JSONSchema, 5256 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5257 ) 5258 5259 def _parse_json_table(self) -> exp.JSONTable: 5260 this = self._parse_format_json(self._parse_bitwise()) 5261 path = self._match(TokenType.COMMA) and self._parse_string() 5262 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5263 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5264 schema = self._parse_json_schema() 5265 5266 return exp.JSONTable( 5267 this=this, 5268 schema=schema, 5269 path=path, 5270 error_handling=error_handling, 5271 empty_handling=empty_handling, 5272 ) 5273 5274 def _parse_match_against(self) -> exp.MatchAgainst: 5275 expressions = self._parse_csv(self._parse_column) 5276 5277 self._match_text_seq(")", "AGAINST", "(") 5278 5279 this = self._parse_string() 5280 5281 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5282 modifier = "IN NATURAL LANGUAGE MODE" 5283 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5284 modifier = f"{modifier} WITH QUERY EXPANSION" 5285 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5286 modifier = "IN BOOLEAN MODE" 5287 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5288 modifier = "WITH QUERY EXPANSION" 5289 else: 5290 modifier = None 5291 5292 return self.expression( 5293 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5294 ) 5295 5296 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5297 def _parse_open_json(self) -> exp.OpenJSON: 5298 this = self._parse_bitwise() 5299 path = self._match(TokenType.COMMA) and self._parse_string() 5300 5301 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5302 this = self._parse_field(any_token=True) 5303 kind = self._parse_types() 5304 path = self._parse_string() 5305 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5306 5307 return self.expression( 5308 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5309 ) 5310 5311 expressions = None 5312 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5313 self._match_l_paren() 5314 expressions = self._parse_csv(_parse_open_json_column_def) 5315 5316 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5317 5318 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5319 args = self._parse_csv(self._parse_bitwise) 5320 5321 if self._match(TokenType.IN): 5322 return self.expression( 5323 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5324 ) 5325 5326 if haystack_first: 5327 haystack = seq_get(args, 0) 5328 needle = seq_get(args, 1) 5329 else: 5330 needle = seq_get(args, 0) 5331 haystack = seq_get(args, 1) 5332 5333 return self.expression( 5334 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5335 ) 5336 5337 def _parse_predict(self) -> exp.Predict: 5338 self._match_text_seq("MODEL") 5339 this = self._parse_table() 5340 5341 self._match(TokenType.COMMA) 5342 self._match_text_seq("TABLE") 5343 5344 return self.expression( 5345 exp.Predict, 5346 this=this, 5347 expression=self._parse_table(), 5348 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5349 ) 5350 5351 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5352 args = self._parse_csv(self._parse_table) 5353 return exp.JoinHint(this=func_name.upper(), expressions=args) 5354 5355 def _parse_substring(self) -> exp.Substring: 5356 # Postgres supports the form: substring(string [from int] [for int]) 5357 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5358 5359 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5360 5361 if self._match(TokenType.FROM): 5362 args.append(self._parse_bitwise()) 5363 if self._match(TokenType.FOR): 5364 if len(args) == 1: 5365 args.append(exp.Literal.number(1)) 5366 args.append(self._parse_bitwise()) 5367 5368 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5369 5370 def _parse_trim(self) -> exp.Trim: 5371 # https://www.w3resource.com/sql/character-functions/trim.php 5372 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5373 5374 position = None 5375 collation = None 5376 expression = None 5377 5378 if self._match_texts(self.TRIM_TYPES): 5379 position = self._prev.text.upper() 5380 5381 this = self._parse_bitwise() 5382 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5383 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5384 expression = self._parse_bitwise() 5385 5386 if invert_order: 5387 this, expression = expression, this 5388 5389 if self._match(TokenType.COLLATE): 5390 collation = self._parse_bitwise() 5391 5392 return self.expression( 5393 exp.Trim, this=this, position=position, expression=expression, collation=collation 5394 ) 5395 5396 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5397 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5398 5399 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5400 return self._parse_window(self._parse_id_var(), alias=True) 5401 5402 def _parse_respect_or_ignore_nulls( 5403 self, this: t.Optional[exp.Expression] 5404 ) -> t.Optional[exp.Expression]: 5405 if self._match_text_seq("IGNORE", "NULLS"): 5406 return self.expression(exp.IgnoreNulls, this=this) 5407 if self._match_text_seq("RESPECT", "NULLS"): 5408 return self.expression(exp.RespectNulls, this=this) 5409 return this 5410 5411 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5412 if self._match(TokenType.HAVING): 5413 self._match_texts(("MAX", "MIN")) 5414 max = self._prev.text.upper() != "MIN" 5415 return self.expression( 5416 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5417 ) 5418 5419 return this 5420 5421 def _parse_window( 5422 self, this: t.Optional[exp.Expression], alias: bool = False 5423 ) -> t.Optional[exp.Expression]: 5424 func = this 5425 comments = func.comments if isinstance(func, exp.Expression) else None 5426 5427 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5428 self._match(TokenType.WHERE) 5429 this = self.expression( 5430 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5431 ) 5432 self._match_r_paren() 5433 5434 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5435 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5436 if self._match_text_seq("WITHIN", "GROUP"): 5437 order = self._parse_wrapped(self._parse_order) 5438 this = self.expression(exp.WithinGroup, this=this, expression=order) 5439 5440 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5441 # Some dialects choose to implement and some do not. 5442 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5443 5444 # There is some code above in _parse_lambda that handles 5445 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5446 5447 # The below changes handle 5448 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5449 5450 # Oracle allows both formats 5451 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5452 # and Snowflake chose to do the same for familiarity 5453 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5454 if isinstance(this, exp.AggFunc): 5455 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5456 5457 if ignore_respect and ignore_respect is not this: 5458 ignore_respect.replace(ignore_respect.this) 5459 this = self.expression(ignore_respect.__class__, this=this) 5460 5461 this = self._parse_respect_or_ignore_nulls(this) 5462 5463 # bigquery select from window x AS (partition by ...) 5464 if alias: 5465 over = None 5466 self._match(TokenType.ALIAS) 5467 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5468 return this 5469 else: 5470 over = self._prev.text.upper() 5471 5472 if comments and isinstance(func, exp.Expression): 5473 func.pop_comments() 5474 5475 if not self._match(TokenType.L_PAREN): 5476 return self.expression( 5477 exp.Window, 5478 comments=comments, 5479 this=this, 5480 alias=self._parse_id_var(False), 5481 over=over, 5482 ) 5483 5484 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5485 5486 first = self._match(TokenType.FIRST) 5487 if self._match_text_seq("LAST"): 5488 first = False 5489 5490 partition, order = self._parse_partition_and_order() 5491 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5492 5493 if kind: 5494 self._match(TokenType.BETWEEN) 5495 start = self._parse_window_spec() 5496 self._match(TokenType.AND) 5497 end = self._parse_window_spec() 5498 5499 spec = self.expression( 5500 exp.WindowSpec, 5501 kind=kind, 5502 start=start["value"], 5503 start_side=start["side"], 5504 end=end["value"], 5505 end_side=end["side"], 5506 ) 5507 else: 5508 spec = None 5509 5510 self._match_r_paren() 5511 5512 window = self.expression( 5513 exp.Window, 5514 comments=comments, 5515 this=this, 5516 partition_by=partition, 5517 order=order, 5518 spec=spec, 5519 alias=window_alias, 5520 over=over, 5521 first=first, 5522 ) 5523 5524 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5525 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5526 return self._parse_window(window, alias=alias) 5527 5528 return window 5529 5530 def _parse_partition_and_order( 5531 self, 5532 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5533 return self._parse_partition_by(), self._parse_order() 5534 5535 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5536 self._match(TokenType.BETWEEN) 5537 5538 return { 5539 "value": ( 5540 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5541 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5542 or self._parse_bitwise() 5543 ), 5544 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5545 } 5546 5547 def _parse_alias( 5548 self, this: t.Optional[exp.Expression], explicit: bool = False 5549 ) -> t.Optional[exp.Expression]: 5550 any_token = self._match(TokenType.ALIAS) 5551 comments = self._prev_comments or [] 5552 5553 if explicit and not any_token: 5554 return this 5555 5556 if self._match(TokenType.L_PAREN): 5557 aliases = self.expression( 5558 exp.Aliases, 5559 comments=comments, 5560 this=this, 5561 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5562 ) 5563 self._match_r_paren(aliases) 5564 return aliases 5565 5566 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5567 self.STRING_ALIASES and self._parse_string_as_identifier() 5568 ) 5569 5570 if alias: 5571 comments.extend(alias.pop_comments()) 5572 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5573 column = this.this 5574 5575 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5576 if not this.comments and column and column.comments: 5577 this.comments = column.pop_comments() 5578 5579 return this 5580 5581 def _parse_id_var( 5582 self, 5583 any_token: bool = True, 5584 tokens: t.Optional[t.Collection[TokenType]] = None, 5585 ) -> t.Optional[exp.Expression]: 5586 expression = self._parse_identifier() 5587 if not expression and ( 5588 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5589 ): 5590 quoted = self._prev.token_type == TokenType.STRING 5591 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5592 5593 return expression 5594 5595 def _parse_string(self) -> t.Optional[exp.Expression]: 5596 if self._match_set(self.STRING_PARSERS): 5597 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5598 return self._parse_placeholder() 5599 5600 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5601 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5602 5603 def _parse_number(self) -> t.Optional[exp.Expression]: 5604 if self._match_set(self.NUMERIC_PARSERS): 5605 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5606 return self._parse_placeholder() 5607 5608 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5609 if self._match(TokenType.IDENTIFIER): 5610 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5611 return self._parse_placeholder() 5612 5613 def _parse_var( 5614 self, 5615 any_token: bool = False, 5616 tokens: t.Optional[t.Collection[TokenType]] = None, 5617 upper: bool = False, 5618 ) -> t.Optional[exp.Expression]: 5619 if ( 5620 (any_token and self._advance_any()) 5621 or self._match(TokenType.VAR) 5622 or (self._match_set(tokens) if tokens else False) 5623 ): 5624 return self.expression( 5625 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5626 ) 5627 return self._parse_placeholder() 5628 5629 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5630 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5631 self._advance() 5632 return self._prev 5633 return None 5634 5635 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5636 return self._parse_var() or self._parse_string() 5637 5638 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5639 return self._parse_primary() or self._parse_var(any_token=True) 5640 5641 def _parse_null(self) -> t.Optional[exp.Expression]: 5642 if self._match_set(self.NULL_TOKENS): 5643 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5644 return self._parse_placeholder() 5645 5646 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5647 if self._match(TokenType.TRUE): 5648 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5649 if self._match(TokenType.FALSE): 5650 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5651 return self._parse_placeholder() 5652 5653 def _parse_star(self) -> t.Optional[exp.Expression]: 5654 if self._match(TokenType.STAR): 5655 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5656 return self._parse_placeholder() 5657 5658 def _parse_parameter(self) -> exp.Parameter: 5659 this = self._parse_identifier() or self._parse_primary_or_var() 5660 return self.expression(exp.Parameter, this=this) 5661 5662 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5663 if self._match_set(self.PLACEHOLDER_PARSERS): 5664 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5665 if placeholder: 5666 return placeholder 5667 self._advance(-1) 5668 return None 5669 5670 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5671 if not self._match(TokenType.EXCEPT): 5672 return None 5673 if self._match(TokenType.L_PAREN, advance=False): 5674 return self._parse_wrapped_csv(self._parse_column) 5675 5676 except_column = self._parse_column() 5677 return [except_column] if except_column else None 5678 5679 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5680 if not self._match(TokenType.REPLACE): 5681 return None 5682 if self._match(TokenType.L_PAREN, advance=False): 5683 return self._parse_wrapped_csv(self._parse_expression) 5684 5685 replace_expression = self._parse_expression() 5686 return [replace_expression] if replace_expression else None 5687 5688 def _parse_csv( 5689 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5690 ) -> t.List[exp.Expression]: 5691 parse_result = parse_method() 5692 items = [parse_result] if parse_result is not None else [] 5693 5694 while self._match(sep): 5695 self._add_comments(parse_result) 5696 parse_result = parse_method() 5697 if parse_result is not None: 5698 items.append(parse_result) 5699 5700 return items 5701 5702 def _parse_tokens( 5703 self, parse_method: t.Callable, expressions: t.Dict 5704 ) -> t.Optional[exp.Expression]: 5705 this = parse_method() 5706 5707 while self._match_set(expressions): 5708 this = self.expression( 5709 expressions[self._prev.token_type], 5710 this=this, 5711 comments=self._prev_comments, 5712 expression=parse_method(), 5713 ) 5714 5715 return this 5716 5717 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5718 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5719 5720 def _parse_wrapped_csv( 5721 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5722 ) -> t.List[exp.Expression]: 5723 return self._parse_wrapped( 5724 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5725 ) 5726 5727 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5728 wrapped = self._match(TokenType.L_PAREN) 5729 if not wrapped and not optional: 5730 self.raise_error("Expecting (") 5731 parse_result = parse_method() 5732 if wrapped: 5733 self._match_r_paren() 5734 return parse_result 5735 5736 def _parse_expressions(self) -> t.List[exp.Expression]: 5737 return self._parse_csv(self._parse_expression) 5738 5739 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5740 return self._parse_select() or self._parse_set_operations( 5741 self._parse_expression() if alias else self._parse_conjunction() 5742 ) 5743 5744 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5745 return self._parse_query_modifiers( 5746 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5747 ) 5748 5749 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5750 this = None 5751 if self._match_texts(self.TRANSACTION_KIND): 5752 this = self._prev.text 5753 5754 self._match_texts(("TRANSACTION", "WORK")) 5755 5756 modes = [] 5757 while True: 5758 mode = [] 5759 while self._match(TokenType.VAR): 5760 mode.append(self._prev.text) 5761 5762 if mode: 5763 modes.append(" ".join(mode)) 5764 if not self._match(TokenType.COMMA): 5765 break 5766 5767 return self.expression(exp.Transaction, this=this, modes=modes) 5768 5769 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5770 chain = None 5771 savepoint = None 5772 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5773 5774 self._match_texts(("TRANSACTION", "WORK")) 5775 5776 if self._match_text_seq("TO"): 5777 self._match_text_seq("SAVEPOINT") 5778 savepoint = self._parse_id_var() 5779 5780 if self._match(TokenType.AND): 5781 chain = not self._match_text_seq("NO") 5782 self._match_text_seq("CHAIN") 5783 5784 if is_rollback: 5785 return self.expression(exp.Rollback, savepoint=savepoint) 5786 5787 return self.expression(exp.Commit, chain=chain) 5788 5789 def _parse_refresh(self) -> exp.Refresh: 5790 self._match(TokenType.TABLE) 5791 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5792 5793 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5794 if not self._match_text_seq("ADD"): 5795 return None 5796 5797 self._match(TokenType.COLUMN) 5798 exists_column = self._parse_exists(not_=True) 5799 expression = self._parse_field_def() 5800 5801 if expression: 5802 expression.set("exists", exists_column) 5803 5804 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5805 if self._match_texts(("FIRST", "AFTER")): 5806 position = self._prev.text 5807 column_position = self.expression( 5808 exp.ColumnPosition, this=self._parse_column(), position=position 5809 ) 5810 expression.set("position", column_position) 5811 5812 return expression 5813 5814 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5815 drop = self._match(TokenType.DROP) and self._parse_drop() 5816 if drop and not isinstance(drop, exp.Command): 5817 drop.set("kind", drop.args.get("kind", "COLUMN")) 5818 return drop 5819 5820 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5821 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5822 return self.expression( 5823 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5824 ) 5825 5826 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5827 index = self._index - 1 5828 5829 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5830 return self._parse_csv( 5831 lambda: self.expression( 5832 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5833 ) 5834 ) 5835 5836 self._retreat(index) 5837 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5838 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5839 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5840 5841 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5842 if self._match_texts(self.ALTER_ALTER_PARSERS): 5843 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5844 5845 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5846 # keyword after ALTER we default to parsing this statement 5847 self._match(TokenType.COLUMN) 5848 column = self._parse_field(any_token=True) 5849 5850 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5851 return self.expression(exp.AlterColumn, this=column, drop=True) 5852 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5853 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5854 if self._match(TokenType.COMMENT): 5855 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5856 5857 self._match_text_seq("SET", "DATA") 5858 self._match_text_seq("TYPE") 5859 return self.expression( 5860 exp.AlterColumn, 5861 this=column, 5862 dtype=self._parse_types(), 5863 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5864 using=self._match(TokenType.USING) and self._parse_conjunction(), 5865 ) 5866 5867 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5868 if self._match_texts(("ALL", "EVEN", "AUTO")): 5869 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5870 5871 self._match_text_seq("KEY", "DISTKEY") 5872 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5873 5874 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5875 if compound: 5876 self._match_text_seq("SORTKEY") 5877 5878 if self._match(TokenType.L_PAREN, advance=False): 5879 return self.expression( 5880 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5881 ) 5882 5883 self._match_texts(("AUTO", "NONE")) 5884 return self.expression( 5885 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5886 ) 5887 5888 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5889 index = self._index - 1 5890 5891 partition_exists = self._parse_exists() 5892 if self._match(TokenType.PARTITION, advance=False): 5893 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5894 5895 self._retreat(index) 5896 return self._parse_csv(self._parse_drop_column) 5897 5898 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5899 if self._match(TokenType.COLUMN): 5900 exists = self._parse_exists() 5901 old_column = self._parse_column() 5902 to = self._match_text_seq("TO") 5903 new_column = self._parse_column() 5904 5905 if old_column is None or to is None or new_column is None: 5906 return None 5907 5908 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5909 5910 self._match_text_seq("TO") 5911 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5912 5913 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5914 start = self._prev 5915 5916 if not self._match(TokenType.TABLE): 5917 return self._parse_as_command(start) 5918 5919 exists = self._parse_exists() 5920 only = self._match_text_seq("ONLY") 5921 this = self._parse_table(schema=True) 5922 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5923 5924 if self._next: 5925 self._advance() 5926 5927 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5928 if parser: 5929 actions = ensure_list(parser(self)) 5930 options = self._parse_csv(self._parse_property) 5931 5932 if not self._curr and actions: 5933 return self.expression( 5934 exp.AlterTable, 5935 this=this, 5936 exists=exists, 5937 actions=actions, 5938 only=only, 5939 options=options, 5940 cluster=cluster, 5941 ) 5942 5943 return self._parse_as_command(start) 5944 5945 def _parse_merge(self) -> exp.Merge: 5946 self._match(TokenType.INTO) 5947 target = self._parse_table() 5948 5949 if target and self._match(TokenType.ALIAS, advance=False): 5950 target.set("alias", self._parse_table_alias()) 5951 5952 self._match(TokenType.USING) 5953 using = self._parse_table() 5954 5955 self._match(TokenType.ON) 5956 on = self._parse_conjunction() 5957 5958 return self.expression( 5959 exp.Merge, 5960 this=target, 5961 using=using, 5962 on=on, 5963 expressions=self._parse_when_matched(), 5964 ) 5965 5966 def _parse_when_matched(self) -> t.List[exp.When]: 5967 whens = [] 5968 5969 while self._match(TokenType.WHEN): 5970 matched = not self._match(TokenType.NOT) 5971 self._match_text_seq("MATCHED") 5972 source = ( 5973 False 5974 if self._match_text_seq("BY", "TARGET") 5975 else self._match_text_seq("BY", "SOURCE") 5976 ) 5977 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5978 5979 self._match(TokenType.THEN) 5980 5981 if self._match(TokenType.INSERT): 5982 _this = self._parse_star() 5983 if _this: 5984 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5985 else: 5986 then = self.expression( 5987 exp.Insert, 5988 this=self._parse_value(), 5989 expression=self._match_text_seq("VALUES") and self._parse_value(), 5990 ) 5991 elif self._match(TokenType.UPDATE): 5992 expressions = self._parse_star() 5993 if expressions: 5994 then = self.expression(exp.Update, expressions=expressions) 5995 else: 5996 then = self.expression( 5997 exp.Update, 5998 expressions=self._match(TokenType.SET) 5999 and self._parse_csv(self._parse_equality), 6000 ) 6001 elif self._match(TokenType.DELETE): 6002 then = self.expression(exp.Var, this=self._prev.text) 6003 else: 6004 then = None 6005 6006 whens.append( 6007 self.expression( 6008 exp.When, 6009 matched=matched, 6010 source=source, 6011 condition=condition, 6012 then=then, 6013 ) 6014 ) 6015 return whens 6016 6017 def _parse_show(self) -> t.Optional[exp.Expression]: 6018 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6019 if parser: 6020 return parser(self) 6021 return self._parse_as_command(self._prev) 6022 6023 def _parse_set_item_assignment( 6024 self, kind: t.Optional[str] = None 6025 ) -> t.Optional[exp.Expression]: 6026 index = self._index 6027 6028 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6029 return self._parse_set_transaction(global_=kind == "GLOBAL") 6030 6031 left = self._parse_primary() or self._parse_column() 6032 assignment_delimiter = self._match_texts(("=", "TO")) 6033 6034 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6035 self._retreat(index) 6036 return None 6037 6038 right = self._parse_statement() or self._parse_id_var() 6039 this = self.expression(exp.EQ, this=left, expression=right) 6040 6041 return self.expression(exp.SetItem, this=this, kind=kind) 6042 6043 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6044 self._match_text_seq("TRANSACTION") 6045 characteristics = self._parse_csv( 6046 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6047 ) 6048 return self.expression( 6049 exp.SetItem, 6050 expressions=characteristics, 6051 kind="TRANSACTION", 6052 **{"global": global_}, # type: ignore 6053 ) 6054 6055 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6056 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6057 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6058 6059 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6060 index = self._index 6061 set_ = self.expression( 6062 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6063 ) 6064 6065 if self._curr: 6066 self._retreat(index) 6067 return self._parse_as_command(self._prev) 6068 6069 return set_ 6070 6071 def _parse_var_from_options( 6072 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6073 ) -> t.Optional[exp.Var]: 6074 start = self._curr 6075 if not start: 6076 return None 6077 6078 option = start.text.upper() 6079 continuations = options.get(option) 6080 6081 index = self._index 6082 self._advance() 6083 for keywords in continuations or []: 6084 if isinstance(keywords, str): 6085 keywords = (keywords,) 6086 6087 if self._match_text_seq(*keywords): 6088 option = f"{option} {' '.join(keywords)}" 6089 break 6090 else: 6091 if continuations or continuations is None: 6092 if raise_unmatched: 6093 self.raise_error(f"Unknown option {option}") 6094 6095 self._retreat(index) 6096 return None 6097 6098 return exp.var(option) 6099 6100 def _parse_as_command(self, start: Token) -> exp.Command: 6101 while self._curr: 6102 self._advance() 6103 text = self._find_sql(start, self._prev) 6104 size = len(start.text) 6105 self._warn_unsupported() 6106 return exp.Command(this=text[:size], expression=text[size:]) 6107 6108 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6109 settings = [] 6110 6111 self._match_l_paren() 6112 kind = self._parse_id_var() 6113 6114 if self._match(TokenType.L_PAREN): 6115 while True: 6116 key = self._parse_id_var() 6117 value = self._parse_primary() 6118 6119 if not key and value is None: 6120 break 6121 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6122 self._match(TokenType.R_PAREN) 6123 6124 self._match_r_paren() 6125 6126 return self.expression( 6127 exp.DictProperty, 6128 this=this, 6129 kind=kind.this if kind else None, 6130 settings=settings, 6131 ) 6132 6133 def _parse_dict_range(self, this: str) -> exp.DictRange: 6134 self._match_l_paren() 6135 has_min = self._match_text_seq("MIN") 6136 if has_min: 6137 min = self._parse_var() or self._parse_primary() 6138 self._match_text_seq("MAX") 6139 max = self._parse_var() or self._parse_primary() 6140 else: 6141 max = self._parse_var() or self._parse_primary() 6142 min = exp.Literal.number(0) 6143 self._match_r_paren() 6144 return self.expression(exp.DictRange, this=this, min=min, max=max) 6145 6146 def _parse_comprehension( 6147 self, this: t.Optional[exp.Expression] 6148 ) -> t.Optional[exp.Comprehension]: 6149 index = self._index 6150 expression = self._parse_column() 6151 if not self._match(TokenType.IN): 6152 self._retreat(index - 1) 6153 return None 6154 iterator = self._parse_column() 6155 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6156 return self.expression( 6157 exp.Comprehension, 6158 this=this, 6159 expression=expression, 6160 iterator=iterator, 6161 condition=condition, 6162 ) 6163 6164 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6165 if self._match(TokenType.HEREDOC_STRING): 6166 return self.expression(exp.Heredoc, this=self._prev.text) 6167 6168 if not self._match_text_seq("$"): 6169 return None 6170 6171 tags = ["$"] 6172 tag_text = None 6173 6174 if self._is_connected(): 6175 self._advance() 6176 tags.append(self._prev.text.upper()) 6177 else: 6178 self.raise_error("No closing $ found") 6179 6180 if tags[-1] != "$": 6181 if self._is_connected() and self._match_text_seq("$"): 6182 tag_text = tags[-1] 6183 tags.append("$") 6184 else: 6185 self.raise_error("No closing $ found") 6186 6187 heredoc_start = self._curr 6188 6189 while self._curr: 6190 if self._match_text_seq(*tags, advance=False): 6191 this = self._find_sql(heredoc_start, self._prev) 6192 self._advance(len(tags)) 6193 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6194 6195 self._advance() 6196 6197 self.raise_error(f"No closing {''.join(tags)} found") 6198 return None 6199 6200 def _find_parser( 6201 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6202 ) -> t.Optional[t.Callable]: 6203 if not self._curr: 6204 return None 6205 6206 index = self._index 6207 this = [] 6208 while True: 6209 # The current token might be multiple words 6210 curr = self._curr.text.upper() 6211 key = curr.split(" ") 6212 this.append(curr) 6213 6214 self._advance() 6215 result, trie = in_trie(trie, key) 6216 if result == TrieResult.FAILED: 6217 break 6218 6219 if result == TrieResult.EXISTS: 6220 subparser = parsers[" ".join(this)] 6221 return subparser 6222 6223 self._retreat(index) 6224 return None 6225 6226 def _match(self, token_type, advance=True, expression=None): 6227 if not self._curr: 6228 return None 6229 6230 if self._curr.token_type == token_type: 6231 if advance: 6232 self._advance() 6233 self._add_comments(expression) 6234 return True 6235 6236 return None 6237 6238 def _match_set(self, types, advance=True): 6239 if not self._curr: 6240 return None 6241 6242 if self._curr.token_type in types: 6243 if advance: 6244 self._advance() 6245 return True 6246 6247 return None 6248 6249 def _match_pair(self, token_type_a, token_type_b, advance=True): 6250 if not self._curr or not self._next: 6251 return None 6252 6253 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6254 if advance: 6255 self._advance(2) 6256 return True 6257 6258 return None 6259 6260 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6261 if not self._match(TokenType.L_PAREN, expression=expression): 6262 self.raise_error("Expecting (") 6263 6264 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6265 if not self._match(TokenType.R_PAREN, expression=expression): 6266 self.raise_error("Expecting )") 6267 6268 def _match_texts(self, texts, advance=True): 6269 if self._curr and self._curr.text.upper() in texts: 6270 if advance: 6271 self._advance() 6272 return True 6273 return None 6274 6275 def _match_text_seq(self, *texts, advance=True): 6276 index = self._index 6277 for text in texts: 6278 if self._curr and self._curr.text.upper() == text: 6279 self._advance() 6280 else: 6281 self._retreat(index) 6282 return None 6283 6284 if not advance: 6285 self._retreat(index) 6286 6287 return True 6288 6289 def _replace_lambda( 6290 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6291 ) -> t.Optional[exp.Expression]: 6292 if not node: 6293 return node 6294 6295 for column in node.find_all(exp.Column): 6296 if column.parts[0].name in lambda_variables: 6297 dot_or_id = column.to_dot() if column.table else column.this 6298 parent = column.parent 6299 6300 while isinstance(parent, exp.Dot): 6301 if not isinstance(parent.parent, exp.Dot): 6302 parent.replace(dot_or_id) 6303 break 6304 parent = parent.parent 6305 else: 6306 if column is node: 6307 node = dot_or_id 6308 else: 6309 column.replace(dot_or_id) 6310 return node 6311 6312 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6313 start = self._prev 6314 6315 # Not to be confused with TRUNCATE(number, decimals) function call 6316 if self._match(TokenType.L_PAREN): 6317 self._retreat(self._index - 2) 6318 return self._parse_function() 6319 6320 # Clickhouse supports TRUNCATE DATABASE as well 6321 is_database = self._match(TokenType.DATABASE) 6322 6323 self._match(TokenType.TABLE) 6324 6325 exists = self._parse_exists(not_=False) 6326 6327 expressions = self._parse_csv( 6328 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6329 ) 6330 6331 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6332 6333 if self._match_text_seq("RESTART", "IDENTITY"): 6334 identity = "RESTART" 6335 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6336 identity = "CONTINUE" 6337 else: 6338 identity = None 6339 6340 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6341 option = self._prev.text 6342 else: 6343 option = None 6344 6345 partition = self._parse_partition() 6346 6347 # Fallback case 6348 if self._curr: 6349 return self._parse_as_command(start) 6350 6351 return self.expression( 6352 exp.TruncateTable, 6353 expressions=expressions, 6354 is_database=is_database, 6355 exists=exists, 6356 cluster=cluster, 6357 identity=identity, 6358 option=option, 6359 partition=partition, 6360 ) 6361 6362 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6363 this = self._parse_ordered(self._parse_opclass) 6364 6365 if not self._match(TokenType.WITH): 6366 return this 6367 6368 op = self._parse_var(any_token=True) 6369 6370 return self.expression(exp.WithOperator, this=this, op=op) 6371 6372 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6373 opts = [] 6374 self._match(TokenType.EQ) 6375 self._match(TokenType.L_PAREN) 6376 while self._curr and not self._match(TokenType.R_PAREN): 6377 opts.append(self._parse_conjunction()) 6378 self._match(TokenType.COMMA) 6379 return opts 6380 6381 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6382 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6383 6384 options = [] 6385 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6386 option = self._parse_unquoted_field() 6387 value = None 6388 6389 # Some options are defined as functions with the values as params 6390 if not isinstance(option, exp.Func): 6391 prev = self._prev.text.upper() 6392 # Different dialects might separate options and values by white space, "=" and "AS" 6393 self._match(TokenType.EQ) 6394 self._match(TokenType.ALIAS) 6395 6396 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6397 # Snowflake FILE_FORMAT case 6398 value = self._parse_wrapped_options() 6399 else: 6400 value = self._parse_unquoted_field() 6401 6402 param = self.expression(exp.CopyParameter, this=option, expression=value) 6403 options.append(param) 6404 6405 if sep: 6406 self._match(sep) 6407 6408 return options 6409 6410 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6411 expr = self.expression(exp.Credentials) 6412 6413 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6414 expr.set("storage", self._parse_conjunction()) 6415 if self._match_text_seq("CREDENTIALS"): 6416 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6417 creds = ( 6418 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6419 ) 6420 expr.set("credentials", creds) 6421 if self._match_text_seq("ENCRYPTION"): 6422 expr.set("encryption", self._parse_wrapped_options()) 6423 if self._match_text_seq("IAM_ROLE"): 6424 expr.set("iam_role", self._parse_field()) 6425 if self._match_text_seq("REGION"): 6426 expr.set("region", self._parse_field()) 6427 6428 return expr 6429 6430 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6431 return self._parse_field() 6432 6433 def _parse_copy(self) -> exp.Copy | exp.Command: 6434 start = self._prev 6435 6436 self._match(TokenType.INTO) 6437 6438 this = ( 6439 self._parse_conjunction() 6440 if self._match(TokenType.L_PAREN, advance=False) 6441 else self._parse_table(schema=True) 6442 ) 6443 6444 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6445 6446 files = self._parse_csv(self._parse_file_location) 6447 credentials = self._parse_credentials() 6448 6449 self._match_text_seq("WITH") 6450 6451 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6452 6453 # Fallback case 6454 if self._curr: 6455 return self._parse_as_command(start) 6456 6457 return self.expression( 6458 exp.Copy, 6459 this=this, 6460 kind=kind, 6461 credentials=credentials, 6462 files=files, 6463 params=params, 6464 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1161 def __init__( 1162 self, 1163 error_level: t.Optional[ErrorLevel] = None, 1164 error_message_context: int = 100, 1165 max_errors: int = 3, 1166 dialect: DialectType = None, 1167 ): 1168 from sqlglot.dialects import Dialect 1169 1170 self.error_level = error_level or ErrorLevel.IMMEDIATE 1171 self.error_message_context = error_message_context 1172 self.max_errors = max_errors 1173 self.dialect = Dialect.get_or_raise(dialect) 1174 self.reset()
1186 def parse( 1187 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1188 ) -> t.List[t.Optional[exp.Expression]]: 1189 """ 1190 Parses a list of tokens and returns a list of syntax trees, one tree 1191 per parsed SQL statement. 1192 1193 Args: 1194 raw_tokens: The list of tokens. 1195 sql: The original SQL string, used to produce helpful debug messages. 1196 1197 Returns: 1198 The list of the produced syntax trees. 1199 """ 1200 return self._parse( 1201 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1202 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1204 def parse_into( 1205 self, 1206 expression_types: exp.IntoType, 1207 raw_tokens: t.List[Token], 1208 sql: t.Optional[str] = None, 1209 ) -> t.List[t.Optional[exp.Expression]]: 1210 """ 1211 Parses a list of tokens into a given Expression type. If a collection of Expression 1212 types is given instead, this method will try to parse the token list into each one 1213 of them, stopping at the first for which the parsing succeeds. 1214 1215 Args: 1216 expression_types: The expression type(s) to try and parse the token list into. 1217 raw_tokens: The list of tokens. 1218 sql: The original SQL string, used to produce helpful debug messages. 1219 1220 Returns: 1221 The target Expression. 1222 """ 1223 errors = [] 1224 for expression_type in ensure_list(expression_types): 1225 parser = self.EXPRESSION_PARSERS.get(expression_type) 1226 if not parser: 1227 raise TypeError(f"No parser registered for {expression_type}") 1228 1229 try: 1230 return self._parse(parser, raw_tokens, sql) 1231 except ParseError as e: 1232 e.errors[0]["into_expression"] = expression_type 1233 errors.append(e) 1234 1235 raise ParseError( 1236 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1237 errors=merge_errors(errors), 1238 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1278 def check_errors(self) -> None: 1279 """Logs or raises any found errors, depending on the chosen error level setting.""" 1280 if self.error_level == ErrorLevel.WARN: 1281 for error in self.errors: 1282 logger.error(str(error)) 1283 elif self.error_level == ErrorLevel.RAISE and self.errors: 1284 raise ParseError( 1285 concat_messages(self.errors, self.max_errors), 1286 errors=merge_errors(self.errors), 1287 )
Logs or raises any found errors, depending on the chosen error level setting.
1289 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1290 """ 1291 Appends an error in the list of recorded errors or raises it, depending on the chosen 1292 error level setting. 1293 """ 1294 token = token or self._curr or self._prev or Token.string("") 1295 start = token.start 1296 end = token.end + 1 1297 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1298 highlight = self.sql[start:end] 1299 end_context = self.sql[end : end + self.error_message_context] 1300 1301 error = ParseError.new( 1302 f"{message}. Line {token.line}, Col: {token.col}.\n" 1303 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1304 description=message, 1305 line=token.line, 1306 col=token.col, 1307 start_context=start_context, 1308 highlight=highlight, 1309 end_context=end_context, 1310 ) 1311 1312 if self.error_level == ErrorLevel.IMMEDIATE: 1313 raise error 1314 1315 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1317 def expression( 1318 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1319 ) -> E: 1320 """ 1321 Creates a new, validated Expression. 1322 1323 Args: 1324 exp_class: The expression class to instantiate. 1325 comments: An optional list of comments to attach to the expression. 1326 kwargs: The arguments to set for the expression along with their respective values. 1327 1328 Returns: 1329 The target expression. 1330 """ 1331 instance = exp_class(**kwargs) 1332 instance.add_comments(comments) if comments else self._add_comments(instance) 1333 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1340 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1341 """ 1342 Validates an Expression, making sure that all its mandatory arguments are set. 1343 1344 Args: 1345 expression: The expression to validate. 1346 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1347 1348 Returns: 1349 The validated expression. 1350 """ 1351 if self.error_level != ErrorLevel.IGNORE: 1352 for error_message in expression.error_messages(args): 1353 self.raise_error(error_message) 1354 1355 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.