sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=_map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
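
# Editorial note (not in the library source): the three builders above imply the
# following round-trips, with exp.If rendered back to Snowflake as IFF (see
# Generator.TRANSFORMS below):
#   DIV0(a, b)    -> IFF(b = 0, 0, a / b)
#   ZEROIFNULL(x) -> IFF(x IS NULL, 0, x)
#   NULLIFZERO(x) -> IFF(x = 0, NULL, x)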


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }
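
    # Editorial note (not in the library source): TIME_MAPPING translates Snowflake
    # format tokens into the internal strftime-style tokens, so the dialect's own
    # TIME_FORMAT 'YYYY-MM-DD HH24:MI:SS' corresponds to '%Y-%m-%d %H:%M:%S'.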

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
_build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 407 "TO_VARCHAR": exp.ToChar.from_arg_list, 408 "ZEROIFNULL": _build_if_from_zeroifnull, 409 } 410 411 FUNCTION_PARSERS = { 412 **parser.Parser.FUNCTION_PARSERS, 413 "DATE_PART": lambda self: self._parse_date_part(), 414 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 415 } 416 FUNCTION_PARSERS.pop("TRIM") 417 418 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 419 420 RANGE_PARSERS = { 421 **parser.Parser.RANGE_PARSERS, 422 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 423 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 424 } 425 426 ALTER_PARSERS = { 427 **parser.Parser.ALTER_PARSERS, 428 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 429 "UNSET": lambda self: self.expression( 430 exp.Set, 431 tag=self._match_text_seq("TAG"), 432 expressions=self._parse_csv(self._parse_id_var), 433 unset=True, 434 ), 435 "SWAP": lambda self: self._parse_alter_table_swap(), 436 } 437 438 STATEMENT_PARSERS = { 439 **parser.Parser.STATEMENT_PARSERS, 440 TokenType.SHOW: lambda self: self._parse_show(), 441 } 442 443 PROPERTY_PARSERS = { 444 **parser.Parser.PROPERTY_PARSERS, 445 "LOCATION": lambda self: self._parse_location_property(), 446 } 447 448 SHOW_PARSERS = { 449 "SCHEMAS": _show_parser("SCHEMAS"), 450 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 451 "OBJECTS": _show_parser("OBJECTS"), 452 "TERSE OBJECTS": _show_parser("OBJECTS"), 453 "TABLES": _show_parser("TABLES"), 454 "TERSE TABLES": _show_parser("TABLES"), 455 "VIEWS": _show_parser("VIEWS"), 456 "TERSE VIEWS": _show_parser("VIEWS"), 457 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 458 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 459 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 460 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 461 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 462 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 463 "SEQUENCES": _show_parser("SEQUENCES"), 464 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 465 "COLUMNS": _show_parser("COLUMNS"), 466 "USERS": _show_parser("USERS"), 467 "TERSE USERS": _show_parser("USERS"), 468 } 469 470 STAGED_FILE_SINGLE_TOKENS = { 471 TokenType.DOT, 472 TokenType.MOD, 473 TokenType.SLASH, 474 } 475 476 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 477 478 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 479 480 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 481 this = super()._parse_column_ops(this) 482 483 casts = [] 484 json_path = [] 485 486 while self._match(TokenType.COLON): 487 path = super()._parse_column_ops(self._parse_field(any_token=True)) 488 489 # The cast :: operator has a lower precedence than the extraction operator :, so 490 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 491 while isinstance(path, exp.Cast): 492 casts.append(path.to) 493 path = path.this 494 495 if path: 496 json_path.append(path.sql(dialect="snowflake", copy=False)) 497 498 if json_path: 499 this = self.expression( 500 exp.JSONExtract, 501 this=this, 502 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 503 ) 504 505 while casts: 506 this = self.expression(exp.Cast, this=this, to=casts.pop()) 507 508 return this 509 510 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 511 # 

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table
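
        # Editorial note (not in the library source): this consumes Snowflake
        # time-travel clauses such as `t AT(TIMESTAMP => '2024-01-01')`, attaching an
        # exp.HistoricalData node to the table; anything that doesn't fully match is
        # rewound via self._retreat so regular parsing continues unaffected.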

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as a closing paren
            # in the case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
rename_func("RANDOM"), 812 exp.Select: transforms.preprocess( 813 [ 814 transforms.eliminate_distinct_on, 815 transforms.explode_to_unnest(), 816 transforms.eliminate_semi_and_anti_joins, 817 ] 818 ), 819 exp.SHA: rename_func("SHA1"), 820 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 821 exp.StartsWith: rename_func("STARTSWITH"), 822 exp.StrPosition: lambda self, e: self.func( 823 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 824 ), 825 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 826 exp.Stuff: rename_func("INSERT"), 827 exp.TimeAdd: date_delta_sql("TIMEADD"), 828 exp.TimestampDiff: lambda self, e: self.func( 829 "TIMESTAMPDIFF", e.unit, e.expression, e.this 830 ), 831 exp.TimestampTrunc: timestamptrunc_sql, 832 exp.TimeStrToTime: timestrtotime_sql, 833 exp.TimeToStr: lambda self, e: self.func( 834 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 835 ), 836 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 837 exp.ToArray: rename_func("TO_ARRAY"), 838 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 839 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 840 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 841 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 842 exp.TsOrDsToDate: lambda self, e: self.func( 843 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 844 ), 845 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 846 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 847 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 848 exp.Xor: rename_func("BOOLXOR"), 849 } 850 851 SUPPORTED_JSON_PATH_PARTS = { 852 exp.JSONPathKey, 853 exp.JSONPathRoot, 854 exp.JSONPathSubscript, 855 } 856 857 TYPE_MAPPING = { 858 **generator.Generator.TYPE_MAPPING, 859 exp.DataType.Type.NESTED: "OBJECT", 860 exp.DataType.Type.STRUCT: "OBJECT", 861 } 862 863 STAR_MAPPING = { 864 "except": "EXCLUDE", 865 "replace": "RENAME", 866 } 867 868 PROPERTIES_LOCATION = { 869 **generator.Generator.PROPERTIES_LOCATION, 870 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 871 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 872 } 873 874 UNSUPPORTED_VALUES_EXPRESSIONS = { 875 exp.Map, 876 exp.StarMap, 877 exp.Struct, 878 exp.VarMap, 879 } 880 881 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 882 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 883 values_as_table = False 884 885 return super().values_sql(expression, values_as_table=values_as_table) 886 887 def datatype_sql(self, expression: exp.DataType) -> str: 888 expressions = expression.expressions 889 if ( 890 expressions 891 and expression.is_type(*exp.DataType.STRUCT_TYPES) 892 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 893 ): 894 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 895 return "OBJECT" 896 897 return super().datatype_sql(expression) 898 899 def tonumber_sql(self, expression: exp.ToNumber) -> str: 900 return self.func( 901 "TO_NUMBER", 902 expression.this, 903 expression.args.get("format"), 904 expression.args.get("precision"), 905 expression.args.get("scale"), 906 ) 907 908 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 909 milli = expression.args.get("milli") 910 if milli is not None: 911 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 912 expression.set("nano", milli_to_nano) 913 914 return 
rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 915 916 def trycast_sql(self, expression: exp.TryCast) -> str: 917 value = expression.this 918 919 if value.type is None: 920 from sqlglot.optimizer.annotate_types import annotate_types 921 922 value = annotate_types(value) 923 924 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 925 return super().trycast_sql(expression) 926 927 # TRY_CAST only works for string values in Snowflake 928 return self.cast_sql(expression) 929 930 def log_sql(self, expression: exp.Log) -> str: 931 if not expression.expression: 932 return self.func("LN", expression.this) 933 934 return super().log_sql(expression) 935 936 def unnest_sql(self, expression: exp.Unnest) -> str: 937 unnest_alias = expression.args.get("alias") 938 offset = expression.args.get("offset") 939 940 columns = [ 941 exp.to_identifier("seq"), 942 exp.to_identifier("key"), 943 exp.to_identifier("path"), 944 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 945 seq_get(unnest_alias.columns if unnest_alias else [], 0) 946 or exp.to_identifier("value"), 947 exp.to_identifier("this"), 948 ] 949 950 if unnest_alias: 951 unnest_alias.set("columns", columns) 952 else: 953 unnest_alias = exp.TableAlias(this="_u", columns=columns) 954 955 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 956 alias = self.sql(unnest_alias) 957 alias = f" AS {alias}" if alias else "" 958 return f"{explode}{alias}" 959 960 def show_sql(self, expression: exp.Show) -> str: 961 terse = "TERSE " if expression.args.get("terse") else "" 962 history = " HISTORY" if expression.args.get("history") else "" 963 like = self.sql(expression, "like") 964 like = f" LIKE {like}" if like else "" 965 966 scope = self.sql(expression, "scope") 967 scope = f" {scope}" if scope else "" 968 969 scope_kind = self.sql(expression, "scope_kind") 970 if scope_kind: 971 scope_kind = f" IN {scope_kind}" 972 973 starts_with = self.sql(expression, "starts_with") 974 if starts_with: 975 starts_with = f" STARTS WITH {starts_with}" 976 977 limit = self.sql(expression, "limit") 978 979 from_ = self.sql(expression, "from") 980 if from_: 981 from_ = f" FROM {from_}" 982 983 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 984 985 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 986 # Other dialects don't support all of the following parameters, so we need to 987 # generate default values as necessary to ensure the transpilation is correct 988 group = expression.args.get("group") 989 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 990 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 991 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 992 993 return self.func( 994 "REGEXP_SUBSTR", 995 expression.this, 996 expression.expression, 997 position, 998 occurrence, 999 parameters, 1000 group, 1001 ) 1002 1003 def except_op(self, expression: exp.Except) -> str: 1004 if not expression.args.get("distinct"): 1005 self.unsupported("EXCEPT with All is not supported in Snowflake") 1006 return super().except_op(expression) 1007 1008 def intersect_op(self, expression: exp.Intersect) -> str: 1009 if not expression.args.get("distinct"): 1010 self.unsupported("INTERSECT with All is not supported in Snowflake") 1011 return super().intersect_op(expression) 1012 1013 def describe_sql(self, expression: exp.Describe) -> 

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def copyparameter_sql(self, expression: exp.CopyParameter) -> str:
            option = self.sql(expression, "this").upper()
            if option == "FILE_FORMAT":
                values = self.expressions(expression, key="expression", flat=True, sep=" ")
                return f"{option} = ({values})"

            return super().copyparameter_sql(expression)

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
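

# ---------------------------------------------------------------------------
# Editorial usage sketch (not part of the module source): how the dialect is
# typically exercised. The expected outputs follow directly from the builders
# and Generator.TRANSFORMS defined above.
#
# >>> import sqlglot
# >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="snowflake")[0]
# 'SELECT IFF(b = 0, 0, a / b)'
# >>> sqlglot.transpile("SELECT TO_TIME('01:02:03')", read="snowflake", write="snowflake")[0]
# "SELECT CAST('01:02:03' AS TIME)"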
284class Snowflake(Dialect): 285 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 286 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 287 NULL_ORDERING = "nulls_are_large" 288 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 289 SUPPORTS_USER_DEFINED_TYPES = False 290 SUPPORTS_SEMI_ANTI_JOIN = False 291 PREFER_CTE_ALIAS_COLUMN = True 292 TABLESAMPLE_SIZE_IS_PERCENT = True 293 COPY_PARAMS_ARE_CSV = False 294 295 TIME_MAPPING = { 296 "YYYY": "%Y", 297 "yyyy": "%Y", 298 "YY": "%y", 299 "yy": "%y", 300 "MMMM": "%B", 301 "mmmm": "%B", 302 "MON": "%b", 303 "mon": "%b", 304 "MM": "%m", 305 "mm": "%m", 306 "DD": "%d", 307 "dd": "%-d", 308 "DY": "%a", 309 "dy": "%w", 310 "HH24": "%H", 311 "hh24": "%H", 312 "HH12": "%I", 313 "hh12": "%I", 314 "MI": "%M", 315 "mi": "%M", 316 "SS": "%S", 317 "ss": "%S", 318 "FF": "%f", 319 "ff": "%f", 320 "FF6": "%f", 321 "ff6": "%f", 322 } 323 324 def quote_identifier(self, expression: E, identify: bool = True) -> E: 325 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 326 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 327 if ( 328 isinstance(expression, exp.Identifier) 329 and isinstance(expression.parent, exp.Table) 330 and expression.name.lower() == "dual" 331 ): 332 return expression # type: ignore 333 334 return super().quote_identifier(expression, identify=identify) 335 336 class Parser(parser.Parser): 337 IDENTIFY_PIVOT_STRINGS = True 338 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 339 340 ID_VAR_TOKENS = { 341 *parser.Parser.ID_VAR_TOKENS, 342 TokenType.MATCH_CONDITION, 343 } 344 345 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 346 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 347 348 FUNCTIONS = { 349 **parser.Parser.FUNCTIONS, 350 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 351 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 352 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 353 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 354 this=seq_get(args, 1), expression=seq_get(args, 0) 355 ), 356 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 357 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 358 start=seq_get(args, 0), 359 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 360 step=seq_get(args, 2), 361 ), 362 "BITXOR": binary_from_function(exp.BitwiseXor), 363 "BIT_XOR": binary_from_function(exp.BitwiseXor), 364 "BOOLXOR": binary_from_function(exp.Xor), 365 "CONVERT_TIMEZONE": _build_convert_timezone, 366 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 367 "DATE_TRUNC": _date_trunc_to_time, 368 "DATEADD": _build_date_time_add(exp.DateAdd), 369 "DATEDIFF": _build_datediff, 370 "DIV0": _build_if_from_div0, 371 "FLATTEN": exp.Explode.from_arg_list, 372 "GET_PATH": lambda args, dialect: exp.JSONExtract( 373 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 374 ), 375 "IFF": exp.If.from_arg_list, 376 "LAST_DAY": lambda args: exp.LastDay( 377 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 378 ), 379 "LISTAGG": exp.GroupConcat.from_arg_list, 380 "MEDIAN": lambda args: exp.PercentileCont( 381 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 382 ), 383 "NULLIFZERO": _build_if_from_nullifzero, 384 "OBJECT_CONSTRUCT": _build_object_construct, 385 "REGEXP_REPLACE": _build_regexp_replace, 386 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 387 "RLIKE": exp.RegexpLike.from_arg_list, 388 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), 
expression=exp.Literal.number(2)), 389 "TIMEADD": _build_date_time_add(exp.TimeAdd), 390 "TIMEDIFF": _build_datediff, 391 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 392 "TIMESTAMPDIFF": _build_datediff, 393 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 394 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 395 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 396 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 397 "TO_NUMBER": lambda args: exp.ToNumber( 398 this=seq_get(args, 0), 399 format=seq_get(args, 1), 400 precision=seq_get(args, 2), 401 scale=seq_get(args, 3), 402 ), 403 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 404 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 405 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 406 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 407 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 408 "TO_VARCHAR": exp.ToChar.from_arg_list, 409 "ZEROIFNULL": _build_if_from_zeroifnull, 410 } 411 412 FUNCTION_PARSERS = { 413 **parser.Parser.FUNCTION_PARSERS, 414 "DATE_PART": lambda self: self._parse_date_part(), 415 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 416 } 417 FUNCTION_PARSERS.pop("TRIM") 418 419 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 420 421 RANGE_PARSERS = { 422 **parser.Parser.RANGE_PARSERS, 423 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 424 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 425 } 426 427 ALTER_PARSERS = { 428 **parser.Parser.ALTER_PARSERS, 429 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 430 "UNSET": lambda self: self.expression( 431 exp.Set, 432 tag=self._match_text_seq("TAG"), 433 expressions=self._parse_csv(self._parse_id_var), 434 unset=True, 435 ), 436 "SWAP": lambda self: self._parse_alter_table_swap(), 437 } 438 439 STATEMENT_PARSERS = { 440 **parser.Parser.STATEMENT_PARSERS, 441 TokenType.SHOW: lambda self: self._parse_show(), 442 } 443 444 PROPERTY_PARSERS = { 445 **parser.Parser.PROPERTY_PARSERS, 446 "LOCATION": lambda self: self._parse_location_property(), 447 } 448 449 SHOW_PARSERS = { 450 "SCHEMAS": _show_parser("SCHEMAS"), 451 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 452 "OBJECTS": _show_parser("OBJECTS"), 453 "TERSE OBJECTS": _show_parser("OBJECTS"), 454 "TABLES": _show_parser("TABLES"), 455 "TERSE TABLES": _show_parser("TABLES"), 456 "VIEWS": _show_parser("VIEWS"), 457 "TERSE VIEWS": _show_parser("VIEWS"), 458 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 459 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 460 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 461 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 462 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 463 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 464 "SEQUENCES": _show_parser("SEQUENCES"), 465 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 466 "COLUMNS": _show_parser("COLUMNS"), 467 "USERS": _show_parser("USERS"), 468 "TERSE USERS": _show_parser("USERS"), 469 } 470 471 STAGED_FILE_SINGLE_TOKENS = { 472 TokenType.DOT, 473 TokenType.MOD, 474 TokenType.SLASH, 475 } 476 477 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 478 479 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 480 481 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 482 this = super()._parse_column_ops(this) 483 484 casts = [] 485 json_path = [] 486 487 while self._match(TokenType.COLON): 488 path = super()._parse_column_ops(self._parse_field(any_token=True)) 489 490 # The cast :: operator has a lower precedence than the extraction operator :, so 491 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 492 while isinstance(path, exp.Cast): 493 casts.append(path.to) 494 path = path.this 495 496 if path: 497 json_path.append(path.sql(dialect="snowflake", copy=False)) 498 499 if json_path: 500 this = self.expression( 501 exp.JSONExtract, 502 this=this, 503 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 504 ) 505 506 while casts: 507 this = self.expression(exp.Cast, this=this, to=casts.pop()) 508 509 return this 510 511 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 512 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 513 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 514 this = self._parse_var() or self._parse_type() 515 516 if not this: 517 return None 518 519 self._match(TokenType.COMMA) 520 expression = self._parse_bitwise() 521 this = _map_date_part(this) 522 name = this.name.upper() 523 524 if name.startswith("EPOCH"): 525 if name == "EPOCH_MILLISECOND": 526 scale = 10**3 527 elif name == "EPOCH_MICROSECOND": 528 scale = 10**6 529 elif name == "EPOCH_NANOSECOND": 530 scale = 10**9 531 else: 532 scale = None 533 534 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 535 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 536 537 if scale: 538 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 539 540 return to_unix 541 542 return self.expression(exp.Extract, this=this, expression=expression) 543 544 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 545 if is_map: 546 # Keys are strings in Snowflake's objects, see also: 547 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 548 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 549 return self._parse_slice(self._parse_string()) 550 551 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 552 553 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 554 lateral = super()._parse_lateral() 555 if not lateral: 556 return lateral 557 558 if isinstance(lateral.this, exp.Explode): 559 table_alias = lateral.args.get("alias") 560 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 561 if table_alias and not table_alias.args.get("columns"): 562 table_alias.set("columns", columns) 563 elif not table_alias: 564 exp.alias_(lateral, "_flattened", table=columns, copy=False) 565 566 return lateral 567 568 def _parse_at_before(self, table: exp.Table) -> exp.Table: 569 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 570 index = self._index 571 if self._match_texts(("AT", "BEFORE")): 572 this = self._prev.text.upper() 573 kind = ( 574 self._match(TokenType.L_PAREN) 575 and self._match_texts(self.HISTORICAL_DATA_KIND) 576 and self._prev.text.upper() 577 ) 578 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 579 580 if expression: 581 self._match_r_paren() 582 when = self.expression( 583 exp.HistoricalData, this=this, kind=kind, expression=expression 584 ) 585 table.set("when", when) 586 else: 
587 self._retreat(index) 588 589 return table 590 591 def _parse_table_parts( 592 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 593 ) -> exp.Table: 594 # https://docs.snowflake.com/en/user-guide/querying-stage 595 if self._match(TokenType.STRING, advance=False): 596 table = self._parse_string() 597 elif self._match_text_seq("@", advance=False): 598 table = self._parse_location_path() 599 else: 600 table = None 601 602 if table: 603 file_format = None 604 pattern = None 605 606 wrapped = self._match(TokenType.L_PAREN) 607 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 608 if self._match_text_seq("FILE_FORMAT", "=>"): 609 file_format = self._parse_string() or super()._parse_table_parts( 610 is_db_reference=is_db_reference 611 ) 612 elif self._match_text_seq("PATTERN", "=>"): 613 pattern = self._parse_string() 614 else: 615 break 616 617 self._match(TokenType.COMMA) 618 619 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 620 else: 621 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 622 623 return self._parse_at_before(table) 624 625 def _parse_id_var( 626 self, 627 any_token: bool = True, 628 tokens: t.Optional[t.Collection[TokenType]] = None, 629 ) -> t.Optional[exp.Expression]: 630 if self._match_text_seq("IDENTIFIER", "("): 631 identifier = ( 632 super()._parse_id_var(any_token=any_token, tokens=tokens) 633 or self._parse_string() 634 ) 635 self._match_r_paren() 636 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 637 638 return super()._parse_id_var(any_token=any_token, tokens=tokens) 639 640 def _parse_show_snowflake(self, this: str) -> exp.Show: 641 scope = None 642 scope_kind = None 643 644 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 645 # which is syntactically valid but has no effect on the output 646 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 647 648 history = self._match_text_seq("HISTORY") 649 650 like = self._parse_string() if self._match(TokenType.LIKE) else None 651 652 if self._match(TokenType.IN): 653 if self._match_text_seq("ACCOUNT"): 654 scope_kind = "ACCOUNT" 655 elif self._match_set(self.DB_CREATABLES): 656 scope_kind = self._prev.text.upper() 657 if self._curr: 658 scope = self._parse_table_parts() 659 elif self._curr: 660 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 661 scope = self._parse_table_parts() 662 663 return self.expression( 664 exp.Show, 665 **{ 666 "terse": terse, 667 "this": this, 668 "history": history, 669 "like": like, 670 "scope": scope, 671 "scope_kind": scope_kind, 672 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 673 "limit": self._parse_limit(), 674 "from": self._parse_string() if self._match(TokenType.FROM) else None, 675 }, 676 ) 677 678 def _parse_alter_table_swap(self) -> exp.SwapTable: 679 self._match_text_seq("WITH") 680 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 681 682 def _parse_location_property(self) -> exp.LocationProperty: 683 self._match(TokenType.EQ) 684 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 685 686 def _parse_file_location(self) -> t.Optional[exp.Expression]: 687 # Parse either a subquery or a staged file 688 return ( 689 self._parse_select(table=True) 690 if self._match(TokenType.L_PAREN, advance=False) 691 else self._parse_table_parts() 692 ) 693 694 def _parse_location_path(self) -> exp.Var: 695 parts = 
[self._advance_any(ignore_reserved=True)] 696 697 # We avoid consuming a comma token because external tables like @foo and @bar 698 # can be joined in a query with a comma separator, as well as closing paren 699 # in case of subqueries 700 while self._is_connected() and not self._match_set( 701 (TokenType.COMMA, TokenType.R_PAREN), advance=False 702 ): 703 parts.append(self._advance_any(ignore_reserved=True)) 704 705 return exp.var("".join(part.text for part in parts if part)) 706 707 class Tokenizer(tokens.Tokenizer): 708 STRING_ESCAPES = ["\\", "'"] 709 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 710 RAW_STRINGS = ["$$"] 711 COMMENTS = ["--", "//", ("/*", "*/")] 712 713 KEYWORDS = { 714 **tokens.Tokenizer.KEYWORDS, 715 "BYTEINT": TokenType.INT, 716 "CHAR VARYING": TokenType.VARCHAR, 717 "CHARACTER VARYING": TokenType.VARCHAR, 718 "EXCLUDE": TokenType.EXCEPT, 719 "ILIKE ANY": TokenType.ILIKE_ANY, 720 "LIKE ANY": TokenType.LIKE_ANY, 721 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 722 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 723 "MINUS": TokenType.EXCEPT, 724 "NCHAR VARYING": TokenType.VARCHAR, 725 "PUT": TokenType.COMMAND, 726 "REMOVE": TokenType.COMMAND, 727 "RENAME": TokenType.REPLACE, 728 "RM": TokenType.COMMAND, 729 "SAMPLE": TokenType.TABLE_SAMPLE, 730 "SQL_DOUBLE": TokenType.DOUBLE, 731 "SQL_VARCHAR": TokenType.VARCHAR, 732 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 733 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 734 "TOP": TokenType.TOP, 735 } 736 737 SINGLE_TOKENS = { 738 **tokens.Tokenizer.SINGLE_TOKENS, 739 "$": TokenType.PARAMETER, 740 } 741 742 VAR_SINGLE_TOKENS = {"$"} 743 744 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 745 746 class Generator(generator.Generator): 747 PARAMETER_TOKEN = "$" 748 MATCHED_BY_SOURCE = False 749 SINGLE_STRING_INTERVAL = True 750 JOIN_HINTS = False 751 TABLE_HINTS = False 752 QUERY_HINTS = False 753 AGGREGATE_FILTER_SUPPORTED = False 754 SUPPORTS_TABLE_COPY = False 755 COLLATE_IS_FUNC = True 756 LIMIT_ONLY_LITERALS = True 757 JSON_KEY_VALUE_PAIR_SEP = "," 758 INSERT_OVERWRITE = " OVERWRITE INTO" 759 STRUCT_DELIMITER = ("(", ")") 760 COPY_PARAMS_ARE_WRAPPED = False 761 COPY_PARAMS_EQ_REQUIRED = True 762 763 TRANSFORMS = { 764 **generator.Generator.TRANSFORMS, 765 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 766 exp.ArgMax: rename_func("MAX_BY"), 767 exp.ArgMin: rename_func("MIN_BY"), 768 exp.Array: inline_array_sql, 769 exp.ArrayConcat: rename_func("ARRAY_CAT"), 770 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 771 exp.AtTimeZone: lambda self, e: self.func( 772 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 773 ), 774 exp.BitwiseXor: rename_func("BITXOR"), 775 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 776 exp.DateAdd: date_delta_sql("DATEADD"), 777 exp.DateDiff: date_delta_sql("DATEDIFF"), 778 exp.DateStrToDate: datestrtodate_sql, 779 exp.DayOfMonth: rename_func("DAYOFMONTH"), 780 exp.DayOfWeek: rename_func("DAYOFWEEK"), 781 exp.DayOfYear: rename_func("DAYOFYEAR"), 782 exp.Explode: rename_func("FLATTEN"), 783 exp.Extract: rename_func("DATE_PART"), 784 exp.FromTimeZone: lambda self, e: self.func( 785 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 786 ), 787 exp.GenerateSeries: lambda self, e: self.func( 788 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 789 ), 790 exp.GroupConcat: rename_func("LISTAGG"), 791 exp.If: if_sql(name="IFF", false_value="NULL"), 792 exp.JSONExtract: lambda self, e: 
self.func("GET_PATH", e.this, e.expression), 793 exp.JSONExtractScalar: lambda self, e: self.func( 794 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 795 ), 796 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 797 exp.JSONPathRoot: lambda *_: "", 798 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 799 exp.LogicalOr: rename_func("BOOLOR_AGG"), 800 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 801 exp.Max: max_or_greatest, 802 exp.Min: min_or_least, 803 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 804 exp.PercentileCont: transforms.preprocess( 805 [transforms.add_within_group_for_percentiles] 806 ), 807 exp.PercentileDisc: transforms.preprocess( 808 [transforms.add_within_group_for_percentiles] 809 ), 810 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 811 exp.RegexpILike: _regexpilike_sql, 812 exp.Rand: rename_func("RANDOM"), 813 exp.Select: transforms.preprocess( 814 [ 815 transforms.eliminate_distinct_on, 816 transforms.explode_to_unnest(), 817 transforms.eliminate_semi_and_anti_joins, 818 ] 819 ), 820 exp.SHA: rename_func("SHA1"), 821 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 822 exp.StartsWith: rename_func("STARTSWITH"), 823 exp.StrPosition: lambda self, e: self.func( 824 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 825 ), 826 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 827 exp.Stuff: rename_func("INSERT"), 828 exp.TimeAdd: date_delta_sql("TIMEADD"), 829 exp.TimestampDiff: lambda self, e: self.func( 830 "TIMESTAMPDIFF", e.unit, e.expression, e.this 831 ), 832 exp.TimestampTrunc: timestamptrunc_sql, 833 exp.TimeStrToTime: timestrtotime_sql, 834 exp.TimeToStr: lambda self, e: self.func( 835 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 836 ), 837 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 838 exp.ToArray: rename_func("TO_ARRAY"), 839 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 840 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 841 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 842 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 843 exp.TsOrDsToDate: lambda self, e: self.func( 844 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 845 ), 846 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 847 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 848 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 849 exp.Xor: rename_func("BOOLXOR"), 850 } 851 852 SUPPORTED_JSON_PATH_PARTS = { 853 exp.JSONPathKey, 854 exp.JSONPathRoot, 855 exp.JSONPathSubscript, 856 } 857 858 TYPE_MAPPING = { 859 **generator.Generator.TYPE_MAPPING, 860 exp.DataType.Type.NESTED: "OBJECT", 861 exp.DataType.Type.STRUCT: "OBJECT", 862 } 863 864 STAR_MAPPING = { 865 "except": "EXCLUDE", 866 "replace": "RENAME", 867 } 868 869 PROPERTIES_LOCATION = { 870 **generator.Generator.PROPERTIES_LOCATION, 871 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 872 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 873 } 874 875 UNSUPPORTED_VALUES_EXPRESSIONS = { 876 exp.Map, 877 exp.StarMap, 878 exp.Struct, 879 exp.VarMap, 880 } 881 882 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 883 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 884 values_as_table = False 885 886 return super().values_sql(expression, values_as_table=values_as_table) 887 888 def 
datatype_sql(self, expression: exp.DataType) -> str: 889 expressions = expression.expressions 890 if ( 891 expressions 892 and expression.is_type(*exp.DataType.STRUCT_TYPES) 893 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 894 ): 895 # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ] 896 return "OBJECT" 897 898 return super().datatype_sql(expression) 899 900 def tonumber_sql(self, expression: exp.ToNumber) -> str: 901 return self.func( 902 "TO_NUMBER", 903 expression.this, 904 expression.args.get("format"), 905 expression.args.get("precision"), 906 expression.args.get("scale"), 907 ) 908 909 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 910 milli = expression.args.get("milli") 911 if milli is not None: 912 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 913 expression.set("nano", milli_to_nano) 914 915 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 916 917 def trycast_sql(self, expression: exp.TryCast) -> str: 918 value = expression.this 919 920 if value.type is None: 921 from sqlglot.optimizer.annotate_types import annotate_types 922 923 value = annotate_types(value) 924 925 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 926 return super().trycast_sql(expression) 927 928 # TRY_CAST only works for string values in Snowflake 929 return self.cast_sql(expression) 930 931 def log_sql(self, expression: exp.Log) -> str: 932 if not expression.expression: 933 return self.func("LN", expression.this) 934 935 return super().log_sql(expression) 936 937 def unnest_sql(self, expression: exp.Unnest) -> str: 938 unnest_alias = expression.args.get("alias") 939 offset = expression.args.get("offset") 940 941 columns = [ 942 exp.to_identifier("seq"), 943 exp.to_identifier("key"), 944 exp.to_identifier("path"), 945 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 946 seq_get(unnest_alias.columns if unnest_alias else [], 0) 947 or exp.to_identifier("value"), 948 exp.to_identifier("this"), 949 ] 950 951 if unnest_alias: 952 unnest_alias.set("columns", columns) 953 else: 954 unnest_alias = exp.TableAlias(this="_u", columns=columns) 955 956 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 957 alias = self.sql(unnest_alias) 958 alias = f" AS {alias}" if alias else "" 959 return f"{explode}{alias}" 960 961 def show_sql(self, expression: exp.Show) -> str: 962 terse = "TERSE " if expression.args.get("terse") else "" 963 history = " HISTORY" if expression.args.get("history") else "" 964 like = self.sql(expression, "like") 965 like = f" LIKE {like}" if like else "" 966 967 scope = self.sql(expression, "scope") 968 scope = f" {scope}" if scope else "" 969 970 scope_kind = self.sql(expression, "scope_kind") 971 if scope_kind: 972 scope_kind = f" IN {scope_kind}" 973 974 starts_with = self.sql(expression, "starts_with") 975 if starts_with: 976 starts_with = f" STARTS WITH {starts_with}" 977 978 limit = self.sql(expression, "limit") 979 980 from_ = self.sql(expression, "from") 981 if from_: 982 from_ = f" FROM {from_}" 983 984 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 985 986 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 987 # Other dialects don't support all of the following parameters, so we need to 988 # generate default values as necessary to ensure the transpilation is correct 989 group = expression.args.get("group") 990 parameters =
expression.args.get("parameters") or (group and exp.Literal.string("c")) 991 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 992 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 993 994 return self.func( 995 "REGEXP_SUBSTR", 996 expression.this, 997 expression.expression, 998 position, 999 occurrence, 1000 parameters, 1001 group, 1002 ) 1003 1004 def except_op(self, expression: exp.Except) -> str: 1005 if not expression.args.get("distinct"): 1006 self.unsupported("EXCEPT with All is not supported in Snowflake") 1007 return super().except_op(expression) 1008 1009 def intersect_op(self, expression: exp.Intersect) -> str: 1010 if not expression.args.get("distinct"): 1011 self.unsupported("INTERSECT with All is not supported in Snowflake") 1012 return super().intersect_op(expression) 1013 1014 def describe_sql(self, expression: exp.Describe) -> str: 1015 # Default to table if kind is unknown 1016 kind_value = expression.args.get("kind") or "TABLE" 1017 kind = f" {kind_value}" if kind_value else "" 1018 this = f" {self.sql(expression, 'this')}" 1019 expressions = self.expressions(expression, flat=True) 1020 expressions = f" {expressions}" if expressions else "" 1021 return f"DESCRIBE{kind}{this}{expressions}" 1022 1023 def generatedasidentitycolumnconstraint_sql( 1024 self, expression: exp.GeneratedAsIdentityColumnConstraint 1025 ) -> str: 1026 start = expression.args.get("start") 1027 start = f" START {start}" if start else "" 1028 increment = expression.args.get("increment") 1029 increment = f" INCREMENT {increment}" if increment else "" 1030 return f"AUTOINCREMENT{start}{increment}" 1031 1032 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1033 this = self.sql(expression, "this") 1034 return f"SWAP WITH {this}" 1035 1036 def with_properties(self, properties: exp.Properties) -> str: 1037 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ") 1038 1039 def cluster_sql(self, expression: exp.Cluster) -> str: 1040 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1041 1042 def struct_sql(self, expression: exp.Struct) -> str: 1043 keys = [] 1044 values = [] 1045 1046 for i, e in enumerate(expression.expressions): 1047 if isinstance(e, exp.PropertyEQ): 1048 keys.append( 1049 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1050 ) 1051 values.append(e.expression) 1052 else: 1053 keys.append(exp.Literal.string(f"_{i}")) 1054 values.append(e) 1055 1056 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1057 1058 def copyparameter_sql(self, expression: exp.CopyParameter) -> str: 1059 option = self.sql(expression, "this").upper() 1060 if option == "FILE_FORMAT": 1061 values = self.expressions(expression, key="expression", flat=True, sep=" ") 1062 return f"{option} = ({values})" 1063 1064 return super().copyparameter_sql(expression) 1065 1066 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1067 if expression.args.get("weight") or expression.args.get("accuracy"): 1068 self.unsupported( 1069 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1070 ) 1071 1072 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,
WITH y(c) AS (
SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
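A minimal sketch of this behavior, using only the public sqlglot API: parsing the example above with the Snowflake dialect should already yield the rewritten form, since Snowflake enables this flag.

import sqlglot
# Reading with the Snowflake dialect pushes the CTE alias column `c`
# down onto the projection, as described above.
sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
print(sqlglot.transpile(sql, read="snowflake")[0])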
Associates this dialect's time formats with their equivalent Python strftime formats.
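For instance (a sketch using only the public transpile API), a Snowflake format like 'yyyy-mm-dd' should map through these associations when writing a strftime-based dialect such as DuckDB:

import sqlglot
# 'yyyy-mm-dd' is expected to come out as '%Y-%m-%d' on the DuckDB side.
print(sqlglot.transpile("SELECT TO_TIMESTAMP('2024-01-02', 'yyyy-mm-dd')", read="snowflake", write="duckdb")[0])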
324 def quote_identifier(self, expression: E, identify: bool = True) -> E: 325 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 326 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 327 if ( 328 isinstance(expression, exp.Identifier) 329 and isinstance(expression.parent, exp.Table) 330 and expression.name.lower() == "dual" 331 ): 332 return expression # type: ignore 333 334 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
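A short sketch of the DUAL special case (the identifier names are illustrative):

from sqlglot import exp, parse_one
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()
# An identifier whose parent is a table named DUAL is returned unquoted.
dual = parse_one("SELECT 1 FROM dual", read="snowflake").find(exp.Table).this
print(dialect.quote_identifier(dual).sql(dialect="snowflake"))  # dual
# Any other identifier is quoted when identify=True (the default).
print(dialect.quote_identifier(exp.to_identifier("my_tbl")).sql(dialect="snowflake"))  # "my_tbl"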
Mapping of an escaped sequence (\\n) to its unescaped version (a literal newline).
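A small check of that mapping (a sketch; the literal is illustrative):

import sqlglot
from sqlglot import exp
# The two-character sequence \n in a Snowflake string literal should be
# unescaped to a real newline in the parsed literal's value.
literal = sqlglot.parse_one(r"SELECT 'a\nb'", read="snowflake").find(exp.Literal)
print("\n" in literal.this)  # expected: True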
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
336 class Parser(parser.Parser): 337 IDENTIFY_PIVOT_STRINGS = True 338 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 339 340 ID_VAR_TOKENS = { 341 *parser.Parser.ID_VAR_TOKENS, 342 TokenType.MATCH_CONDITION, 343 } 344 345 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 346 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 347 348 FUNCTIONS = { 349 **parser.Parser.FUNCTIONS, 350 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 351 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 352 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 353 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 354 this=seq_get(args, 1), expression=seq_get(args, 0) 355 ), 356 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 357 # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive 358 start=seq_get(args, 0), 359 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 360 step=seq_get(args, 2), 361 ), 362 "BITXOR": binary_from_function(exp.BitwiseXor), 363 "BIT_XOR": binary_from_function(exp.BitwiseXor), 364 "BOOLXOR": binary_from_function(exp.Xor), 365 "CONVERT_TIMEZONE": _build_convert_timezone, 366 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 367 "DATE_TRUNC": _date_trunc_to_time, 368 "DATEADD": _build_date_time_add(exp.DateAdd), 369 "DATEDIFF": _build_datediff, 370 "DIV0": _build_if_from_div0, 371 "FLATTEN": exp.Explode.from_arg_list, 372 "GET_PATH": lambda args, dialect: exp.JSONExtract( 373 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 374 ), 375 "IFF": exp.If.from_arg_list, 376 "LAST_DAY": lambda args: exp.LastDay( 377 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 378 ), 379 "LISTAGG": exp.GroupConcat.from_arg_list, 380 "MEDIAN": lambda args: exp.PercentileCont( 381 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 382 ), 383 "NULLIFZERO": _build_if_from_nullifzero, 384 "OBJECT_CONSTRUCT": _build_object_construct, 385 "REGEXP_REPLACE": _build_regexp_replace, 386 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 387 "RLIKE": exp.RegexpLike.from_arg_list, 388 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 389 "TIMEADD": _build_date_time_add(exp.TimeAdd), 390 "TIMEDIFF": _build_datediff, 391 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 392 "TIMESTAMPDIFF": _build_datediff, 393 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 394 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 395 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 396 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 397 "TO_NUMBER": lambda args: exp.ToNumber( 398 this=seq_get(args, 0), 399 format=seq_get(args, 1), 400 precision=seq_get(args, 2), 401 scale=seq_get(args, 3), 402 ), 403 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 404 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 405 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 406 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 407 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 408 "TO_VARCHAR": exp.ToChar.from_arg_list, 409 "ZEROIFNULL": _build_if_from_zeroifnull, 410 } 411 412 FUNCTION_PARSERS = { 413 **parser.Parser.FUNCTION_PARSERS, 414 "DATE_PART": lambda self: self._parse_date_part(), 415 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 416 } 417 FUNCTION_PARSERS.pop("TRIM") 418 419
TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 420 421 RANGE_PARSERS = { 422 **parser.Parser.RANGE_PARSERS, 423 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 424 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 425 } 426 427 ALTER_PARSERS = { 428 **parser.Parser.ALTER_PARSERS, 429 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 430 "UNSET": lambda self: self.expression( 431 exp.Set, 432 tag=self._match_text_seq("TAG"), 433 expressions=self._parse_csv(self._parse_id_var), 434 unset=True, 435 ), 436 "SWAP": lambda self: self._parse_alter_table_swap(), 437 } 438 439 STATEMENT_PARSERS = { 440 **parser.Parser.STATEMENT_PARSERS, 441 TokenType.SHOW: lambda self: self._parse_show(), 442 } 443 444 PROPERTY_PARSERS = { 445 **parser.Parser.PROPERTY_PARSERS, 446 "LOCATION": lambda self: self._parse_location_property(), 447 } 448 449 SHOW_PARSERS = { 450 "SCHEMAS": _show_parser("SCHEMAS"), 451 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 452 "OBJECTS": _show_parser("OBJECTS"), 453 "TERSE OBJECTS": _show_parser("OBJECTS"), 454 "TABLES": _show_parser("TABLES"), 455 "TERSE TABLES": _show_parser("TABLES"), 456 "VIEWS": _show_parser("VIEWS"), 457 "TERSE VIEWS": _show_parser("VIEWS"), 458 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 459 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 460 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 461 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 462 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 463 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 464 "SEQUENCES": _show_parser("SEQUENCES"), 465 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 466 "COLUMNS": _show_parser("COLUMNS"), 467 "USERS": _show_parser("USERS"), 468 "TERSE USERS": _show_parser("USERS"), 469 } 470 471 STAGED_FILE_SINGLE_TOKENS = { 472 TokenType.DOT, 473 TokenType.MOD, 474 TokenType.SLASH, 475 } 476 477 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 478 479 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 480 481 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 482 this = super()._parse_column_ops(this) 483 484 casts = [] 485 json_path = [] 486 487 while self._match(TokenType.COLON): 488 path = super()._parse_column_ops(self._parse_field(any_token=True)) 489 490 # The cast :: operator has a lower precedence than the extraction operator :, so 491 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 492 while isinstance(path, exp.Cast): 493 casts.append(path.to) 494 path = path.this 495 496 if path: 497 json_path.append(path.sql(dialect="snowflake", copy=False)) 498 499 if json_path: 500 this = self.expression( 501 exp.JSONExtract, 502 this=this, 503 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 504 ) 505 506 while casts: 507 this = self.expression(exp.Cast, this=this, to=casts.pop()) 508 509 return this 510 511 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 512 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 513 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 514 this = self._parse_var() or self._parse_type() 515 516 if not this: 517 return None 518 519 self._match(TokenType.COMMA) 520 expression = self._parse_bitwise() 521 this = _map_date_part(this) 522 name = this.name.upper() 523 524 if name.startswith("EPOCH"): 525 if name == "EPOCH_MILLISECOND": 526 scale = 
10**3 527 elif name == "EPOCH_MICROSECOND": 528 scale = 10**6 529 elif name == "EPOCH_NANOSECOND": 530 scale = 10**9 531 else: 532 scale = None 533 534 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 535 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 536 537 if scale: 538 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 539 540 return to_unix 541 542 return self.expression(exp.Extract, this=this, expression=expression) 543 544 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 545 if is_map: 546 # Keys are strings in Snowflake's objects, see also: 547 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 548 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 549 return self._parse_slice(self._parse_string()) 550 551 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 552 553 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 554 lateral = super()._parse_lateral() 555 if not lateral: 556 return lateral 557 558 if isinstance(lateral.this, exp.Explode): 559 table_alias = lateral.args.get("alias") 560 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 561 if table_alias and not table_alias.args.get("columns"): 562 table_alias.set("columns", columns) 563 elif not table_alias: 564 exp.alias_(lateral, "_flattened", table=columns, copy=False) 565 566 return lateral 567 568 def _parse_at_before(self, table: exp.Table) -> exp.Table: 569 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 570 index = self._index 571 if self._match_texts(("AT", "BEFORE")): 572 this = self._prev.text.upper() 573 kind = ( 574 self._match(TokenType.L_PAREN) 575 and self._match_texts(self.HISTORICAL_DATA_KIND) 576 and self._prev.text.upper() 577 ) 578 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 579 580 if expression: 581 self._match_r_paren() 582 when = self.expression( 583 exp.HistoricalData, this=this, kind=kind, expression=expression 584 ) 585 table.set("when", when) 586 else: 587 self._retreat(index) 588 589 return table 590 591 def _parse_table_parts( 592 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 593 ) -> exp.Table: 594 # https://docs.snowflake.com/en/user-guide/querying-stage 595 if self._match(TokenType.STRING, advance=False): 596 table = self._parse_string() 597 elif self._match_text_seq("@", advance=False): 598 table = self._parse_location_path() 599 else: 600 table = None 601 602 if table: 603 file_format = None 604 pattern = None 605 606 wrapped = self._match(TokenType.L_PAREN) 607 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 608 if self._match_text_seq("FILE_FORMAT", "=>"): 609 file_format = self._parse_string() or super()._parse_table_parts( 610 is_db_reference=is_db_reference 611 ) 612 elif self._match_text_seq("PATTERN", "=>"): 613 pattern = self._parse_string() 614 else: 615 break 616 617 self._match(TokenType.COMMA) 618 619 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 620 else: 621 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 622 623 return self._parse_at_before(table) 624 625 def _parse_id_var( 626 self, 627 any_token: bool = True, 628 tokens: t.Optional[t.Collection[TokenType]] = None, 629 ) -> t.Optional[exp.Expression]: 630 if self._match_text_seq("IDENTIFIER", "("): 631 identifier = ( 632 
super()._parse_id_var(any_token=any_token, tokens=tokens) 633 or self._parse_string() 634 ) 635 self._match_r_paren() 636 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 637 638 return super()._parse_id_var(any_token=any_token, tokens=tokens) 639 640 def _parse_show_snowflake(self, this: str) -> exp.Show: 641 scope = None 642 scope_kind = None 643 644 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 645 # which is syntactically valid but has no effect on the output 646 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 647 648 history = self._match_text_seq("HISTORY") 649 650 like = self._parse_string() if self._match(TokenType.LIKE) else None 651 652 if self._match(TokenType.IN): 653 if self._match_text_seq("ACCOUNT"): 654 scope_kind = "ACCOUNT" 655 elif self._match_set(self.DB_CREATABLES): 656 scope_kind = self._prev.text.upper() 657 if self._curr: 658 scope = self._parse_table_parts() 659 elif self._curr: 660 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 661 scope = self._parse_table_parts() 662 663 return self.expression( 664 exp.Show, 665 **{ 666 "terse": terse, 667 "this": this, 668 "history": history, 669 "like": like, 670 "scope": scope, 671 "scope_kind": scope_kind, 672 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 673 "limit": self._parse_limit(), 674 "from": self._parse_string() if self._match(TokenType.FROM) else None, 675 }, 676 ) 677 678 def _parse_alter_table_swap(self) -> exp.SwapTable: 679 self._match_text_seq("WITH") 680 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 681 682 def _parse_location_property(self) -> exp.LocationProperty: 683 self._match(TokenType.EQ) 684 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 685 686 def _parse_file_location(self) -> t.Optional[exp.Expression]: 687 # Parse either a subquery or a staged file 688 return ( 689 self._parse_select(table=True) 690 if self._match(TokenType.L_PAREN, advance=False) 691 else self._parse_table_parts() 692 ) 693 694 def _parse_location_path(self) -> exp.Var: 695 parts = [self._advance_any(ignore_reserved=True)] 696 697 # We avoid consuming a comma token because external tables like @foo and @bar 698 # can be joined in a query with a comma separator, as well as closing paren 699 # in case of subqueries 700 while self._is_connected() and not self._match_set( 701 (TokenType.COMMA, TokenType.R_PAREN), advance=False 702 ): 703 parts.append(self._advance_any(ignore_reserved=True)) 704 705 return exp.var("".join(part.text for part in parts if part))
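As a sketch of the `:` handling in _parse_column_ops above (exact output may vary by sqlglot version), chained path segments collapse into a single GET_PATH-style extraction, and a trailing `::` cast wraps the whole extraction rather than its last segment:

import sqlglot
print(sqlglot.transpile("SELECT v:user.name::varchar FROM t", read="snowflake", write="snowflake")[0])
# e.g. SELECT CAST(GET_PATH(v, 'user.name') AS VARCHAR) FROM t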
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
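A minimal sketch of these error options (the malformed query is illustrative):

import sqlglot
from sqlglot.errors import ErrorLevel
try:
    # With ErrorLevel.RAISE, errors are collected (up to max_errors) and
    # raised together as a single ParseError.
    sqlglot.parse_one("SELECT (1", read="snowflake", error_level=ErrorLevel.RAISE)
except sqlglot.ParseError as err:
    print(err)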
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
707 class Tokenizer(tokens.Tokenizer): 708 STRING_ESCAPES = ["\\", "'"] 709 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 710 RAW_STRINGS = ["$$"] 711 COMMENTS = ["--", "//", ("/*", "*/")] 712 713 KEYWORDS = { 714 **tokens.Tokenizer.KEYWORDS, 715 "BYTEINT": TokenType.INT, 716 "CHAR VARYING": TokenType.VARCHAR, 717 "CHARACTER VARYING": TokenType.VARCHAR, 718 "EXCLUDE": TokenType.EXCEPT, 719 "ILIKE ANY": TokenType.ILIKE_ANY, 720 "LIKE ANY": TokenType.LIKE_ANY, 721 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 722 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 723 "MINUS": TokenType.EXCEPT, 724 "NCHAR VARYING": TokenType.VARCHAR, 725 "PUT": TokenType.COMMAND, 726 "REMOVE": TokenType.COMMAND, 727 "RENAME": TokenType.REPLACE, 728 "RM": TokenType.COMMAND, 729 "SAMPLE": TokenType.TABLE_SAMPLE, 730 "SQL_DOUBLE": TokenType.DOUBLE, 731 "SQL_VARCHAR": TokenType.VARCHAR, 732 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 733 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 734 "TOP": TokenType.TOP, 735 } 736 737 SINGLE_TOKENS = { 738 **tokens.Tokenizer.SINGLE_TOKENS, 739 "$": TokenType.PARAMETER, 740 } 741 742 VAR_SINGLE_TOKENS = {"$"} 743 744 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
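One of the keyword mappings above in action (a sketch; output shapes may vary slightly by version):

import sqlglot
# MINUS tokenizes to EXCEPT, so it transpiles to dialects without MINUS.
print(sqlglot.transpile("SELECT a FROM t MINUS SELECT a FROM u", read="snowflake", write="duckdb")[0])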
746 class Generator(generator.Generator): 747 PARAMETER_TOKEN = "$" 748 MATCHED_BY_SOURCE = False 749 SINGLE_STRING_INTERVAL = True 750 JOIN_HINTS = False 751 TABLE_HINTS = False 752 QUERY_HINTS = False 753 AGGREGATE_FILTER_SUPPORTED = False 754 SUPPORTS_TABLE_COPY = False 755 COLLATE_IS_FUNC = True 756 LIMIT_ONLY_LITERALS = True 757 JSON_KEY_VALUE_PAIR_SEP = "," 758 INSERT_OVERWRITE = " OVERWRITE INTO" 759 STRUCT_DELIMITER = ("(", ")") 760 COPY_PARAMS_ARE_WRAPPED = False 761 COPY_PARAMS_EQ_REQUIRED = True 762 763 TRANSFORMS = { 764 **generator.Generator.TRANSFORMS, 765 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 766 exp.ArgMax: rename_func("MAX_BY"), 767 exp.ArgMin: rename_func("MIN_BY"), 768 exp.Array: inline_array_sql, 769 exp.ArrayConcat: rename_func("ARRAY_CAT"), 770 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 771 exp.AtTimeZone: lambda self, e: self.func( 772 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 773 ), 774 exp.BitwiseXor: rename_func("BITXOR"), 775 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 776 exp.DateAdd: date_delta_sql("DATEADD"), 777 exp.DateDiff: date_delta_sql("DATEDIFF"), 778 exp.DateStrToDate: datestrtodate_sql, 779 exp.DayOfMonth: rename_func("DAYOFMONTH"), 780 exp.DayOfWeek: rename_func("DAYOFWEEK"), 781 exp.DayOfYear: rename_func("DAYOFYEAR"), 782 exp.Explode: rename_func("FLATTEN"), 783 exp.Extract: rename_func("DATE_PART"), 784 exp.FromTimeZone: lambda self, e: self.func( 785 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 786 ), 787 exp.GenerateSeries: lambda self, e: self.func( 788 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 789 ), 790 exp.GroupConcat: rename_func("LISTAGG"), 791 exp.If: if_sql(name="IFF", false_value="NULL"), 792 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 793 exp.JSONExtractScalar: lambda self, e: self.func( 794 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 795 ), 796 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 797 exp.JSONPathRoot: lambda *_: "", 798 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 799 exp.LogicalOr: rename_func("BOOLOR_AGG"), 800 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 801 exp.Max: max_or_greatest, 802 exp.Min: min_or_least, 803 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 804 exp.PercentileCont: transforms.preprocess( 805 [transforms.add_within_group_for_percentiles] 806 ), 807 exp.PercentileDisc: transforms.preprocess( 808 [transforms.add_within_group_for_percentiles] 809 ), 810 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 811 exp.RegexpILike: _regexpilike_sql, 812 exp.Rand: rename_func("RANDOM"), 813 exp.Select: transforms.preprocess( 814 [ 815 transforms.eliminate_distinct_on, 816 transforms.explode_to_unnest(), 817 transforms.eliminate_semi_and_anti_joins, 818 ] 819 ), 820 exp.SHA: rename_func("SHA1"), 821 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 822 exp.StartsWith: rename_func("STARTSWITH"), 823 exp.StrPosition: lambda self, e: self.func( 824 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 825 ), 826 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 827 exp.Stuff: rename_func("INSERT"), 828 exp.TimeAdd: date_delta_sql("TIMEADD"), 829 exp.TimestampDiff: lambda self, e: self.func( 830 "TIMESTAMPDIFF", e.unit, e.expression, e.this 831 ), 832 exp.TimestampTrunc: 
timestamptrunc_sql, 833 exp.TimeStrToTime: timestrtotime_sql, 834 exp.TimeToStr: lambda self, e: self.func( 835 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 836 ), 837 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 838 exp.ToArray: rename_func("TO_ARRAY"), 839 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 840 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 841 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 842 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 843 exp.TsOrDsToDate: lambda self, e: self.func( 844 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 845 ), 846 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 847 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 848 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 849 exp.Xor: rename_func("BOOLXOR"), 850 } 851 852 SUPPORTED_JSON_PATH_PARTS = { 853 exp.JSONPathKey, 854 exp.JSONPathRoot, 855 exp.JSONPathSubscript, 856 } 857 858 TYPE_MAPPING = { 859 **generator.Generator.TYPE_MAPPING, 860 exp.DataType.Type.NESTED: "OBJECT", 861 exp.DataType.Type.STRUCT: "OBJECT", 862 } 863 864 STAR_MAPPING = { 865 "except": "EXCLUDE", 866 "replace": "RENAME", 867 } 868 869 PROPERTIES_LOCATION = { 870 **generator.Generator.PROPERTIES_LOCATION, 871 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 872 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 873 } 874 875 UNSUPPORTED_VALUES_EXPRESSIONS = { 876 exp.Map, 877 exp.StarMap, 878 exp.Struct, 879 exp.VarMap, 880 } 881 882 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 883 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 884 values_as_table = False 885 886 return super().values_sql(expression, values_as_table=values_as_table) 887 888 def datatype_sql(self, expression: exp.DataType) -> str: 889 expressions = expression.expressions 890 if ( 891 expressions 892 and expression.is_type(*exp.DataType.STRUCT_TYPES) 893 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 894 ): 895 # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ] 896 return "OBJECT" 897 898 return super().datatype_sql(expression) 899 900 def tonumber_sql(self, expression: exp.ToNumber) -> str: 901 return self.func( 902 "TO_NUMBER", 903 expression.this, 904 expression.args.get("format"), 905 expression.args.get("precision"), 906 expression.args.get("scale"), 907 ) 908 909 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 910 milli = expression.args.get("milli") 911 if milli is not None: 912 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 913 expression.set("nano", milli_to_nano) 914 915 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 916 917 def trycast_sql(self, expression: exp.TryCast) -> str: 918 value = expression.this 919 920 if value.type is None: 921 from sqlglot.optimizer.annotate_types import annotate_types 922 923 value = annotate_types(value) 924 925 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 926 return super().trycast_sql(expression) 927 928 # TRY_CAST only works for string values in Snowflake 929 return self.cast_sql(expression) 930 931 def log_sql(self, expression: exp.Log) -> str: 932 if not expression.expression: 933 return self.func("LN", expression.this) 934 935 return super().log_sql(expression) 936 937 def unnest_sql(self, expression: exp.Unnest) -> str: 938 unnest_alias = expression.args.get("alias")
939 offset = expression.args.get("offset") 940 941 columns = [ 942 exp.to_identifier("seq"), 943 exp.to_identifier("key"), 944 exp.to_identifier("path"), 945 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 946 seq_get(unnest_alias.columns if unnest_alias else [], 0) 947 or exp.to_identifier("value"), 948 exp.to_identifier("this"), 949 ] 950 951 if unnest_alias: 952 unnest_alias.set("columns", columns) 953 else: 954 unnest_alias = exp.TableAlias(this="_u", columns=columns) 955 956 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 957 alias = self.sql(unnest_alias) 958 alias = f" AS {alias}" if alias else "" 959 return f"{explode}{alias}" 960 961 def show_sql(self, expression: exp.Show) -> str: 962 terse = "TERSE " if expression.args.get("terse") else "" 963 history = " HISTORY" if expression.args.get("history") else "" 964 like = self.sql(expression, "like") 965 like = f" LIKE {like}" if like else "" 966 967 scope = self.sql(expression, "scope") 968 scope = f" {scope}" if scope else "" 969 970 scope_kind = self.sql(expression, "scope_kind") 971 if scope_kind: 972 scope_kind = f" IN {scope_kind}" 973 974 starts_with = self.sql(expression, "starts_with") 975 if starts_with: 976 starts_with = f" STARTS WITH {starts_with}" 977 978 limit = self.sql(expression, "limit") 979 980 from_ = self.sql(expression, "from") 981 if from_: 982 from_ = f" FROM {from_}" 983 984 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 985 986 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 987 # Other dialects don't support all of the following parameters, so we need to 988 # generate default values as necessary to ensure the transpilation is correct 989 group = expression.args.get("group") 990 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 991 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 992 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 993 994 return self.func( 995 "REGEXP_SUBSTR", 996 expression.this, 997 expression.expression, 998 position, 999 occurrence, 1000 parameters, 1001 group, 1002 ) 1003 1004 def except_op(self, expression: exp.Except) -> str: 1005 if not expression.args.get("distinct"): 1006 self.unsupported("EXCEPT with All is not supported in Snowflake") 1007 return super().except_op(expression) 1008 1009 def intersect_op(self, expression: exp.Intersect) -> str: 1010 if not expression.args.get("distinct"): 1011 self.unsupported("INTERSECT with All is not supported in Snowflake") 1012 return super().intersect_op(expression) 1013 1014 def describe_sql(self, expression: exp.Describe) -> str: 1015 # Default to table if kind is unknown 1016 kind_value = expression.args.get("kind") or "TABLE" 1017 kind = f" {kind_value}" if kind_value else "" 1018 this = f" {self.sql(expression, 'this')}" 1019 expressions = self.expressions(expression, flat=True) 1020 expressions = f" {expressions}" if expressions else "" 1021 return f"DESCRIBE{kind}{this}{expressions}" 1022 1023 def generatedasidentitycolumnconstraint_sql( 1024 self, expression: exp.GeneratedAsIdentityColumnConstraint 1025 ) -> str: 1026 start = expression.args.get("start") 1027 start = f" START {start}" if start else "" 1028 increment = expression.args.get("increment") 1029 increment = f" INCREMENT {increment}" if increment else "" 1030 return f"AUTOINCREMENT{start}{increment}" 1031 1032 def 
swaptable_sql(self, expression: exp.SwapTable) -> str: 1033 this = self.sql(expression, "this") 1034 return f"SWAP WITH {this}" 1035 1036 def with_properties(self, properties: exp.Properties) -> str: 1037 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ") 1038 1039 def cluster_sql(self, expression: exp.Cluster) -> str: 1040 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1041 1042 def struct_sql(self, expression: exp.Struct) -> str: 1043 keys = [] 1044 values = [] 1045 1046 for i, e in enumerate(expression.expressions): 1047 if isinstance(e, exp.PropertyEQ): 1048 keys.append( 1049 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1050 ) 1051 values.append(e.expression) 1052 else: 1053 keys.append(exp.Literal.string(f"_{i}")) 1054 values.append(e) 1055 1056 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1057 1058 def copyparameter_sql(self, expression: exp.CopyParameter) -> str: 1059 option = self.sql(expression, "this").upper() 1060 if option == "FILE_FORMAT": 1061 values = self.expressions(expression, key="expression", flat=True, sep=" ") 1062 return f"{option} = ({values})" 1063 1064 return super().copyparameter_sql(expression) 1065 1066 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1067 if expression.args.get("weight") or expression.args.get("accuracy"): 1068 self.unsupported( 1069 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1070 ) 1071 1072 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
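For example (a sketch of the formatting options):

import sqlglot
# pretty=True applies the pad/indent defaults described above.
print(sqlglot.transpile("SELECT a, b FROM t WHERE x > 0", read="snowflake", write="snowflake", pretty=True)[0])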
888 def datatype_sql(self, expression: exp.DataType) -> str: 889 expressions = expression.expressions 890 if ( 891 expressions 892 and expression.is_type(*exp.DataType.STRUCT_TYPES) 893 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 894 ): 895 # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ] 896 return "OBJECT" 897 898 return super().datatype_sql(expression)
909 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 910 milli = expression.args.get("milli") 911 if milli is not None: 912 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 913 expression.set("nano", milli_to_nano) 914 915 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
917 def trycast_sql(self, expression: exp.TryCast) -> str: 918 value = expression.this 919 920 if value.type is None: 921 from sqlglot.optimizer.annotate_types import annotate_types 922 923 value = annotate_types(value) 924 925 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 926 return super().trycast_sql(expression) 927 928 # TRY_CAST only works for string values in Snowflake 929 return self.cast_sql(expression)
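A sketch of the TRY_CAST rule above (the operands are illustrative):

import sqlglot
# A text operand keeps TRY_CAST; a numeric operand falls back to a plain
# CAST, since Snowflake's TRY_CAST only accepts string values.
print(sqlglot.transpile("SELECT TRY_CAST('5' AS INT)", read="duckdb", write="snowflake")[0])
print(sqlglot.transpile("SELECT TRY_CAST(5 AS TEXT)", read="duckdb", write="snowflake")[0])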
937 def unnest_sql(self, expression: exp.Unnest) -> str: 938 unnest_alias = expression.args.get("alias") 939 offset = expression.args.get("offset") 940 941 columns = [ 942 exp.to_identifier("seq"), 943 exp.to_identifier("key"), 944 exp.to_identifier("path"), 945 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 946 seq_get(unnest_alias.columns if unnest_alias else [], 0) 947 or exp.to_identifier("value"), 948 exp.to_identifier("this"), 949 ] 950 951 if unnest_alias: 952 unnest_alias.set("columns", columns) 953 else: 954 unnest_alias = exp.TableAlias(this="_u", columns=columns) 955 956 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 957 alias = self.sql(unnest_alias) 958 alias = f" AS {alias}" if alias else "" 959 return f"{explode}{alias}"
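A sketch of the UNNEST rewrite (exact aliasing may vary by version):

import sqlglot
# UNNEST becomes TABLE(FLATTEN(INPUT => ...)) with the fixed column list
# seq, key, path, index, value, this.
print(sqlglot.transpile("SELECT * FROM UNNEST([1, 2, 3])", read="duckdb", write="snowflake")[0])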
961 def show_sql(self, expression: exp.Show) -> str: 962 terse = "TERSE " if expression.args.get("terse") else "" 963 history = " HISTORY" if expression.args.get("history") else "" 964 like = self.sql(expression, "like") 965 like = f" LIKE {like}" if like else "" 966 967 scope = self.sql(expression, "scope") 968 scope = f" {scope}" if scope else "" 969 970 scope_kind = self.sql(expression, "scope_kind") 971 if scope_kind: 972 scope_kind = f" IN {scope_kind}" 973 974 starts_with = self.sql(expression, "starts_with") 975 if starts_with: 976 starts_with = f" STARTS WITH {starts_with}" 977 978 limit = self.sql(expression, "limit") 979 980 from_ = self.sql(expression, "from") 981 if from_: 982 from_ = f" FROM {from_}" 983 984 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
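A round-trip sketch for SHOW (the object names are illustrative):

import sqlglot
# The statement parses into exp.Show and is regenerated by show_sql.
sql = "SHOW TERSE TABLES HISTORY LIKE '%a%' IN SCHEMA mydb.myschema STARTS WITH 'b' LIMIT 5"
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])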
986 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 987 # Other dialects don't support all of the following parameters, so we need to 988 # generate default values as necessary to ensure the transpilation is correct 989 group = expression.args.get("group") 990 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 991 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 992 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 993 994 return self.func( 995 "REGEXP_SUBSTR", 996 expression.this, 997 expression.expression, 998 position, 999 occurrence, 1000 parameters, 1001 group, 1002 )
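A sketch of the default back-filling above (column and pattern are illustrative):

import sqlglot
# Requesting group 1 forces parameters ('c'), occurrence (1), and
# position (1) to be emitted so REGEXP_SUBSTR sees them positionally.
print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b+)', 1)", read="duckdb", write="snowflake")[0])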
1014 def describe_sql(self, expression: exp.Describe) -> str: 1015 # Default to table if kind is unknown 1016 kind_value = expression.args.get("kind") or "TABLE" 1017 kind = f" {kind_value}" if kind_value else "" 1018 this = f" {self.sql(expression, 'this')}" 1019 expressions = self.expressions(expression, flat=True) 1020 expressions = f" {expressions}" if expressions else "" 1021 return f"DESCRIBE{kind}{this}{expressions}"
1023 def generatedasidentitycolumnconstraint_sql( 1024 self, expression: exp.GeneratedAsIdentityColumnConstraint 1025 ) -> str: 1026 start = expression.args.get("start") 1027 start = f" START {start}" if start else "" 1028 increment = expression.args.get("increment") 1029 increment = f" INCREMENT {increment}" if increment else "" 1030 return f"AUTOINCREMENT{start}{increment}"
1042 def struct_sql(self, expression: exp.Struct) -> str: 1043 keys = [] 1044 values = [] 1045 1046 for i, e in enumerate(expression.expressions): 1047 if isinstance(e, exp.PropertyEQ): 1048 keys.append( 1049 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1050 ) 1051 values.append(e.expression) 1052 else: 1053 keys.append(exp.Literal.string(f"_{i}")) 1054 values.append(e) 1055 1056 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
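A sketch of the struct rewrite, assuming DuckDB's struct literal syntax on the read side:

import sqlglot
# Keys and values are flattened into OBJECT_CONSTRUCT('a', 1, 'b', 2).
print(sqlglot.transpile("SELECT {'a': 1, 'b': 2}", read="duckdb", write="snowflake")[0])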
1058 def copyparameter_sql(self, expression: exp.CopyParameter) -> str: 1059 option = self.sql(expression, "this").upper() 1060 if option == "FILE_FORMAT": 1061 values = self.expressions(expression, key="expression", flat=True, sep=" ") 1062 return f"{option} = ({values})" 1063 1064 return super().copyparameter_sql(expression)
1066 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1067 if expression.args.get("weight") or expression.args.get("accuracy"): 1068 self.unsupported( 1069 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1070 ) 1071 1072 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
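And a final sketch for the quantile mapping:

import sqlglot
# APPROX_QUANTILE maps to APPROX_PERCENTILE; weight/accuracy arguments
# would instead trigger the unsupported warning above.
print(sqlglot.transpile("SELECT APPROX_QUANTILE(x, 0.5) FROM t", read="duckdb", write="snowflake")[0])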
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- OUTER_UNION_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- credentials_sql
- copy_sql
- semicolon_sql