sqlglot.dialects.clickhouse
1from __future__ import annotations 2import typing as t 3import datetime 4from sqlglot import exp, generator, parser, tokens 5from sqlglot.dialects.dialect import ( 6 Dialect, 7 NormalizationStrategy, 8 arg_max_or_min_no_count, 9 build_date_delta, 10 build_formatted_time, 11 inline_array_sql, 12 json_extract_segments, 13 json_path_key_only_name, 14 length_or_char_length_sql, 15 no_pivot_sql, 16 build_json_extract_path, 17 rename_func, 18 sha256_sql, 19 var_map_sql, 20 timestamptrunc_sql, 21 unit_to_var, 22 trim_sql, 23) 24from sqlglot.generator import Generator 25from sqlglot.helper import is_int, seq_get 26from sqlglot.tokens import Token, TokenType 27from sqlglot.generator import unsupported_args 28 29DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 30 31 32def _build_date_format(args: t.List) -> exp.TimeToStr: 33 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 34 35 timezone = seq_get(args, 2) 36 if timezone: 37 expr.set("zone", timezone) 38 39 return expr 40 41 42def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 43 scale = expression.args.get("scale") 44 timestamp = expression.this 45 46 if scale in (None, exp.UnixToTime.SECONDS): 47 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 48 if scale == exp.UnixToTime.MILLIS: 49 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 50 if scale == exp.UnixToTime.MICROS: 51 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 52 if scale == exp.UnixToTime.NANOS: 53 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 54 55 return self.func( 56 "fromUnixTimestamp", 57 exp.cast( 58 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 59 ), 60 ) 61 62 63def _lower_func(sql: str) -> str: 64 index = sql.index("(") 65 return sql[:index].lower() + sql[index:] 66 67 68def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 69 quantile = expression.args["quantile"] 70 args = f"({self.sql(expression, 'this')})" 71 72 if isinstance(quantile, exp.Array): 73 func = self.func("quantiles", *quantile) 74 else: 75 func = self.func("quantile", quantile) 76 77 return func + args 78 79 80def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 81 if len(args) == 1: 82 return exp.CountIf(this=seq_get(args, 0)) 83 84 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 85 86 87def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 88 if len(args) == 3: 89 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 90 91 strtodate = exp.StrToDate.from_arg_list(args) 92 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 93 94 95def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 96 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 97 if not expression.unit: 98 return rename_func(name)(self, expression) 99 100 return self.func( 101 name, 102 unit_to_var(expression), 103 expression.expression, 104 expression.this, 105 ) 106 107 return _delta_sql 108 109 110def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 111 ts = expression.this 112 113 tz = expression.args.get("zone") 114 if tz and isinstance(ts, exp.Literal): 115 # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them. 116 # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`. 117 # 118 # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit) 119 # or microsecond (6 digit) precision. It will error if passed any other number of fractional 120 # digits, so we extract the fractional seconds and pad to 6 digits before parsing. 121 ts_string = ts.name.strip() 122 123 # separate [date and time] from [fractional seconds and UTC offset] 124 ts_parts = ts_string.split(".") 125 if len(ts_parts) == 2: 126 # separate fractional seconds and UTC offset 127 offset_sep = "+" if "+" in ts_parts[1] else "-" 128 ts_frac_parts = ts_parts[1].split(offset_sep) 129 num_frac_parts = len(ts_frac_parts) 130 131 # pad to 6 digits if fractional seconds present 132 ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0") 133 ts_string = "".join( 134 [ 135 ts_parts[0], # date and time 136 ".", 137 ts_frac_parts[0], # fractional seconds 138 offset_sep if num_frac_parts > 1 else "", 139 ts_frac_parts[1] if num_frac_parts > 1 else "", # utc offset (if present) 140 ] 141 ) 142 143 # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 144 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if 145 # it's part of the timestamp string 146 ts_without_tz = ( 147 datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ") 148 ) 149 ts = exp.Literal.string(ts_without_tz) 150 151 # Non-nullable DateTime64 with microsecond precision 152 expressions = [exp.DataTypeParam(this=tz)] if tz else [] 153 datatype = exp.DataType.build( 154 exp.DataType.Type.DATETIME64, 155 expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions], 156 nullable=False, 157 ) 158 159 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 160 161 162class ClickHouse(Dialect): 163 NORMALIZE_FUNCTIONS: bool | str = False 164 NULL_ORDERING = "nulls_are_last" 165 SUPPORTS_USER_DEFINED_TYPES = False 166 SAFE_DIVISION = True 167 LOG_BASE_FIRST: t.Optional[bool] = None 168 FORCE_EARLY_ALIAS_REF_EXPANSION = True 169 PRESERVE_ORIGINAL_NAMES = True 170 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 171 IDENTIFIERS_CAN_START_WITH_DIGIT = True 172 173 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 174 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 175 176 UNESCAPED_SEQUENCES = { 177 "\\0": "\0", 178 } 179 180 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 181 182 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 183 exp.Except: False, 184 exp.Intersect: False, 185 exp.Union: None, 186 } 187 188 class Tokenizer(tokens.Tokenizer): 189 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 190 IDENTIFIERS = ['"', "`"] 191 IDENTIFIER_ESCAPES = ["\\"] 192 STRING_ESCAPES = ["'", "\\"] 193 BIT_STRINGS = [("0b", "")] 194 HEX_STRINGS = [("0x", ""), ("0X", "")] 195 HEREDOC_STRINGS = ["$"] 196 197 KEYWORDS = { 198 **tokens.Tokenizer.KEYWORDS, 199 "ATTACH": TokenType.COMMAND, 200 "DATE32": TokenType.DATE32, 201 "DATETIME64": TokenType.DATETIME64, 202 "DICTIONARY": TokenType.DICTIONARY, 203 "ENUM8": TokenType.ENUM8, 204 "ENUM16": TokenType.ENUM16, 205 "FINAL": TokenType.FINAL, 206 "FIXEDSTRING": TokenType.FIXEDSTRING, 207 "FLOAT32": TokenType.FLOAT, 208 "FLOAT64": TokenType.DOUBLE, 209 "GLOBAL": TokenType.GLOBAL, 210 "INT256": TokenType.INT256, 211 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 212 "MAP": TokenType.MAP, 213 "NESTED": TokenType.NESTED, 214 "SAMPLE": TokenType.TABLE_SAMPLE, 215 "TUPLE": TokenType.STRUCT, 216 "UINT128": TokenType.UINT128, 217 "UINT16": TokenType.USMALLINT, 218 "UINT256": TokenType.UINT256, 219 "UINT32": TokenType.UINT, 220 "UINT64": TokenType.UBIGINT, 221 "UINT8": TokenType.UTINYINT, 222 "IPV4": TokenType.IPV4, 223 "IPV6": TokenType.IPV6, 224 "POINT": TokenType.POINT, 225 "RING": TokenType.RING, 226 "LINESTRING": TokenType.LINESTRING, 227 "MULTILINESTRING": TokenType.MULTILINESTRING, 228 "POLYGON": TokenType.POLYGON, 229 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 230 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 231 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 232 "SYSTEM": TokenType.COMMAND, 233 "PREWHERE": TokenType.PREWHERE, 234 } 235 KEYWORDS.pop("/*+") 236 237 SINGLE_TOKENS = { 238 **tokens.Tokenizer.SINGLE_TOKENS, 239 "$": TokenType.HEREDOC_STRING, 240 } 241 242 class Parser(parser.Parser): 243 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 244 # * select x from t1 union all select x from t2 limit 1; 245 # * select x from t1 union all (select x from t2 limit 1); 246 MODIFIERS_ATTACHED_TO_SET_OP = False 247 INTERVAL_SPANS = False 248 249 FUNCTIONS = { 250 **parser.Parser.FUNCTIONS, 251 "ANY": exp.AnyValue.from_arg_list, 252 "ARRAYSUM": exp.ArraySum.from_arg_list, 253 "COUNTIF": _build_count_if, 254 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 255 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 256 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 257 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 258 "DATE_FORMAT": _build_date_format, 259 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 260 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 261 "FORMATDATETIME": _build_date_format, 262 "JSONEXTRACTSTRING": build_json_extract_path( 263 exp.JSONExtractScalar, zero_based_indexing=False 264 ), 265 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 266 "MAP": parser.build_var_map, 267 "MATCH": exp.RegexpLike.from_arg_list, 268 "RANDCANONICAL": exp.Rand.from_arg_list, 269 "STR_TO_DATE": _build_str_to_date, 270 "TUPLE": exp.Struct.from_arg_list, 271 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 272 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 273 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 274 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 275 "UNIQ": exp.ApproxDistinct.from_arg_list, 276 "XOR": lambda args: exp.Xor(expressions=args), 277 "MD5": exp.MD5Digest.from_arg_list, 278 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 279 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 280 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 281 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 282 } 283 FUNCTIONS.pop("TRANSFORM") 284 285 AGG_FUNCTIONS = { 286 "count", 287 "min", 288 "max", 289 "sum", 290 "avg", 291 "any", 292 "stddevPop", 293 "stddevSamp", 294 "varPop", 295 "varSamp", 296 "corr", 297 "covarPop", 298 "covarSamp", 299 "entropy", 300 "exponentialMovingAverage", 301 "intervalLengthSum", 302 "kolmogorovSmirnovTest", 303 "mannWhitneyUTest", 304 "median", 305 "rankCorr", 306 "sumKahan", 307 "studentTTest", 308 "welchTTest", 309 "anyHeavy", 310 "anyLast", 311 "boundingRatio", 312 "first_value", 313 "last_value", 314 "argMin", 315 "argMax", 316 "avgWeighted", 317 "topK", 318 "topKWeighted", 319 "deltaSum", 320 "deltaSumTimestamp", 321 "groupArray", 322 "groupArrayLast", 323 "groupUniqArray", 324 "groupArrayInsertAt", 325 "groupArrayMovingAvg", 326 "groupArrayMovingSum", 327 "groupArraySample", 328 "groupBitAnd", 329 "groupBitOr", 330 "groupBitXor", 331 "groupBitmap", 332 "groupBitmapAnd", 333 "groupBitmapOr", 334 "groupBitmapXor", 335 "sumWithOverflow", 336 "sumMap", 337 "minMap", 338 "maxMap", 339 "skewSamp", 340 "skewPop", 341 "kurtSamp", 342 "kurtPop", 343 "uniq", 344 "uniqExact", 345 "uniqCombined", 346 "uniqCombined64", 347 "uniqHLL12", 348 "uniqTheta", 349 "quantile", 350 "quantiles", 351 "quantileExact", 352 "quantilesExact", 353 "quantileExactLow", 354 "quantilesExactLow", 355 "quantileExactHigh", 356 "quantilesExactHigh", 357 "quantileExactWeighted", 358 "quantilesExactWeighted", 359 "quantileTiming", 360 "quantilesTiming", 361 "quantileTimingWeighted", 362 "quantilesTimingWeighted", 363 "quantileDeterministic", 364 "quantilesDeterministic", 365 "quantileTDigest", 366 "quantilesTDigest", 367 "quantileTDigestWeighted", 368 "quantilesTDigestWeighted", 369 "quantileBFloat16", 370 "quantilesBFloat16", 371 "quantileBFloat16Weighted", 372 "quantilesBFloat16Weighted", 373 "simpleLinearRegression", 374 "stochasticLinearRegression", 375 "stochasticLogisticRegression", 376 "categoricalInformationValue", 377 "contingency", 378 "cramersV", 379 "cramersVBiasCorrected", 380 "theilsU", 381 "maxIntersections", 382 "maxIntersectionsPosition", 383 "meanZTest", 384 "quantileInterpolatedWeighted", 385 "quantilesInterpolatedWeighted", 386 "quantileGK", 387 "quantilesGK", 388 "sparkBar", 389 "sumCount", 390 "largestTriangleThreeBuckets", 391 "histogram", 392 "sequenceMatch", 393 "sequenceCount", 394 "windowFunnel", 395 "retention", 396 "uniqUpTo", 397 "sequenceNextNode", 398 "exponentialTimeDecayedAvg", 399 } 400 401 AGG_FUNCTIONS_SUFFIXES = [ 402 "If", 403 "Array", 404 "ArrayIf", 405 "Map", 406 "SimpleState", 407 "State", 408 "Merge", 409 "MergeState", 410 "ForEach", 411 "Distinct", 412 "OrDefault", 413 "OrNull", 414 "Resample", 415 "ArgMin", 416 "ArgMax", 417 ] 418 419 FUNC_TOKENS = { 420 *parser.Parser.FUNC_TOKENS, 421 TokenType.SET, 422 } 423 424 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 425 426 ID_VAR_TOKENS = { 427 *parser.Parser.ID_VAR_TOKENS, 428 TokenType.LIKE, 429 } 430 431 AGG_FUNC_MAPPING = ( 432 lambda functions, suffixes: { 433 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 434 } 435 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 436 437 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 438 439 FUNCTION_PARSERS = { 440 **parser.Parser.FUNCTION_PARSERS, 441 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 442 "QUANTILE": lambda self: self._parse_quantile(), 443 "MEDIAN": lambda self: self._parse_quantile(), 444 "COLUMNS": lambda self: self._parse_columns(), 445 } 446 447 FUNCTION_PARSERS.pop("MATCH") 448 449 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 450 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 451 452 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 453 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 454 455 RANGE_PARSERS = { 456 **parser.Parser.RANGE_PARSERS, 457 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 458 and self._parse_in(this, is_global=True), 459 } 460 461 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 462 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 463 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 464 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 465 466 JOIN_KINDS = { 467 *parser.Parser.JOIN_KINDS, 468 TokenType.ANY, 469 TokenType.ASOF, 470 TokenType.ARRAY, 471 } 472 473 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 474 TokenType.ANY, 475 TokenType.ARRAY, 476 TokenType.FINAL, 477 TokenType.FORMAT, 478 TokenType.SETTINGS, 479 } 480 481 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 482 TokenType.FORMAT, 483 } 484 485 LOG_DEFAULTS_TO_LN = True 486 487 QUERY_MODIFIER_PARSERS = { 488 **parser.Parser.QUERY_MODIFIER_PARSERS, 489 TokenType.SETTINGS: lambda self: ( 490 "settings", 491 self._advance() or self._parse_csv(self._parse_assignment), 492 ), 493 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 494 } 495 496 CONSTRAINT_PARSERS = { 497 **parser.Parser.CONSTRAINT_PARSERS, 498 "INDEX": lambda self: self._parse_index_constraint(), 499 "CODEC": lambda self: self._parse_compress(), 500 } 501 502 ALTER_PARSERS = { 503 **parser.Parser.ALTER_PARSERS, 504 "REPLACE": lambda self: self._parse_alter_table_replace(), 505 } 506 507 SCHEMA_UNNAMED_CONSTRAINTS = { 508 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 509 "INDEX", 510 } 511 512 PLACEHOLDER_PARSERS = { 513 **parser.Parser.PLACEHOLDER_PARSERS, 514 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 515 } 516 517 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 518 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 519 return self._parse_lambda() 520 521 def _parse_types( 522 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 523 ) -> t.Optional[exp.Expression]: 524 dtype = super()._parse_types( 525 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 526 ) 527 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 528 # Mark every type as non-nullable which is ClickHouse's default, unless it's 529 # already marked as nullable. This marker helps us transpile types from other 530 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 531 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 532 # fail in ClickHouse without the `Nullable` type constructor. 533 dtype.set("nullable", False) 534 535 return dtype 536 537 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 538 index = self._index 539 this = self._parse_bitwise() 540 if self._match(TokenType.FROM): 541 self._retreat(index) 542 return super()._parse_extract() 543 544 # We return Anonymous here because extract and regexpExtract have different semantics, 545 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 546 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 547 # 548 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 549 self._match(TokenType.COMMA) 550 return self.expression( 551 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 552 ) 553 554 def _parse_assignment(self) -> t.Optional[exp.Expression]: 555 this = super()._parse_assignment() 556 557 if self._match(TokenType.PLACEHOLDER): 558 return self.expression( 559 exp.If, 560 this=this, 561 true=self._parse_assignment(), 562 false=self._match(TokenType.COLON) and self._parse_assignment(), 563 ) 564 565 return this 566 567 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 568 """ 569 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 570 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 571 """ 572 index = self._index 573 574 this = self._parse_id_var() 575 self._match(TokenType.COLON) 576 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 577 self._match_text_seq("IDENTIFIER") and "Identifier" 578 ) 579 580 if not kind: 581 self._retreat(index) 582 return None 583 elif not self._match(TokenType.R_BRACE): 584 self.raise_error("Expecting }") 585 586 return self.expression(exp.Placeholder, this=this, kind=kind) 587 588 def _parse_bracket( 589 self, this: t.Optional[exp.Expression] = None 590 ) -> t.Optional[exp.Expression]: 591 l_brace = self._match(TokenType.L_BRACE, advance=False) 592 bracket = super()._parse_bracket(this) 593 594 if l_brace and isinstance(bracket, exp.Struct): 595 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 596 for expression in bracket.expressions: 597 if not isinstance(expression, exp.PropertyEQ): 598 break 599 600 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 601 varmap.args["values"].append("expressions", expression.expression) 602 603 return varmap 604 605 return bracket 606 607 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 608 this = super()._parse_in(this) 609 this.set("is_global", is_global) 610 return this 611 612 def _parse_table( 613 self, 614 schema: bool = False, 615 joins: bool = False, 616 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 617 parse_bracket: bool = False, 618 is_db_reference: bool = False, 619 parse_partition: bool = False, 620 ) -> t.Optional[exp.Expression]: 621 this = super()._parse_table( 622 schema=schema, 623 joins=joins, 624 alias_tokens=alias_tokens, 625 parse_bracket=parse_bracket, 626 is_db_reference=is_db_reference, 627 ) 628 629 if self._match(TokenType.FINAL): 630 this = self.expression(exp.Final, this=this) 631 632 return this 633 634 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 635 return super()._parse_position(haystack_first=True) 636 637 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 638 def _parse_cte(self) -> t.Optional[exp.CTE]: 639 # WITH <identifier> AS <subquery expression> 640 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 641 642 if not cte: 643 # WITH <expression> AS <identifier> 644 cte = self.expression( 645 exp.CTE, 646 this=self._parse_assignment(), 647 alias=self._parse_table_alias(), 648 scalar=True, 649 ) 650 651 return cte 652 653 def _parse_join_parts( 654 self, 655 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 656 is_global = self._match(TokenType.GLOBAL) and self._prev 657 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 658 659 if kind_pre: 660 kind = self._match_set(self.JOIN_KINDS) and self._prev 661 side = self._match_set(self.JOIN_SIDES) and self._prev 662 return is_global, side, kind 663 664 return ( 665 is_global, 666 self._match_set(self.JOIN_SIDES) and self._prev, 667 self._match_set(self.JOIN_KINDS) and self._prev, 668 ) 669 670 def _parse_join( 671 self, skip_join_token: bool = False, parse_bracket: bool = False 672 ) -> t.Optional[exp.Join]: 673 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 674 if join: 675 join.set("global", join.args.pop("method", None)) 676 677 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 678 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 679 if join.kind == "ARRAY": 680 for table in join.find_all(exp.Table): 681 table.replace(table.to_column()) 682 683 return join 684 685 def _parse_function( 686 self, 687 functions: t.Optional[t.Dict[str, t.Callable]] = None, 688 anonymous: bool = False, 689 optional_parens: bool = True, 690 any_token: bool = False, 691 ) -> t.Optional[exp.Expression]: 692 expr = super()._parse_function( 693 functions=functions, 694 anonymous=anonymous, 695 optional_parens=optional_parens, 696 any_token=any_token, 697 ) 698 699 func = expr.this if isinstance(expr, exp.Window) else expr 700 701 # Aggregate functions can be split in 2 parts: <func_name><suffix> 702 parts = ( 703 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 704 ) 705 706 if parts: 707 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 708 params = self._parse_func_params(anon_func) 709 710 kwargs = { 711 "this": anon_func.this, 712 "expressions": anon_func.expressions, 713 } 714 if parts[1]: 715 kwargs["parts"] = parts 716 exp_class: t.Type[exp.Expression] = ( 717 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 718 ) 719 else: 720 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 721 722 kwargs["exp_class"] = exp_class 723 if params: 724 kwargs["params"] = params 725 726 func = self.expression(**kwargs) 727 728 if isinstance(expr, exp.Window): 729 # The window's func was parsed as Anonymous in base parser, fix its 730 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 731 expr.set("this", func) 732 elif params: 733 # Params have blocked super()._parse_function() from parsing the following window 734 # (if that exists) as they're standing between the function call and the window spec 735 expr = self._parse_window(func) 736 else: 737 expr = func 738 739 return expr 740 741 def _parse_func_params( 742 self, this: t.Optional[exp.Func] = None 743 ) -> t.Optional[t.List[exp.Expression]]: 744 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 745 return self._parse_csv(self._parse_lambda) 746 747 if self._match(TokenType.L_PAREN): 748 params = self._parse_csv(self._parse_lambda) 749 self._match_r_paren(this) 750 return params 751 752 return None 753 754 def _parse_quantile(self) -> exp.Quantile: 755 this = self._parse_lambda() 756 params = self._parse_func_params() 757 if params: 758 return self.expression(exp.Quantile, this=params[0], quantile=this) 759 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 760 761 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 762 return super()._parse_wrapped_id_vars(optional=True) 763 764 def _parse_primary_key( 765 self, wrapped_optional: bool = False, in_props: bool = False 766 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 767 return super()._parse_primary_key( 768 wrapped_optional=wrapped_optional or in_props, in_props=in_props 769 ) 770 771 def _parse_on_property(self) -> t.Optional[exp.Expression]: 772 index = self._index 773 if self._match_text_seq("CLUSTER"): 774 this = self._parse_id_var() 775 if this: 776 return self.expression(exp.OnCluster, this=this) 777 else: 778 self._retreat(index) 779 return None 780 781 def _parse_index_constraint( 782 self, kind: t.Optional[str] = None 783 ) -> exp.IndexColumnConstraint: 784 # INDEX name1 expr TYPE type1(args) GRANULARITY value 785 this = self._parse_id_var() 786 expression = self._parse_assignment() 787 788 index_type = self._match_text_seq("TYPE") and ( 789 self._parse_function() or self._parse_var() 790 ) 791 792 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 793 794 return self.expression( 795 exp.IndexColumnConstraint, 796 this=this, 797 expression=expression, 798 index_type=index_type, 799 granularity=granularity, 800 ) 801 802 def _parse_partition(self) -> t.Optional[exp.Partition]: 803 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 804 if not self._match(TokenType.PARTITION): 805 return None 806 807 if self._match_text_seq("ID"): 808 # Corresponds to the PARTITION ID <string_value> syntax 809 expressions: t.List[exp.Expression] = [ 810 self.expression(exp.PartitionId, this=self._parse_string()) 811 ] 812 else: 813 expressions = self._parse_expressions() 814 815 return self.expression(exp.Partition, expressions=expressions) 816 817 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 818 partition = self._parse_partition() 819 820 if not partition or not self._match(TokenType.FROM): 821 return None 822 823 return self.expression( 824 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 825 ) 826 827 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 828 if not self._match_text_seq("PROJECTION"): 829 return None 830 831 return self.expression( 832 exp.ProjectionDef, 833 this=self._parse_id_var(), 834 expression=self._parse_wrapped(self._parse_statement), 835 ) 836 837 def _parse_constraint(self) -> t.Optional[exp.Expression]: 838 return super()._parse_constraint() or self._parse_projection_def() 839 840 def _parse_alias( 841 self, this: t.Optional[exp.Expression], explicit: bool = False 842 ) -> t.Optional[exp.Expression]: 843 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 844 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 845 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 846 return this 847 848 return super()._parse_alias(this=this, explicit=explicit) 849 850 def _parse_expression(self) -> t.Optional[exp.Expression]: 851 this = super()._parse_expression() 852 853 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 854 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 855 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 856 self._match(TokenType.R_PAREN) 857 858 return this 859 860 def _parse_columns(self) -> exp.Expression: 861 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 862 863 while self._next and self._match_text_seq(")", "APPLY", "("): 864 self._match(TokenType.R_PAREN) 865 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 866 return this 867 868 class Generator(generator.Generator): 869 QUERY_HINTS = False 870 STRUCT_DELIMITER = ("(", ")") 871 NVL2_SUPPORTED = False 872 TABLESAMPLE_REQUIRES_PARENS = False 873 TABLESAMPLE_SIZE_IS_ROWS = False 874 TABLESAMPLE_KEYWORDS = "SAMPLE" 875 LAST_DAY_SUPPORTS_DATE_PART = False 876 CAN_IMPLEMENT_ARRAY_ANY = True 877 SUPPORTS_TO_NUMBER = False 878 JOIN_HINTS = False 879 TABLE_HINTS = False 880 GROUPINGS_SEP = "" 881 SET_OP_MODIFIERS = False 882 SUPPORTS_TABLE_ALIAS_COLUMNS = False 883 VALUES_AS_TABLE = False 884 ARRAY_SIZE_NAME = "LENGTH" 885 886 STRING_TYPE_MAPPING = { 887 exp.DataType.Type.CHAR: "String", 888 exp.DataType.Type.LONGBLOB: "String", 889 exp.DataType.Type.LONGTEXT: "String", 890 exp.DataType.Type.MEDIUMBLOB: "String", 891 exp.DataType.Type.MEDIUMTEXT: "String", 892 exp.DataType.Type.TINYBLOB: "String", 893 exp.DataType.Type.TINYTEXT: "String", 894 exp.DataType.Type.TEXT: "String", 895 exp.DataType.Type.VARBINARY: "String", 896 exp.DataType.Type.VARCHAR: "String", 897 } 898 899 SUPPORTED_JSON_PATH_PARTS = { 900 exp.JSONPathKey, 901 exp.JSONPathRoot, 902 exp.JSONPathSubscript, 903 } 904 905 TYPE_MAPPING = { 906 **generator.Generator.TYPE_MAPPING, 907 **STRING_TYPE_MAPPING, 908 exp.DataType.Type.ARRAY: "Array", 909 exp.DataType.Type.BOOLEAN: "Bool", 910 exp.DataType.Type.BIGINT: "Int64", 911 exp.DataType.Type.DATE32: "Date32", 912 exp.DataType.Type.DATETIME: "DateTime", 913 exp.DataType.Type.DATETIME2: "DateTime", 914 exp.DataType.Type.SMALLDATETIME: "DateTime", 915 exp.DataType.Type.DATETIME64: "DateTime64", 916 exp.DataType.Type.DECIMAL: "Decimal", 917 exp.DataType.Type.DECIMAL32: "Decimal32", 918 exp.DataType.Type.DECIMAL64: "Decimal64", 919 exp.DataType.Type.DECIMAL128: "Decimal128", 920 exp.DataType.Type.DECIMAL256: "Decimal256", 921 exp.DataType.Type.TIMESTAMP: "DateTime", 922 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 923 exp.DataType.Type.DOUBLE: "Float64", 924 exp.DataType.Type.ENUM: "Enum", 925 exp.DataType.Type.ENUM8: "Enum8", 926 exp.DataType.Type.ENUM16: "Enum16", 927 exp.DataType.Type.FIXEDSTRING: "FixedString", 928 exp.DataType.Type.FLOAT: "Float32", 929 exp.DataType.Type.INT: "Int32", 930 exp.DataType.Type.MEDIUMINT: "Int32", 931 exp.DataType.Type.INT128: "Int128", 932 exp.DataType.Type.INT256: "Int256", 933 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 934 exp.DataType.Type.MAP: "Map", 935 exp.DataType.Type.NESTED: "Nested", 936 exp.DataType.Type.SMALLINT: "Int16", 937 exp.DataType.Type.STRUCT: "Tuple", 938 exp.DataType.Type.TINYINT: "Int8", 939 exp.DataType.Type.UBIGINT: "UInt64", 940 exp.DataType.Type.UINT: "UInt32", 941 exp.DataType.Type.UINT128: "UInt128", 942 exp.DataType.Type.UINT256: "UInt256", 943 exp.DataType.Type.USMALLINT: "UInt16", 944 exp.DataType.Type.UTINYINT: "UInt8", 945 exp.DataType.Type.IPV4: "IPv4", 946 exp.DataType.Type.IPV6: "IPv6", 947 exp.DataType.Type.POINT: "Point", 948 exp.DataType.Type.RING: "Ring", 949 exp.DataType.Type.LINESTRING: "LineString", 950 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 951 exp.DataType.Type.POLYGON: "Polygon", 952 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 953 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 954 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 955 } 956 957 TRANSFORMS = { 958 **generator.Generator.TRANSFORMS, 959 exp.AnyValue: rename_func("any"), 960 exp.ApproxDistinct: rename_func("uniq"), 961 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 962 exp.ArraySum: rename_func("arraySum"), 963 exp.ArgMax: arg_max_or_min_no_count("argMax"), 964 exp.ArgMin: arg_max_or_min_no_count("argMin"), 965 exp.Array: inline_array_sql, 966 exp.CastToStrType: rename_func("CAST"), 967 exp.CountIf: rename_func("countIf"), 968 exp.CompressColumnConstraint: lambda self, 969 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 970 exp.ComputedColumnConstraint: lambda self, 971 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 972 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 973 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 974 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 975 exp.DateStrToDate: rename_func("toDate"), 976 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 977 exp.Explode: rename_func("arrayJoin"), 978 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 979 exp.IsNan: rename_func("isNaN"), 980 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 981 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 982 exp.JSONPathKey: json_path_key_only_name, 983 exp.JSONPathRoot: lambda *_: "", 984 exp.Length: length_or_char_length_sql, 985 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 986 exp.Median: rename_func("median"), 987 exp.Nullif: rename_func("nullIf"), 988 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 989 exp.Pivot: no_pivot_sql, 990 exp.Quantile: _quantile_sql, 991 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 992 exp.Rand: rename_func("randCanonical"), 993 exp.StartsWith: rename_func("startsWith"), 994 exp.StrPosition: lambda self, e: self.func( 995 "position", e.this, e.args.get("substr"), e.args.get("position") 996 ), 997 exp.TimeToStr: lambda self, e: self.func( 998 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 999 ), 1000 exp.TimeStrToTime: _timestrtotime_sql, 1001 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1002 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1003 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 1004 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1005 exp.MD5Digest: rename_func("MD5"), 1006 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1007 exp.SHA: rename_func("SHA1"), 1008 exp.SHA2: sha256_sql, 1009 exp.UnixToTime: _unix_to_time_sql, 1010 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1011 exp.Trim: trim_sql, 1012 exp.Variance: rename_func("varSamp"), 1013 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1014 exp.Stddev: rename_func("stddevSamp"), 1015 exp.Chr: rename_func("CHAR"), 1016 exp.Lag: lambda self, e: self.func( 1017 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1018 ), 1019 exp.Lead: lambda self, e: self.func( 1020 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1021 ), 1022 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1023 rename_func("editDistance") 1024 ), 1025 } 1026 1027 PROPERTIES_LOCATION = { 1028 **generator.Generator.PROPERTIES_LOCATION, 1029 exp.OnCluster: exp.Properties.Location.POST_NAME, 1030 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1031 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1032 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1033 } 1034 1035 # There's no list in docs, but it can be found in Clickhouse code 1036 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1037 ON_CLUSTER_TARGETS = { 1038 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1039 "DATABASE", 1040 "TABLE", 1041 "VIEW", 1042 "DICTIONARY", 1043 "INDEX", 1044 "FUNCTION", 1045 "NAMED COLLECTION", 1046 } 1047 1048 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1049 NON_NULLABLE_TYPES = { 1050 exp.DataType.Type.ARRAY, 1051 exp.DataType.Type.MAP, 1052 exp.DataType.Type.STRUCT, 1053 exp.DataType.Type.POINT, 1054 exp.DataType.Type.RING, 1055 exp.DataType.Type.LINESTRING, 1056 exp.DataType.Type.MULTILINESTRING, 1057 exp.DataType.Type.POLYGON, 1058 exp.DataType.Type.MULTIPOLYGON, 1059 } 1060 1061 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1062 strtodate_sql = self.function_fallback_sql(expression) 1063 1064 if not isinstance(expression.parent, exp.Cast): 1065 # StrToDate returns DATEs in other dialects (eg. postgres), so 1066 # this branch aims to improve the transpilation to clickhouse 1067 return f"CAST({strtodate_sql} AS DATE)" 1068 1069 return strtodate_sql 1070 1071 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1072 this = expression.this 1073 1074 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1075 return self.sql(this) 1076 1077 return super().cast_sql(expression, safe_prefix=safe_prefix) 1078 1079 def trycast_sql(self, expression: exp.TryCast) -> str: 1080 dtype = expression.to 1081 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1082 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1083 dtype.set("nullable", True) 1084 1085 return super().cast_sql(expression) 1086 1087 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1088 this = self.json_path_part(expression.this) 1089 return str(int(this) + 1) if is_int(this) else this 1090 1091 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1092 return f"AS {self.sql(expression, 'this')}" 1093 1094 def _any_to_has( 1095 self, 1096 expression: exp.EQ | exp.NEQ, 1097 default: t.Callable[[t.Any], str], 1098 prefix: str = "", 1099 ) -> str: 1100 if isinstance(expression.left, exp.Any): 1101 arr = expression.left 1102 this = expression.right 1103 elif isinstance(expression.right, exp.Any): 1104 arr = expression.right 1105 this = expression.left 1106 else: 1107 return default(expression) 1108 1109 return prefix + self.func("has", arr.this.unnest(), this) 1110 1111 def eq_sql(self, expression: exp.EQ) -> str: 1112 return self._any_to_has(expression, super().eq_sql) 1113 1114 def neq_sql(self, expression: exp.NEQ) -> str: 1115 return self._any_to_has(expression, super().neq_sql, "NOT ") 1116 1117 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1118 # Manually add a flag to make the search case-insensitive 1119 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1120 return self.func("match", expression.this, regex) 1121 1122 def datatype_sql(self, expression: exp.DataType) -> str: 1123 # String is the standard ClickHouse type, every other variant is just an alias. 1124 # Additionally, any supplied length parameter will be ignored. 1125 # 1126 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1127 if expression.this in self.STRING_TYPE_MAPPING: 1128 dtype = "String" 1129 else: 1130 dtype = super().datatype_sql(expression) 1131 1132 # This section changes the type to `Nullable(...)` if the following conditions hold: 1133 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1134 # and change their semantics 1135 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1136 # constraint: "Type of Map key must be a type, that can be represented by integer or 1137 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1138 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1139 parent = expression.parent 1140 nullable = expression.args.get("nullable") 1141 if nullable is True or ( 1142 nullable is None 1143 and not ( 1144 isinstance(parent, exp.DataType) 1145 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1146 and expression.index in (None, 0) 1147 ) 1148 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1149 ): 1150 dtype = f"Nullable({dtype})" 1151 1152 return dtype 1153 1154 def cte_sql(self, expression: exp.CTE) -> str: 1155 if expression.args.get("scalar"): 1156 this = self.sql(expression, "this") 1157 alias = self.sql(expression, "alias") 1158 return f"{this} AS {alias}" 1159 1160 return super().cte_sql(expression) 1161 1162 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1163 return super().after_limit_modifiers(expression) + [ 1164 ( 1165 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1166 if expression.args.get("settings") 1167 else "" 1168 ), 1169 ( 1170 self.seg("FORMAT ") + self.sql(expression, "format") 1171 if expression.args.get("format") 1172 else "" 1173 ), 1174 ] 1175 1176 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1177 params = self.expressions(expression, key="params", flat=True) 1178 return self.func(expression.name, *expression.expressions) + f"({params})" 1179 1180 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1181 return self.func(expression.name, *expression.expressions) 1182 1183 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1184 return self.anonymousaggfunc_sql(expression) 1185 1186 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1187 return self.parameterizedagg_sql(expression) 1188 1189 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1190 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1191 1192 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1193 return f"ON CLUSTER {self.sql(expression, 'this')}" 1194 1195 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1196 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1197 exp.Properties.Location.POST_NAME 1198 ): 1199 this_name = self.sql( 1200 expression.this if isinstance(expression.this, exp.Schema) else expression, 1201 "this", 1202 ) 1203 this_properties = " ".join( 1204 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1205 ) 1206 this_schema = self.schema_columns_sql(expression.this) 1207 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1208 1209 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1210 1211 return super().createable_sql(expression, locations) 1212 1213 def create_sql(self, expression: exp.Create) -> str: 1214 # The comment property comes last in CTAS statements, i.e. after the query 1215 query = expression.expression 1216 if isinstance(query, exp.Query): 1217 comment_prop = expression.find(exp.SchemaCommentProperty) 1218 if comment_prop: 1219 comment_prop.pop() 1220 query.replace(exp.paren(query)) 1221 else: 1222 comment_prop = None 1223 1224 create_sql = super().create_sql(expression) 1225 1226 comment_sql = self.sql(comment_prop) 1227 comment_sql = f" {comment_sql}" if comment_sql else "" 1228 1229 return f"{create_sql}{comment_sql}" 1230 1231 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1232 this = self.indent(self.sql(expression, "this")) 1233 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1234 1235 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1236 this = self.sql(expression, "this") 1237 this = f" {this}" if this else "" 1238 expr = self.sql(expression, "expression") 1239 expr = f" {expr}" if expr else "" 1240 index_type = self.sql(expression, "index_type") 1241 index_type = f" TYPE {index_type}" if index_type else "" 1242 granularity = self.sql(expression, "granularity") 1243 granularity = f" GRANULARITY {granularity}" if granularity else "" 1244 1245 return f"INDEX{this}{expr}{index_type}{granularity}" 1246 1247 def partition_sql(self, expression: exp.Partition) -> str: 1248 return f"PARTITION {self.expressions(expression, flat=True)}" 1249 1250 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1251 return f"ID {self.sql(expression.this)}" 1252 1253 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1254 return ( 1255 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1256 ) 1257 1258 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1259 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1260 1261 def is_sql(self, expression: exp.Is) -> str: 1262 is_sql = super().is_sql(expression) 1263 1264 if isinstance(expression.parent, exp.Not): 1265 # value IS NOT NULL -> NOT (value IS NULL) 1266 is_sql = self.wrap(is_sql) 1267 1268 return is_sql
163class ClickHouse(Dialect): 164 NORMALIZE_FUNCTIONS: bool | str = False 165 NULL_ORDERING = "nulls_are_last" 166 SUPPORTS_USER_DEFINED_TYPES = False 167 SAFE_DIVISION = True 168 LOG_BASE_FIRST: t.Optional[bool] = None 169 FORCE_EARLY_ALIAS_REF_EXPANSION = True 170 PRESERVE_ORIGINAL_NAMES = True 171 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 172 IDENTIFIERS_CAN_START_WITH_DIGIT = True 173 174 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 175 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 176 177 UNESCAPED_SEQUENCES = { 178 "\\0": "\0", 179 } 180 181 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 182 183 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 184 exp.Except: False, 185 exp.Intersect: False, 186 exp.Union: None, 187 } 188 189 class Tokenizer(tokens.Tokenizer): 190 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 191 IDENTIFIERS = ['"', "`"] 192 IDENTIFIER_ESCAPES = ["\\"] 193 STRING_ESCAPES = ["'", "\\"] 194 BIT_STRINGS = [("0b", "")] 195 HEX_STRINGS = [("0x", ""), ("0X", "")] 196 HEREDOC_STRINGS = ["$"] 197 198 KEYWORDS = { 199 **tokens.Tokenizer.KEYWORDS, 200 "ATTACH": TokenType.COMMAND, 201 "DATE32": TokenType.DATE32, 202 "DATETIME64": TokenType.DATETIME64, 203 "DICTIONARY": TokenType.DICTIONARY, 204 "ENUM8": TokenType.ENUM8, 205 "ENUM16": TokenType.ENUM16, 206 "FINAL": TokenType.FINAL, 207 "FIXEDSTRING": TokenType.FIXEDSTRING, 208 "FLOAT32": TokenType.FLOAT, 209 "FLOAT64": TokenType.DOUBLE, 210 "GLOBAL": TokenType.GLOBAL, 211 "INT256": TokenType.INT256, 212 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 213 "MAP": TokenType.MAP, 214 "NESTED": TokenType.NESTED, 215 "SAMPLE": TokenType.TABLE_SAMPLE, 216 "TUPLE": TokenType.STRUCT, 217 "UINT128": TokenType.UINT128, 218 "UINT16": TokenType.USMALLINT, 219 "UINT256": TokenType.UINT256, 220 "UINT32": TokenType.UINT, 221 "UINT64": TokenType.UBIGINT, 222 "UINT8": TokenType.UTINYINT, 223 "IPV4": TokenType.IPV4, 224 "IPV6": TokenType.IPV6, 225 "POINT": TokenType.POINT, 226 "RING": TokenType.RING, 227 "LINESTRING": TokenType.LINESTRING, 228 "MULTILINESTRING": TokenType.MULTILINESTRING, 229 "POLYGON": TokenType.POLYGON, 230 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 231 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 232 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 233 "SYSTEM": TokenType.COMMAND, 234 "PREWHERE": TokenType.PREWHERE, 235 } 236 KEYWORDS.pop("/*+") 237 238 SINGLE_TOKENS = { 239 **tokens.Tokenizer.SINGLE_TOKENS, 240 "$": TokenType.HEREDOC_STRING, 241 } 242 243 class Parser(parser.Parser): 244 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 245 # * select x from t1 union all select x from t2 limit 1; 246 # * select x from t1 union all (select x from t2 limit 1); 247 MODIFIERS_ATTACHED_TO_SET_OP = False 248 INTERVAL_SPANS = False 249 250 FUNCTIONS = { 251 **parser.Parser.FUNCTIONS, 252 "ANY": exp.AnyValue.from_arg_list, 253 "ARRAYSUM": exp.ArraySum.from_arg_list, 254 "COUNTIF": _build_count_if, 255 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 256 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 257 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 258 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 259 "DATE_FORMAT": _build_date_format, 260 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 261 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 262 "FORMATDATETIME": _build_date_format, 263 "JSONEXTRACTSTRING": build_json_extract_path( 264 exp.JSONExtractScalar, zero_based_indexing=False 265 ), 266 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 267 "MAP": parser.build_var_map, 268 "MATCH": exp.RegexpLike.from_arg_list, 269 "RANDCANONICAL": exp.Rand.from_arg_list, 270 "STR_TO_DATE": _build_str_to_date, 271 "TUPLE": exp.Struct.from_arg_list, 272 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 273 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 274 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 275 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 276 "UNIQ": exp.ApproxDistinct.from_arg_list, 277 "XOR": lambda args: exp.Xor(expressions=args), 278 "MD5": exp.MD5Digest.from_arg_list, 279 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 280 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 281 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 282 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 283 } 284 FUNCTIONS.pop("TRANSFORM") 285 286 AGG_FUNCTIONS = { 287 "count", 288 "min", 289 "max", 290 "sum", 291 "avg", 292 "any", 293 "stddevPop", 294 "stddevSamp", 295 "varPop", 296 "varSamp", 297 "corr", 298 "covarPop", 299 "covarSamp", 300 "entropy", 301 "exponentialMovingAverage", 302 "intervalLengthSum", 303 "kolmogorovSmirnovTest", 304 "mannWhitneyUTest", 305 "median", 306 "rankCorr", 307 "sumKahan", 308 "studentTTest", 309 "welchTTest", 310 "anyHeavy", 311 "anyLast", 312 "boundingRatio", 313 "first_value", 314 "last_value", 315 "argMin", 316 "argMax", 317 "avgWeighted", 318 "topK", 319 "topKWeighted", 320 "deltaSum", 321 "deltaSumTimestamp", 322 "groupArray", 323 "groupArrayLast", 324 "groupUniqArray", 325 "groupArrayInsertAt", 326 "groupArrayMovingAvg", 327 "groupArrayMovingSum", 328 "groupArraySample", 329 "groupBitAnd", 330 "groupBitOr", 331 "groupBitXor", 332 "groupBitmap", 333 "groupBitmapAnd", 334 "groupBitmapOr", 335 "groupBitmapXor", 336 "sumWithOverflow", 337 "sumMap", 338 "minMap", 339 "maxMap", 340 "skewSamp", 341 "skewPop", 342 "kurtSamp", 343 "kurtPop", 344 "uniq", 345 "uniqExact", 346 "uniqCombined", 347 "uniqCombined64", 348 "uniqHLL12", 349 "uniqTheta", 350 "quantile", 351 "quantiles", 352 "quantileExact", 353 "quantilesExact", 354 "quantileExactLow", 355 "quantilesExactLow", 356 "quantileExactHigh", 357 "quantilesExactHigh", 358 "quantileExactWeighted", 359 "quantilesExactWeighted", 360 "quantileTiming", 361 "quantilesTiming", 362 "quantileTimingWeighted", 363 "quantilesTimingWeighted", 364 "quantileDeterministic", 365 "quantilesDeterministic", 366 "quantileTDigest", 367 "quantilesTDigest", 368 "quantileTDigestWeighted", 369 "quantilesTDigestWeighted", 370 "quantileBFloat16", 371 "quantilesBFloat16", 372 "quantileBFloat16Weighted", 373 "quantilesBFloat16Weighted", 374 "simpleLinearRegression", 375 "stochasticLinearRegression", 376 "stochasticLogisticRegression", 377 "categoricalInformationValue", 378 "contingency", 379 "cramersV", 380 "cramersVBiasCorrected", 381 "theilsU", 382 "maxIntersections", 383 "maxIntersectionsPosition", 384 "meanZTest", 385 "quantileInterpolatedWeighted", 386 "quantilesInterpolatedWeighted", 387 "quantileGK", 388 "quantilesGK", 389 "sparkBar", 390 "sumCount", 391 "largestTriangleThreeBuckets", 392 "histogram", 393 "sequenceMatch", 394 "sequenceCount", 395 "windowFunnel", 396 "retention", 397 "uniqUpTo", 398 "sequenceNextNode", 399 "exponentialTimeDecayedAvg", 400 } 401 402 AGG_FUNCTIONS_SUFFIXES = [ 403 "If", 404 "Array", 405 "ArrayIf", 406 "Map", 407 "SimpleState", 408 "State", 409 "Merge", 410 "MergeState", 411 "ForEach", 412 "Distinct", 413 "OrDefault", 414 "OrNull", 415 "Resample", 416 "ArgMin", 417 "ArgMax", 418 ] 419 420 FUNC_TOKENS = { 421 *parser.Parser.FUNC_TOKENS, 422 TokenType.SET, 423 } 424 425 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 426 427 ID_VAR_TOKENS = { 428 *parser.Parser.ID_VAR_TOKENS, 429 TokenType.LIKE, 430 } 431 432 AGG_FUNC_MAPPING = ( 433 lambda functions, suffixes: { 434 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 435 } 436 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 437 438 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 439 440 FUNCTION_PARSERS = { 441 **parser.Parser.FUNCTION_PARSERS, 442 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 443 "QUANTILE": lambda self: self._parse_quantile(), 444 "MEDIAN": lambda self: self._parse_quantile(), 445 "COLUMNS": lambda self: self._parse_columns(), 446 } 447 448 FUNCTION_PARSERS.pop("MATCH") 449 450 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 451 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 452 453 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 454 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 455 456 RANGE_PARSERS = { 457 **parser.Parser.RANGE_PARSERS, 458 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 459 and self._parse_in(this, is_global=True), 460 } 461 462 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 463 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 464 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 465 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 466 467 JOIN_KINDS = { 468 *parser.Parser.JOIN_KINDS, 469 TokenType.ANY, 470 TokenType.ASOF, 471 TokenType.ARRAY, 472 } 473 474 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 475 TokenType.ANY, 476 TokenType.ARRAY, 477 TokenType.FINAL, 478 TokenType.FORMAT, 479 TokenType.SETTINGS, 480 } 481 482 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 483 TokenType.FORMAT, 484 } 485 486 LOG_DEFAULTS_TO_LN = True 487 488 QUERY_MODIFIER_PARSERS = { 489 **parser.Parser.QUERY_MODIFIER_PARSERS, 490 TokenType.SETTINGS: lambda self: ( 491 "settings", 492 self._advance() or self._parse_csv(self._parse_assignment), 493 ), 494 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 495 } 496 497 CONSTRAINT_PARSERS = { 498 **parser.Parser.CONSTRAINT_PARSERS, 499 "INDEX": lambda self: self._parse_index_constraint(), 500 "CODEC": lambda self: self._parse_compress(), 501 } 502 503 ALTER_PARSERS = { 504 **parser.Parser.ALTER_PARSERS, 505 "REPLACE": lambda self: self._parse_alter_table_replace(), 506 } 507 508 SCHEMA_UNNAMED_CONSTRAINTS = { 509 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 510 "INDEX", 511 } 512 513 PLACEHOLDER_PARSERS = { 514 **parser.Parser.PLACEHOLDER_PARSERS, 515 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 516 } 517 518 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 519 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 520 return self._parse_lambda() 521 522 def _parse_types( 523 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 524 ) -> t.Optional[exp.Expression]: 525 dtype = super()._parse_types( 526 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 527 ) 528 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 529 # Mark every type as non-nullable which is ClickHouse's default, unless it's 530 # already marked as nullable. This marker helps us transpile types from other 531 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 532 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 533 # fail in ClickHouse without the `Nullable` type constructor. 534 dtype.set("nullable", False) 535 536 return dtype 537 538 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 539 index = self._index 540 this = self._parse_bitwise() 541 if self._match(TokenType.FROM): 542 self._retreat(index) 543 return super()._parse_extract() 544 545 # We return Anonymous here because extract and regexpExtract have different semantics, 546 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 547 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 548 # 549 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 550 self._match(TokenType.COMMA) 551 return self.expression( 552 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 553 ) 554 555 def _parse_assignment(self) -> t.Optional[exp.Expression]: 556 this = super()._parse_assignment() 557 558 if self._match(TokenType.PLACEHOLDER): 559 return self.expression( 560 exp.If, 561 this=this, 562 true=self._parse_assignment(), 563 false=self._match(TokenType.COLON) and self._parse_assignment(), 564 ) 565 566 return this 567 568 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 569 """ 570 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 571 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 572 """ 573 index = self._index 574 575 this = self._parse_id_var() 576 self._match(TokenType.COLON) 577 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 578 self._match_text_seq("IDENTIFIER") and "Identifier" 579 ) 580 581 if not kind: 582 self._retreat(index) 583 return None 584 elif not self._match(TokenType.R_BRACE): 585 self.raise_error("Expecting }") 586 587 return self.expression(exp.Placeholder, this=this, kind=kind) 588 589 def _parse_bracket( 590 self, this: t.Optional[exp.Expression] = None 591 ) -> t.Optional[exp.Expression]: 592 l_brace = self._match(TokenType.L_BRACE, advance=False) 593 bracket = super()._parse_bracket(this) 594 595 if l_brace and isinstance(bracket, exp.Struct): 596 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 597 for expression in bracket.expressions: 598 if not isinstance(expression, exp.PropertyEQ): 599 break 600 601 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 602 varmap.args["values"].append("expressions", expression.expression) 603 604 return varmap 605 606 return bracket 607 608 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 609 this = super()._parse_in(this) 610 this.set("is_global", is_global) 611 return this 612 613 def _parse_table( 614 self, 615 schema: bool = False, 616 joins: bool = False, 617 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 618 parse_bracket: bool = False, 619 is_db_reference: bool = False, 620 parse_partition: bool = False, 621 ) -> t.Optional[exp.Expression]: 622 this = super()._parse_table( 623 schema=schema, 624 joins=joins, 625 alias_tokens=alias_tokens, 626 parse_bracket=parse_bracket, 627 is_db_reference=is_db_reference, 628 ) 629 630 if self._match(TokenType.FINAL): 631 this = self.expression(exp.Final, this=this) 632 633 return this 634 635 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 636 return super()._parse_position(haystack_first=True) 637 638 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 639 def _parse_cte(self) -> t.Optional[exp.CTE]: 640 # WITH <identifier> AS <subquery expression> 641 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 642 643 if not cte: 644 # WITH <expression> AS <identifier> 645 cte = self.expression( 646 exp.CTE, 647 this=self._parse_assignment(), 648 alias=self._parse_table_alias(), 649 scalar=True, 650 ) 651 652 return cte 653 654 def _parse_join_parts( 655 self, 656 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 657 is_global = self._match(TokenType.GLOBAL) and self._prev 658 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 659 660 if kind_pre: 661 kind = self._match_set(self.JOIN_KINDS) and self._prev 662 side = self._match_set(self.JOIN_SIDES) and self._prev 663 return is_global, side, kind 664 665 return ( 666 is_global, 667 self._match_set(self.JOIN_SIDES) and self._prev, 668 self._match_set(self.JOIN_KINDS) and self._prev, 669 ) 670 671 def _parse_join( 672 self, skip_join_token: bool = False, parse_bracket: bool = False 673 ) -> t.Optional[exp.Join]: 674 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 675 if join: 676 join.set("global", join.args.pop("method", None)) 677 678 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 679 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 680 if join.kind == "ARRAY": 681 for table in join.find_all(exp.Table): 682 table.replace(table.to_column()) 683 684 return join 685 686 def _parse_function( 687 self, 688 functions: t.Optional[t.Dict[str, t.Callable]] = None, 689 anonymous: bool = False, 690 optional_parens: bool = True, 691 any_token: bool = False, 692 ) -> t.Optional[exp.Expression]: 693 expr = super()._parse_function( 694 functions=functions, 695 anonymous=anonymous, 696 optional_parens=optional_parens, 697 any_token=any_token, 698 ) 699 700 func = expr.this if isinstance(expr, exp.Window) else expr 701 702 # Aggregate functions can be split in 2 parts: <func_name><suffix> 703 parts = ( 704 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 705 ) 706 707 if parts: 708 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 709 params = self._parse_func_params(anon_func) 710 711 kwargs = { 712 "this": anon_func.this, 713 "expressions": anon_func.expressions, 714 } 715 if parts[1]: 716 kwargs["parts"] = parts 717 exp_class: t.Type[exp.Expression] = ( 718 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 719 ) 720 else: 721 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 722 723 kwargs["exp_class"] = exp_class 724 if params: 725 kwargs["params"] = params 726 727 func = self.expression(**kwargs) 728 729 if isinstance(expr, exp.Window): 730 # The window's func was parsed as Anonymous in base parser, fix its 731 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 732 expr.set("this", func) 733 elif params: 734 # Params have blocked super()._parse_function() from parsing the following window 735 # (if that exists) as they're standing between the function call and the window spec 736 expr = self._parse_window(func) 737 else: 738 expr = func 739 740 return expr 741 742 def _parse_func_params( 743 self, this: t.Optional[exp.Func] = None 744 ) -> t.Optional[t.List[exp.Expression]]: 745 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 746 return self._parse_csv(self._parse_lambda) 747 748 if self._match(TokenType.L_PAREN): 749 params = self._parse_csv(self._parse_lambda) 750 self._match_r_paren(this) 751 return params 752 753 return None 754 755 def _parse_quantile(self) -> exp.Quantile: 756 this = self._parse_lambda() 757 params = self._parse_func_params() 758 if params: 759 return self.expression(exp.Quantile, this=params[0], quantile=this) 760 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 761 762 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 763 return super()._parse_wrapped_id_vars(optional=True) 764 765 def _parse_primary_key( 766 self, wrapped_optional: bool = False, in_props: bool = False 767 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 768 return super()._parse_primary_key( 769 wrapped_optional=wrapped_optional or in_props, in_props=in_props 770 ) 771 772 def _parse_on_property(self) -> t.Optional[exp.Expression]: 773 index = self._index 774 if self._match_text_seq("CLUSTER"): 775 this = self._parse_id_var() 776 if this: 777 return self.expression(exp.OnCluster, this=this) 778 else: 779 self._retreat(index) 780 return None 781 782 def _parse_index_constraint( 783 self, kind: t.Optional[str] = None 784 ) -> exp.IndexColumnConstraint: 785 # INDEX name1 expr TYPE type1(args) GRANULARITY value 786 this = self._parse_id_var() 787 expression = self._parse_assignment() 788 789 index_type = self._match_text_seq("TYPE") and ( 790 self._parse_function() or self._parse_var() 791 ) 792 793 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 794 795 return self.expression( 796 exp.IndexColumnConstraint, 797 this=this, 798 expression=expression, 799 index_type=index_type, 800 granularity=granularity, 801 ) 802 803 def _parse_partition(self) -> t.Optional[exp.Partition]: 804 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 805 if not self._match(TokenType.PARTITION): 806 return None 807 808 if self._match_text_seq("ID"): 809 # Corresponds to the PARTITION ID <string_value> syntax 810 expressions: t.List[exp.Expression] = [ 811 self.expression(exp.PartitionId, this=self._parse_string()) 812 ] 813 else: 814 expressions = self._parse_expressions() 815 816 return self.expression(exp.Partition, expressions=expressions) 817 818 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 819 partition = self._parse_partition() 820 821 if not partition or not self._match(TokenType.FROM): 822 return None 823 824 return self.expression( 825 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 826 ) 827 828 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 829 if not self._match_text_seq("PROJECTION"): 830 return None 831 832 return self.expression( 833 exp.ProjectionDef, 834 this=self._parse_id_var(), 835 expression=self._parse_wrapped(self._parse_statement), 836 ) 837 838 def _parse_constraint(self) -> t.Optional[exp.Expression]: 839 return super()._parse_constraint() or self._parse_projection_def() 840 841 def _parse_alias( 842 self, this: t.Optional[exp.Expression], explicit: bool = False 843 ) -> t.Optional[exp.Expression]: 844 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 845 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 846 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 847 return this 848 849 return super()._parse_alias(this=this, explicit=explicit) 850 851 def _parse_expression(self) -> t.Optional[exp.Expression]: 852 this = super()._parse_expression() 853 854 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 855 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 856 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 857 self._match(TokenType.R_PAREN) 858 859 return this 860 861 def _parse_columns(self) -> exp.Expression: 862 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 863 864 while self._next and self._match_text_seq(")", "APPLY", "("): 865 self._match(TokenType.R_PAREN) 866 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 867 return this 868 869 class Generator(generator.Generator): 870 QUERY_HINTS = False 871 STRUCT_DELIMITER = ("(", ")") 872 NVL2_SUPPORTED = False 873 TABLESAMPLE_REQUIRES_PARENS = False 874 TABLESAMPLE_SIZE_IS_ROWS = False 875 TABLESAMPLE_KEYWORDS = "SAMPLE" 876 LAST_DAY_SUPPORTS_DATE_PART = False 877 CAN_IMPLEMENT_ARRAY_ANY = True 878 SUPPORTS_TO_NUMBER = False 879 JOIN_HINTS = False 880 TABLE_HINTS = False 881 GROUPINGS_SEP = "" 882 SET_OP_MODIFIERS = False 883 SUPPORTS_TABLE_ALIAS_COLUMNS = False 884 VALUES_AS_TABLE = False 885 ARRAY_SIZE_NAME = "LENGTH" 886 887 STRING_TYPE_MAPPING = { 888 exp.DataType.Type.CHAR: "String", 889 exp.DataType.Type.LONGBLOB: "String", 890 exp.DataType.Type.LONGTEXT: "String", 891 exp.DataType.Type.MEDIUMBLOB: "String", 892 exp.DataType.Type.MEDIUMTEXT: "String", 893 exp.DataType.Type.TINYBLOB: "String", 894 exp.DataType.Type.TINYTEXT: "String", 895 exp.DataType.Type.TEXT: "String", 896 exp.DataType.Type.VARBINARY: "String", 897 exp.DataType.Type.VARCHAR: "String", 898 } 899 900 SUPPORTED_JSON_PATH_PARTS = { 901 exp.JSONPathKey, 902 exp.JSONPathRoot, 903 exp.JSONPathSubscript, 904 } 905 906 TYPE_MAPPING = { 907 **generator.Generator.TYPE_MAPPING, 908 **STRING_TYPE_MAPPING, 909 exp.DataType.Type.ARRAY: "Array", 910 exp.DataType.Type.BOOLEAN: "Bool", 911 exp.DataType.Type.BIGINT: "Int64", 912 exp.DataType.Type.DATE32: "Date32", 913 exp.DataType.Type.DATETIME: "DateTime", 914 exp.DataType.Type.DATETIME2: "DateTime", 915 exp.DataType.Type.SMALLDATETIME: "DateTime", 916 exp.DataType.Type.DATETIME64: "DateTime64", 917 exp.DataType.Type.DECIMAL: "Decimal", 918 exp.DataType.Type.DECIMAL32: "Decimal32", 919 exp.DataType.Type.DECIMAL64: "Decimal64", 920 exp.DataType.Type.DECIMAL128: "Decimal128", 921 exp.DataType.Type.DECIMAL256: "Decimal256", 922 exp.DataType.Type.TIMESTAMP: "DateTime", 923 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 924 exp.DataType.Type.DOUBLE: "Float64", 925 exp.DataType.Type.ENUM: "Enum", 926 exp.DataType.Type.ENUM8: "Enum8", 927 exp.DataType.Type.ENUM16: "Enum16", 928 exp.DataType.Type.FIXEDSTRING: "FixedString", 929 exp.DataType.Type.FLOAT: "Float32", 930 exp.DataType.Type.INT: "Int32", 931 exp.DataType.Type.MEDIUMINT: "Int32", 932 exp.DataType.Type.INT128: "Int128", 933 exp.DataType.Type.INT256: "Int256", 934 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 935 exp.DataType.Type.MAP: "Map", 936 exp.DataType.Type.NESTED: "Nested", 937 exp.DataType.Type.SMALLINT: "Int16", 938 exp.DataType.Type.STRUCT: "Tuple", 939 exp.DataType.Type.TINYINT: "Int8", 940 exp.DataType.Type.UBIGINT: "UInt64", 941 exp.DataType.Type.UINT: "UInt32", 942 exp.DataType.Type.UINT128: "UInt128", 943 exp.DataType.Type.UINT256: "UInt256", 944 exp.DataType.Type.USMALLINT: "UInt16", 945 exp.DataType.Type.UTINYINT: "UInt8", 946 exp.DataType.Type.IPV4: "IPv4", 947 exp.DataType.Type.IPV6: "IPv6", 948 exp.DataType.Type.POINT: "Point", 949 exp.DataType.Type.RING: "Ring", 950 exp.DataType.Type.LINESTRING: "LineString", 951 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 952 exp.DataType.Type.POLYGON: "Polygon", 953 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 954 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 955 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 956 } 957 958 TRANSFORMS = { 959 **generator.Generator.TRANSFORMS, 960 exp.AnyValue: rename_func("any"), 961 exp.ApproxDistinct: rename_func("uniq"), 962 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 963 exp.ArraySum: rename_func("arraySum"), 964 exp.ArgMax: arg_max_or_min_no_count("argMax"), 965 exp.ArgMin: arg_max_or_min_no_count("argMin"), 966 exp.Array: inline_array_sql, 967 exp.CastToStrType: rename_func("CAST"), 968 exp.CountIf: rename_func("countIf"), 969 exp.CompressColumnConstraint: lambda self, 970 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 971 exp.ComputedColumnConstraint: lambda self, 972 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 973 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 974 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 975 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 976 exp.DateStrToDate: rename_func("toDate"), 977 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 978 exp.Explode: rename_func("arrayJoin"), 979 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 980 exp.IsNan: rename_func("isNaN"), 981 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 982 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 983 exp.JSONPathKey: json_path_key_only_name, 984 exp.JSONPathRoot: lambda *_: "", 985 exp.Length: length_or_char_length_sql, 986 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 987 exp.Median: rename_func("median"), 988 exp.Nullif: rename_func("nullIf"), 989 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 990 exp.Pivot: no_pivot_sql, 991 exp.Quantile: _quantile_sql, 992 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 993 exp.Rand: rename_func("randCanonical"), 994 exp.StartsWith: rename_func("startsWith"), 995 exp.StrPosition: lambda self, e: self.func( 996 "position", e.this, e.args.get("substr"), e.args.get("position") 997 ), 998 exp.TimeToStr: lambda self, e: self.func( 999 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1000 ), 1001 exp.TimeStrToTime: _timestrtotime_sql, 1002 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1003 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1004 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 1005 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1006 exp.MD5Digest: rename_func("MD5"), 1007 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1008 exp.SHA: rename_func("SHA1"), 1009 exp.SHA2: sha256_sql, 1010 exp.UnixToTime: _unix_to_time_sql, 1011 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1012 exp.Trim: trim_sql, 1013 exp.Variance: rename_func("varSamp"), 1014 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1015 exp.Stddev: rename_func("stddevSamp"), 1016 exp.Chr: rename_func("CHAR"), 1017 exp.Lag: lambda self, e: self.func( 1018 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1019 ), 1020 exp.Lead: lambda self, e: self.func( 1021 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1022 ), 1023 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1024 rename_func("editDistance") 1025 ), 1026 } 1027 1028 PROPERTIES_LOCATION = { 1029 **generator.Generator.PROPERTIES_LOCATION, 1030 exp.OnCluster: exp.Properties.Location.POST_NAME, 1031 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1032 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1033 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1034 } 1035 1036 # There's no list in docs, but it can be found in Clickhouse code 1037 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1038 ON_CLUSTER_TARGETS = { 1039 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1040 "DATABASE", 1041 "TABLE", 1042 "VIEW", 1043 "DICTIONARY", 1044 "INDEX", 1045 "FUNCTION", 1046 "NAMED COLLECTION", 1047 } 1048 1049 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1050 NON_NULLABLE_TYPES = { 1051 exp.DataType.Type.ARRAY, 1052 exp.DataType.Type.MAP, 1053 exp.DataType.Type.STRUCT, 1054 exp.DataType.Type.POINT, 1055 exp.DataType.Type.RING, 1056 exp.DataType.Type.LINESTRING, 1057 exp.DataType.Type.MULTILINESTRING, 1058 exp.DataType.Type.POLYGON, 1059 exp.DataType.Type.MULTIPOLYGON, 1060 } 1061 1062 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1063 strtodate_sql = self.function_fallback_sql(expression) 1064 1065 if not isinstance(expression.parent, exp.Cast): 1066 # StrToDate returns DATEs in other dialects (eg. postgres), so 1067 # this branch aims to improve the transpilation to clickhouse 1068 return f"CAST({strtodate_sql} AS DATE)" 1069 1070 return strtodate_sql 1071 1072 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1073 this = expression.this 1074 1075 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1076 return self.sql(this) 1077 1078 return super().cast_sql(expression, safe_prefix=safe_prefix) 1079 1080 def trycast_sql(self, expression: exp.TryCast) -> str: 1081 dtype = expression.to 1082 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1083 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1084 dtype.set("nullable", True) 1085 1086 return super().cast_sql(expression) 1087 1088 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1089 this = self.json_path_part(expression.this) 1090 return str(int(this) + 1) if is_int(this) else this 1091 1092 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1093 return f"AS {self.sql(expression, 'this')}" 1094 1095 def _any_to_has( 1096 self, 1097 expression: exp.EQ | exp.NEQ, 1098 default: t.Callable[[t.Any], str], 1099 prefix: str = "", 1100 ) -> str: 1101 if isinstance(expression.left, exp.Any): 1102 arr = expression.left 1103 this = expression.right 1104 elif isinstance(expression.right, exp.Any): 1105 arr = expression.right 1106 this = expression.left 1107 else: 1108 return default(expression) 1109 1110 return prefix + self.func("has", arr.this.unnest(), this) 1111 1112 def eq_sql(self, expression: exp.EQ) -> str: 1113 return self._any_to_has(expression, super().eq_sql) 1114 1115 def neq_sql(self, expression: exp.NEQ) -> str: 1116 return self._any_to_has(expression, super().neq_sql, "NOT ") 1117 1118 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1119 # Manually add a flag to make the search case-insensitive 1120 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1121 return self.func("match", expression.this, regex) 1122 1123 def datatype_sql(self, expression: exp.DataType) -> str: 1124 # String is the standard ClickHouse type, every other variant is just an alias. 1125 # Additionally, any supplied length parameter will be ignored. 1126 # 1127 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1128 if expression.this in self.STRING_TYPE_MAPPING: 1129 dtype = "String" 1130 else: 1131 dtype = super().datatype_sql(expression) 1132 1133 # This section changes the type to `Nullable(...)` if the following conditions hold: 1134 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1135 # and change their semantics 1136 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1137 # constraint: "Type of Map key must be a type, that can be represented by integer or 1138 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1139 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1140 parent = expression.parent 1141 nullable = expression.args.get("nullable") 1142 if nullable is True or ( 1143 nullable is None 1144 and not ( 1145 isinstance(parent, exp.DataType) 1146 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1147 and expression.index in (None, 0) 1148 ) 1149 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1150 ): 1151 dtype = f"Nullable({dtype})" 1152 1153 return dtype 1154 1155 def cte_sql(self, expression: exp.CTE) -> str: 1156 if expression.args.get("scalar"): 1157 this = self.sql(expression, "this") 1158 alias = self.sql(expression, "alias") 1159 return f"{this} AS {alias}" 1160 1161 return super().cte_sql(expression) 1162 1163 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1164 return super().after_limit_modifiers(expression) + [ 1165 ( 1166 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1167 if expression.args.get("settings") 1168 else "" 1169 ), 1170 ( 1171 self.seg("FORMAT ") + self.sql(expression, "format") 1172 if expression.args.get("format") 1173 else "" 1174 ), 1175 ] 1176 1177 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1178 params = self.expressions(expression, key="params", flat=True) 1179 return self.func(expression.name, *expression.expressions) + f"({params})" 1180 1181 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1182 return self.func(expression.name, *expression.expressions) 1183 1184 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1185 return self.anonymousaggfunc_sql(expression) 1186 1187 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1188 return self.parameterizedagg_sql(expression) 1189 1190 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1191 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1192 1193 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1194 return f"ON CLUSTER {self.sql(expression, 'this')}" 1195 1196 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1197 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1198 exp.Properties.Location.POST_NAME 1199 ): 1200 this_name = self.sql( 1201 expression.this if isinstance(expression.this, exp.Schema) else expression, 1202 "this", 1203 ) 1204 this_properties = " ".join( 1205 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1206 ) 1207 this_schema = self.schema_columns_sql(expression.this) 1208 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1209 1210 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1211 1212 return super().createable_sql(expression, locations) 1213 1214 def create_sql(self, expression: exp.Create) -> str: 1215 # The comment property comes last in CTAS statements, i.e. after the query 1216 query = expression.expression 1217 if isinstance(query, exp.Query): 1218 comment_prop = expression.find(exp.SchemaCommentProperty) 1219 if comment_prop: 1220 comment_prop.pop() 1221 query.replace(exp.paren(query)) 1222 else: 1223 comment_prop = None 1224 1225 create_sql = super().create_sql(expression) 1226 1227 comment_sql = self.sql(comment_prop) 1228 comment_sql = f" {comment_sql}" if comment_sql else "" 1229 1230 return f"{create_sql}{comment_sql}" 1231 1232 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1233 this = self.indent(self.sql(expression, "this")) 1234 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1235 1236 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1237 this = self.sql(expression, "this") 1238 this = f" {this}" if this else "" 1239 expr = self.sql(expression, "expression") 1240 expr = f" {expr}" if expr else "" 1241 index_type = self.sql(expression, "index_type") 1242 index_type = f" TYPE {index_type}" if index_type else "" 1243 granularity = self.sql(expression, "granularity") 1244 granularity = f" GRANULARITY {granularity}" if granularity else "" 1245 1246 return f"INDEX{this}{expr}{index_type}{granularity}" 1247 1248 def partition_sql(self, expression: exp.Partition) -> str: 1249 return f"PARTITION {self.expressions(expression, flat=True)}" 1250 1251 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1252 return f"ID {self.sql(expression.this)}" 1253 1254 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1255 return ( 1256 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1257 ) 1258 1259 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1260 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1261 1262 def is_sql(self, expression: exp.Is) -> str: 1263 is_sql = super().is_sql(expression) 1264 1265 if isinstance(expression.parent, exp.Not): 1266 # value IS NOT NULL -> NOT (value IS NULL) 1267 is_sql = self.wrap(is_sql) 1268 1269 return is_sql
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
189 class Tokenizer(tokens.Tokenizer): 190 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 191 IDENTIFIERS = ['"', "`"] 192 IDENTIFIER_ESCAPES = ["\\"] 193 STRING_ESCAPES = ["'", "\\"] 194 BIT_STRINGS = [("0b", "")] 195 HEX_STRINGS = [("0x", ""), ("0X", "")] 196 HEREDOC_STRINGS = ["$"] 197 198 KEYWORDS = { 199 **tokens.Tokenizer.KEYWORDS, 200 "ATTACH": TokenType.COMMAND, 201 "DATE32": TokenType.DATE32, 202 "DATETIME64": TokenType.DATETIME64, 203 "DICTIONARY": TokenType.DICTIONARY, 204 "ENUM8": TokenType.ENUM8, 205 "ENUM16": TokenType.ENUM16, 206 "FINAL": TokenType.FINAL, 207 "FIXEDSTRING": TokenType.FIXEDSTRING, 208 "FLOAT32": TokenType.FLOAT, 209 "FLOAT64": TokenType.DOUBLE, 210 "GLOBAL": TokenType.GLOBAL, 211 "INT256": TokenType.INT256, 212 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 213 "MAP": TokenType.MAP, 214 "NESTED": TokenType.NESTED, 215 "SAMPLE": TokenType.TABLE_SAMPLE, 216 "TUPLE": TokenType.STRUCT, 217 "UINT128": TokenType.UINT128, 218 "UINT16": TokenType.USMALLINT, 219 "UINT256": TokenType.UINT256, 220 "UINT32": TokenType.UINT, 221 "UINT64": TokenType.UBIGINT, 222 "UINT8": TokenType.UTINYINT, 223 "IPV4": TokenType.IPV4, 224 "IPV6": TokenType.IPV6, 225 "POINT": TokenType.POINT, 226 "RING": TokenType.RING, 227 "LINESTRING": TokenType.LINESTRING, 228 "MULTILINESTRING": TokenType.MULTILINESTRING, 229 "POLYGON": TokenType.POLYGON, 230 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 231 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 232 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 233 "SYSTEM": TokenType.COMMAND, 234 "PREWHERE": TokenType.PREWHERE, 235 } 236 KEYWORDS.pop("/*+") 237 238 SINGLE_TOKENS = { 239 **tokens.Tokenizer.SINGLE_TOKENS, 240 "$": TokenType.HEREDOC_STRING, 241 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
243 class Parser(parser.Parser): 244 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 245 # * select x from t1 union all select x from t2 limit 1; 246 # * select x from t1 union all (select x from t2 limit 1); 247 MODIFIERS_ATTACHED_TO_SET_OP = False 248 INTERVAL_SPANS = False 249 250 FUNCTIONS = { 251 **parser.Parser.FUNCTIONS, 252 "ANY": exp.AnyValue.from_arg_list, 253 "ARRAYSUM": exp.ArraySum.from_arg_list, 254 "COUNTIF": _build_count_if, 255 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 256 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 257 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 258 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 259 "DATE_FORMAT": _build_date_format, 260 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 261 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 262 "FORMATDATETIME": _build_date_format, 263 "JSONEXTRACTSTRING": build_json_extract_path( 264 exp.JSONExtractScalar, zero_based_indexing=False 265 ), 266 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 267 "MAP": parser.build_var_map, 268 "MATCH": exp.RegexpLike.from_arg_list, 269 "RANDCANONICAL": exp.Rand.from_arg_list, 270 "STR_TO_DATE": _build_str_to_date, 271 "TUPLE": exp.Struct.from_arg_list, 272 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 273 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 274 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 275 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 276 "UNIQ": exp.ApproxDistinct.from_arg_list, 277 "XOR": lambda args: exp.Xor(expressions=args), 278 "MD5": exp.MD5Digest.from_arg_list, 279 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 280 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 281 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 282 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 283 } 284 FUNCTIONS.pop("TRANSFORM") 285 286 AGG_FUNCTIONS = { 287 "count", 288 "min", 289 "max", 290 "sum", 291 "avg", 292 "any", 293 "stddevPop", 294 "stddevSamp", 295 "varPop", 296 "varSamp", 297 "corr", 298 "covarPop", 299 "covarSamp", 300 "entropy", 301 "exponentialMovingAverage", 302 "intervalLengthSum", 303 "kolmogorovSmirnovTest", 304 "mannWhitneyUTest", 305 "median", 306 "rankCorr", 307 "sumKahan", 308 "studentTTest", 309 "welchTTest", 310 "anyHeavy", 311 "anyLast", 312 "boundingRatio", 313 "first_value", 314 "last_value", 315 "argMin", 316 "argMax", 317 "avgWeighted", 318 "topK", 319 "topKWeighted", 320 "deltaSum", 321 "deltaSumTimestamp", 322 "groupArray", 323 "groupArrayLast", 324 "groupUniqArray", 325 "groupArrayInsertAt", 326 "groupArrayMovingAvg", 327 "groupArrayMovingSum", 328 "groupArraySample", 329 "groupBitAnd", 330 "groupBitOr", 331 "groupBitXor", 332 "groupBitmap", 333 "groupBitmapAnd", 334 "groupBitmapOr", 335 "groupBitmapXor", 336 "sumWithOverflow", 337 "sumMap", 338 "minMap", 339 "maxMap", 340 "skewSamp", 341 "skewPop", 342 "kurtSamp", 343 "kurtPop", 344 "uniq", 345 "uniqExact", 346 "uniqCombined", 347 "uniqCombined64", 348 "uniqHLL12", 349 "uniqTheta", 350 "quantile", 351 "quantiles", 352 "quantileExact", 353 "quantilesExact", 354 "quantileExactLow", 355 "quantilesExactLow", 356 "quantileExactHigh", 357 "quantilesExactHigh", 358 "quantileExactWeighted", 359 "quantilesExactWeighted", 360 "quantileTiming", 361 "quantilesTiming", 362 "quantileTimingWeighted", 363 "quantilesTimingWeighted", 364 "quantileDeterministic", 365 "quantilesDeterministic", 366 "quantileTDigest", 367 "quantilesTDigest", 368 "quantileTDigestWeighted", 369 "quantilesTDigestWeighted", 370 "quantileBFloat16", 371 "quantilesBFloat16", 372 "quantileBFloat16Weighted", 373 "quantilesBFloat16Weighted", 374 "simpleLinearRegression", 375 "stochasticLinearRegression", 376 "stochasticLogisticRegression", 377 "categoricalInformationValue", 378 "contingency", 379 "cramersV", 380 "cramersVBiasCorrected", 381 "theilsU", 382 "maxIntersections", 383 "maxIntersectionsPosition", 384 "meanZTest", 385 "quantileInterpolatedWeighted", 386 "quantilesInterpolatedWeighted", 387 "quantileGK", 388 "quantilesGK", 389 "sparkBar", 390 "sumCount", 391 "largestTriangleThreeBuckets", 392 "histogram", 393 "sequenceMatch", 394 "sequenceCount", 395 "windowFunnel", 396 "retention", 397 "uniqUpTo", 398 "sequenceNextNode", 399 "exponentialTimeDecayedAvg", 400 } 401 402 AGG_FUNCTIONS_SUFFIXES = [ 403 "If", 404 "Array", 405 "ArrayIf", 406 "Map", 407 "SimpleState", 408 "State", 409 "Merge", 410 "MergeState", 411 "ForEach", 412 "Distinct", 413 "OrDefault", 414 "OrNull", 415 "Resample", 416 "ArgMin", 417 "ArgMax", 418 ] 419 420 FUNC_TOKENS = { 421 *parser.Parser.FUNC_TOKENS, 422 TokenType.SET, 423 } 424 425 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 426 427 ID_VAR_TOKENS = { 428 *parser.Parser.ID_VAR_TOKENS, 429 TokenType.LIKE, 430 } 431 432 AGG_FUNC_MAPPING = ( 433 lambda functions, suffixes: { 434 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 435 } 436 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 437 438 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 439 440 FUNCTION_PARSERS = { 441 **parser.Parser.FUNCTION_PARSERS, 442 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 443 "QUANTILE": lambda self: self._parse_quantile(), 444 "MEDIAN": lambda self: self._parse_quantile(), 445 "COLUMNS": lambda self: self._parse_columns(), 446 } 447 448 FUNCTION_PARSERS.pop("MATCH") 449 450 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 451 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 452 453 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 454 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 455 456 RANGE_PARSERS = { 457 **parser.Parser.RANGE_PARSERS, 458 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 459 and self._parse_in(this, is_global=True), 460 } 461 462 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 463 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 464 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 465 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 466 467 JOIN_KINDS = { 468 *parser.Parser.JOIN_KINDS, 469 TokenType.ANY, 470 TokenType.ASOF, 471 TokenType.ARRAY, 472 } 473 474 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 475 TokenType.ANY, 476 TokenType.ARRAY, 477 TokenType.FINAL, 478 TokenType.FORMAT, 479 TokenType.SETTINGS, 480 } 481 482 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 483 TokenType.FORMAT, 484 } 485 486 LOG_DEFAULTS_TO_LN = True 487 488 QUERY_MODIFIER_PARSERS = { 489 **parser.Parser.QUERY_MODIFIER_PARSERS, 490 TokenType.SETTINGS: lambda self: ( 491 "settings", 492 self._advance() or self._parse_csv(self._parse_assignment), 493 ), 494 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 495 } 496 497 CONSTRAINT_PARSERS = { 498 **parser.Parser.CONSTRAINT_PARSERS, 499 "INDEX": lambda self: self._parse_index_constraint(), 500 "CODEC": lambda self: self._parse_compress(), 501 } 502 503 ALTER_PARSERS = { 504 **parser.Parser.ALTER_PARSERS, 505 "REPLACE": lambda self: self._parse_alter_table_replace(), 506 } 507 508 SCHEMA_UNNAMED_CONSTRAINTS = { 509 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 510 "INDEX", 511 } 512 513 PLACEHOLDER_PARSERS = { 514 **parser.Parser.PLACEHOLDER_PARSERS, 515 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 516 } 517 518 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 519 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 520 return self._parse_lambda() 521 522 def _parse_types( 523 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 524 ) -> t.Optional[exp.Expression]: 525 dtype = super()._parse_types( 526 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 527 ) 528 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 529 # Mark every type as non-nullable which is ClickHouse's default, unless it's 530 # already marked as nullable. This marker helps us transpile types from other 531 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 532 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 533 # fail in ClickHouse without the `Nullable` type constructor. 534 dtype.set("nullable", False) 535 536 return dtype 537 538 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 539 index = self._index 540 this = self._parse_bitwise() 541 if self._match(TokenType.FROM): 542 self._retreat(index) 543 return super()._parse_extract() 544 545 # We return Anonymous here because extract and regexpExtract have different semantics, 546 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 547 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 548 # 549 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 550 self._match(TokenType.COMMA) 551 return self.expression( 552 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 553 ) 554 555 def _parse_assignment(self) -> t.Optional[exp.Expression]: 556 this = super()._parse_assignment() 557 558 if self._match(TokenType.PLACEHOLDER): 559 return self.expression( 560 exp.If, 561 this=this, 562 true=self._parse_assignment(), 563 false=self._match(TokenType.COLON) and self._parse_assignment(), 564 ) 565 566 return this 567 568 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 569 """ 570 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 571 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 572 """ 573 index = self._index 574 575 this = self._parse_id_var() 576 self._match(TokenType.COLON) 577 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 578 self._match_text_seq("IDENTIFIER") and "Identifier" 579 ) 580 581 if not kind: 582 self._retreat(index) 583 return None 584 elif not self._match(TokenType.R_BRACE): 585 self.raise_error("Expecting }") 586 587 return self.expression(exp.Placeholder, this=this, kind=kind) 588 589 def _parse_bracket( 590 self, this: t.Optional[exp.Expression] = None 591 ) -> t.Optional[exp.Expression]: 592 l_brace = self._match(TokenType.L_BRACE, advance=False) 593 bracket = super()._parse_bracket(this) 594 595 if l_brace and isinstance(bracket, exp.Struct): 596 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 597 for expression in bracket.expressions: 598 if not isinstance(expression, exp.PropertyEQ): 599 break 600 601 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 602 varmap.args["values"].append("expressions", expression.expression) 603 604 return varmap 605 606 return bracket 607 608 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 609 this = super()._parse_in(this) 610 this.set("is_global", is_global) 611 return this 612 613 def _parse_table( 614 self, 615 schema: bool = False, 616 joins: bool = False, 617 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 618 parse_bracket: bool = False, 619 is_db_reference: bool = False, 620 parse_partition: bool = False, 621 ) -> t.Optional[exp.Expression]: 622 this = super()._parse_table( 623 schema=schema, 624 joins=joins, 625 alias_tokens=alias_tokens, 626 parse_bracket=parse_bracket, 627 is_db_reference=is_db_reference, 628 ) 629 630 if self._match(TokenType.FINAL): 631 this = self.expression(exp.Final, this=this) 632 633 return this 634 635 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 636 return super()._parse_position(haystack_first=True) 637 638 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 639 def _parse_cte(self) -> t.Optional[exp.CTE]: 640 # WITH <identifier> AS <subquery expression> 641 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 642 643 if not cte: 644 # WITH <expression> AS <identifier> 645 cte = self.expression( 646 exp.CTE, 647 this=self._parse_assignment(), 648 alias=self._parse_table_alias(), 649 scalar=True, 650 ) 651 652 return cte 653 654 def _parse_join_parts( 655 self, 656 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 657 is_global = self._match(TokenType.GLOBAL) and self._prev 658 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 659 660 if kind_pre: 661 kind = self._match_set(self.JOIN_KINDS) and self._prev 662 side = self._match_set(self.JOIN_SIDES) and self._prev 663 return is_global, side, kind 664 665 return ( 666 is_global, 667 self._match_set(self.JOIN_SIDES) and self._prev, 668 self._match_set(self.JOIN_KINDS) and self._prev, 669 ) 670 671 def _parse_join( 672 self, skip_join_token: bool = False, parse_bracket: bool = False 673 ) -> t.Optional[exp.Join]: 674 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 675 if join: 676 join.set("global", join.args.pop("method", None)) 677 678 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 679 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 680 if join.kind == "ARRAY": 681 for table in join.find_all(exp.Table): 682 table.replace(table.to_column()) 683 684 return join 685 686 def _parse_function( 687 self, 688 functions: t.Optional[t.Dict[str, t.Callable]] = None, 689 anonymous: bool = False, 690 optional_parens: bool = True, 691 any_token: bool = False, 692 ) -> t.Optional[exp.Expression]: 693 expr = super()._parse_function( 694 functions=functions, 695 anonymous=anonymous, 696 optional_parens=optional_parens, 697 any_token=any_token, 698 ) 699 700 func = expr.this if isinstance(expr, exp.Window) else expr 701 702 # Aggregate functions can be split in 2 parts: <func_name><suffix> 703 parts = ( 704 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 705 ) 706 707 if parts: 708 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 709 params = self._parse_func_params(anon_func) 710 711 kwargs = { 712 "this": anon_func.this, 713 "expressions": anon_func.expressions, 714 } 715 if parts[1]: 716 kwargs["parts"] = parts 717 exp_class: t.Type[exp.Expression] = ( 718 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 719 ) 720 else: 721 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 722 723 kwargs["exp_class"] = exp_class 724 if params: 725 kwargs["params"] = params 726 727 func = self.expression(**kwargs) 728 729 if isinstance(expr, exp.Window): 730 # The window's func was parsed as Anonymous in base parser, fix its 731 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 732 expr.set("this", func) 733 elif params: 734 # Params have blocked super()._parse_function() from parsing the following window 735 # (if that exists) as they're standing between the function call and the window spec 736 expr = self._parse_window(func) 737 else: 738 expr = func 739 740 return expr 741 742 def _parse_func_params( 743 self, this: t.Optional[exp.Func] = None 744 ) -> t.Optional[t.List[exp.Expression]]: 745 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 746 return self._parse_csv(self._parse_lambda) 747 748 if self._match(TokenType.L_PAREN): 749 params = self._parse_csv(self._parse_lambda) 750 self._match_r_paren(this) 751 return params 752 753 return None 754 755 def _parse_quantile(self) -> exp.Quantile: 756 this = self._parse_lambda() 757 params = self._parse_func_params() 758 if params: 759 return self.expression(exp.Quantile, this=params[0], quantile=this) 760 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 761 762 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 763 return super()._parse_wrapped_id_vars(optional=True) 764 765 def _parse_primary_key( 766 self, wrapped_optional: bool = False, in_props: bool = False 767 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 768 return super()._parse_primary_key( 769 wrapped_optional=wrapped_optional or in_props, in_props=in_props 770 ) 771 772 def _parse_on_property(self) -> t.Optional[exp.Expression]: 773 index = self._index 774 if self._match_text_seq("CLUSTER"): 775 this = self._parse_id_var() 776 if this: 777 return self.expression(exp.OnCluster, this=this) 778 else: 779 self._retreat(index) 780 return None 781 782 def _parse_index_constraint( 783 self, kind: t.Optional[str] = None 784 ) -> exp.IndexColumnConstraint: 785 # INDEX name1 expr TYPE type1(args) GRANULARITY value 786 this = self._parse_id_var() 787 expression = self._parse_assignment() 788 789 index_type = self._match_text_seq("TYPE") and ( 790 self._parse_function() or self._parse_var() 791 ) 792 793 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 794 795 return self.expression( 796 exp.IndexColumnConstraint, 797 this=this, 798 expression=expression, 799 index_type=index_type, 800 granularity=granularity, 801 ) 802 803 def _parse_partition(self) -> t.Optional[exp.Partition]: 804 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 805 if not self._match(TokenType.PARTITION): 806 return None 807 808 if self._match_text_seq("ID"): 809 # Corresponds to the PARTITION ID <string_value> syntax 810 expressions: t.List[exp.Expression] = [ 811 self.expression(exp.PartitionId, this=self._parse_string()) 812 ] 813 else: 814 expressions = self._parse_expressions() 815 816 return self.expression(exp.Partition, expressions=expressions) 817 818 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 819 partition = self._parse_partition() 820 821 if not partition or not self._match(TokenType.FROM): 822 return None 823 824 return self.expression( 825 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 826 ) 827 828 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 829 if not self._match_text_seq("PROJECTION"): 830 return None 831 832 return self.expression( 833 exp.ProjectionDef, 834 this=self._parse_id_var(), 835 expression=self._parse_wrapped(self._parse_statement), 836 ) 837 838 def _parse_constraint(self) -> t.Optional[exp.Expression]: 839 return super()._parse_constraint() or self._parse_projection_def() 840 841 def _parse_alias( 842 self, this: t.Optional[exp.Expression], explicit: bool = False 843 ) -> t.Optional[exp.Expression]: 844 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 845 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 846 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 847 return this 848 849 return super()._parse_alias(this=this, explicit=explicit) 850 851 def _parse_expression(self) -> t.Optional[exp.Expression]: 852 this = super()._parse_expression() 853 854 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 855 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 856 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 857 self._match(TokenType.R_PAREN) 858 859 return this 860 861 def _parse_columns(self) -> exp.Expression: 862 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 863 864 while self._next and self._match_text_seq(")", "APPLY", "("): 865 self._match(TokenType.R_PAREN) 866 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 867 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
869 class Generator(generator.Generator): 870 QUERY_HINTS = False 871 STRUCT_DELIMITER = ("(", ")") 872 NVL2_SUPPORTED = False 873 TABLESAMPLE_REQUIRES_PARENS = False 874 TABLESAMPLE_SIZE_IS_ROWS = False 875 TABLESAMPLE_KEYWORDS = "SAMPLE" 876 LAST_DAY_SUPPORTS_DATE_PART = False 877 CAN_IMPLEMENT_ARRAY_ANY = True 878 SUPPORTS_TO_NUMBER = False 879 JOIN_HINTS = False 880 TABLE_HINTS = False 881 GROUPINGS_SEP = "" 882 SET_OP_MODIFIERS = False 883 SUPPORTS_TABLE_ALIAS_COLUMNS = False 884 VALUES_AS_TABLE = False 885 ARRAY_SIZE_NAME = "LENGTH" 886 887 STRING_TYPE_MAPPING = { 888 exp.DataType.Type.CHAR: "String", 889 exp.DataType.Type.LONGBLOB: "String", 890 exp.DataType.Type.LONGTEXT: "String", 891 exp.DataType.Type.MEDIUMBLOB: "String", 892 exp.DataType.Type.MEDIUMTEXT: "String", 893 exp.DataType.Type.TINYBLOB: "String", 894 exp.DataType.Type.TINYTEXT: "String", 895 exp.DataType.Type.TEXT: "String", 896 exp.DataType.Type.VARBINARY: "String", 897 exp.DataType.Type.VARCHAR: "String", 898 } 899 900 SUPPORTED_JSON_PATH_PARTS = { 901 exp.JSONPathKey, 902 exp.JSONPathRoot, 903 exp.JSONPathSubscript, 904 } 905 906 TYPE_MAPPING = { 907 **generator.Generator.TYPE_MAPPING, 908 **STRING_TYPE_MAPPING, 909 exp.DataType.Type.ARRAY: "Array", 910 exp.DataType.Type.BOOLEAN: "Bool", 911 exp.DataType.Type.BIGINT: "Int64", 912 exp.DataType.Type.DATE32: "Date32", 913 exp.DataType.Type.DATETIME: "DateTime", 914 exp.DataType.Type.DATETIME2: "DateTime", 915 exp.DataType.Type.SMALLDATETIME: "DateTime", 916 exp.DataType.Type.DATETIME64: "DateTime64", 917 exp.DataType.Type.DECIMAL: "Decimal", 918 exp.DataType.Type.DECIMAL32: "Decimal32", 919 exp.DataType.Type.DECIMAL64: "Decimal64", 920 exp.DataType.Type.DECIMAL128: "Decimal128", 921 exp.DataType.Type.DECIMAL256: "Decimal256", 922 exp.DataType.Type.TIMESTAMP: "DateTime", 923 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 924 exp.DataType.Type.DOUBLE: "Float64", 925 exp.DataType.Type.ENUM: "Enum", 926 exp.DataType.Type.ENUM8: "Enum8", 927 exp.DataType.Type.ENUM16: "Enum16", 928 exp.DataType.Type.FIXEDSTRING: "FixedString", 929 exp.DataType.Type.FLOAT: "Float32", 930 exp.DataType.Type.INT: "Int32", 931 exp.DataType.Type.MEDIUMINT: "Int32", 932 exp.DataType.Type.INT128: "Int128", 933 exp.DataType.Type.INT256: "Int256", 934 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 935 exp.DataType.Type.MAP: "Map", 936 exp.DataType.Type.NESTED: "Nested", 937 exp.DataType.Type.SMALLINT: "Int16", 938 exp.DataType.Type.STRUCT: "Tuple", 939 exp.DataType.Type.TINYINT: "Int8", 940 exp.DataType.Type.UBIGINT: "UInt64", 941 exp.DataType.Type.UINT: "UInt32", 942 exp.DataType.Type.UINT128: "UInt128", 943 exp.DataType.Type.UINT256: "UInt256", 944 exp.DataType.Type.USMALLINT: "UInt16", 945 exp.DataType.Type.UTINYINT: "UInt8", 946 exp.DataType.Type.IPV4: "IPv4", 947 exp.DataType.Type.IPV6: "IPv6", 948 exp.DataType.Type.POINT: "Point", 949 exp.DataType.Type.RING: "Ring", 950 exp.DataType.Type.LINESTRING: "LineString", 951 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 952 exp.DataType.Type.POLYGON: "Polygon", 953 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 954 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 955 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 956 } 957 958 TRANSFORMS = { 959 **generator.Generator.TRANSFORMS, 960 exp.AnyValue: rename_func("any"), 961 exp.ApproxDistinct: rename_func("uniq"), 962 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 963 exp.ArraySum: rename_func("arraySum"), 964 exp.ArgMax: arg_max_or_min_no_count("argMax"), 965 exp.ArgMin: arg_max_or_min_no_count("argMin"), 966 exp.Array: inline_array_sql, 967 exp.CastToStrType: rename_func("CAST"), 968 exp.CountIf: rename_func("countIf"), 969 exp.CompressColumnConstraint: lambda self, 970 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 971 exp.ComputedColumnConstraint: lambda self, 972 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 973 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 974 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 975 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 976 exp.DateStrToDate: rename_func("toDate"), 977 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 978 exp.Explode: rename_func("arrayJoin"), 979 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 980 exp.IsNan: rename_func("isNaN"), 981 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 982 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 983 exp.JSONPathKey: json_path_key_only_name, 984 exp.JSONPathRoot: lambda *_: "", 985 exp.Length: length_or_char_length_sql, 986 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 987 exp.Median: rename_func("median"), 988 exp.Nullif: rename_func("nullIf"), 989 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 990 exp.Pivot: no_pivot_sql, 991 exp.Quantile: _quantile_sql, 992 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 993 exp.Rand: rename_func("randCanonical"), 994 exp.StartsWith: rename_func("startsWith"), 995 exp.StrPosition: lambda self, e: self.func( 996 "position", e.this, e.args.get("substr"), e.args.get("position") 997 ), 998 exp.TimeToStr: lambda self, e: self.func( 999 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1000 ), 1001 exp.TimeStrToTime: _timestrtotime_sql, 1002 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1003 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1004 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 1005 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1006 exp.MD5Digest: rename_func("MD5"), 1007 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1008 exp.SHA: rename_func("SHA1"), 1009 exp.SHA2: sha256_sql, 1010 exp.UnixToTime: _unix_to_time_sql, 1011 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1012 exp.Trim: trim_sql, 1013 exp.Variance: rename_func("varSamp"), 1014 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1015 exp.Stddev: rename_func("stddevSamp"), 1016 exp.Chr: rename_func("CHAR"), 1017 exp.Lag: lambda self, e: self.func( 1018 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1019 ), 1020 exp.Lead: lambda self, e: self.func( 1021 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1022 ), 1023 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1024 rename_func("editDistance") 1025 ), 1026 } 1027 1028 PROPERTIES_LOCATION = { 1029 **generator.Generator.PROPERTIES_LOCATION, 1030 exp.OnCluster: exp.Properties.Location.POST_NAME, 1031 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1032 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1033 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1034 } 1035 1036 # There's no list in docs, but it can be found in Clickhouse code 1037 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1038 ON_CLUSTER_TARGETS = { 1039 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1040 "DATABASE", 1041 "TABLE", 1042 "VIEW", 1043 "DICTIONARY", 1044 "INDEX", 1045 "FUNCTION", 1046 "NAMED COLLECTION", 1047 } 1048 1049 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1050 NON_NULLABLE_TYPES = { 1051 exp.DataType.Type.ARRAY, 1052 exp.DataType.Type.MAP, 1053 exp.DataType.Type.STRUCT, 1054 exp.DataType.Type.POINT, 1055 exp.DataType.Type.RING, 1056 exp.DataType.Type.LINESTRING, 1057 exp.DataType.Type.MULTILINESTRING, 1058 exp.DataType.Type.POLYGON, 1059 exp.DataType.Type.MULTIPOLYGON, 1060 } 1061 1062 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1063 strtodate_sql = self.function_fallback_sql(expression) 1064 1065 if not isinstance(expression.parent, exp.Cast): 1066 # StrToDate returns DATEs in other dialects (eg. postgres), so 1067 # this branch aims to improve the transpilation to clickhouse 1068 return f"CAST({strtodate_sql} AS DATE)" 1069 1070 return strtodate_sql 1071 1072 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1073 this = expression.this 1074 1075 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1076 return self.sql(this) 1077 1078 return super().cast_sql(expression, safe_prefix=safe_prefix) 1079 1080 def trycast_sql(self, expression: exp.TryCast) -> str: 1081 dtype = expression.to 1082 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1083 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1084 dtype.set("nullable", True) 1085 1086 return super().cast_sql(expression) 1087 1088 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1089 this = self.json_path_part(expression.this) 1090 return str(int(this) + 1) if is_int(this) else this 1091 1092 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1093 return f"AS {self.sql(expression, 'this')}" 1094 1095 def _any_to_has( 1096 self, 1097 expression: exp.EQ | exp.NEQ, 1098 default: t.Callable[[t.Any], str], 1099 prefix: str = "", 1100 ) -> str: 1101 if isinstance(expression.left, exp.Any): 1102 arr = expression.left 1103 this = expression.right 1104 elif isinstance(expression.right, exp.Any): 1105 arr = expression.right 1106 this = expression.left 1107 else: 1108 return default(expression) 1109 1110 return prefix + self.func("has", arr.this.unnest(), this) 1111 1112 def eq_sql(self, expression: exp.EQ) -> str: 1113 return self._any_to_has(expression, super().eq_sql) 1114 1115 def neq_sql(self, expression: exp.NEQ) -> str: 1116 return self._any_to_has(expression, super().neq_sql, "NOT ") 1117 1118 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1119 # Manually add a flag to make the search case-insensitive 1120 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1121 return self.func("match", expression.this, regex) 1122 1123 def datatype_sql(self, expression: exp.DataType) -> str: 1124 # String is the standard ClickHouse type, every other variant is just an alias. 1125 # Additionally, any supplied length parameter will be ignored. 1126 # 1127 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1128 if expression.this in self.STRING_TYPE_MAPPING: 1129 dtype = "String" 1130 else: 1131 dtype = super().datatype_sql(expression) 1132 1133 # This section changes the type to `Nullable(...)` if the following conditions hold: 1134 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1135 # and change their semantics 1136 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1137 # constraint: "Type of Map key must be a type, that can be represented by integer or 1138 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1139 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1140 parent = expression.parent 1141 nullable = expression.args.get("nullable") 1142 if nullable is True or ( 1143 nullable is None 1144 and not ( 1145 isinstance(parent, exp.DataType) 1146 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1147 and expression.index in (None, 0) 1148 ) 1149 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1150 ): 1151 dtype = f"Nullable({dtype})" 1152 1153 return dtype 1154 1155 def cte_sql(self, expression: exp.CTE) -> str: 1156 if expression.args.get("scalar"): 1157 this = self.sql(expression, "this") 1158 alias = self.sql(expression, "alias") 1159 return f"{this} AS {alias}" 1160 1161 return super().cte_sql(expression) 1162 1163 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1164 return super().after_limit_modifiers(expression) + [ 1165 ( 1166 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1167 if expression.args.get("settings") 1168 else "" 1169 ), 1170 ( 1171 self.seg("FORMAT ") + self.sql(expression, "format") 1172 if expression.args.get("format") 1173 else "" 1174 ), 1175 ] 1176 1177 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1178 params = self.expressions(expression, key="params", flat=True) 1179 return self.func(expression.name, *expression.expressions) + f"({params})" 1180 1181 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1182 return self.func(expression.name, *expression.expressions) 1183 1184 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1185 return self.anonymousaggfunc_sql(expression) 1186 1187 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1188 return self.parameterizedagg_sql(expression) 1189 1190 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1191 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1192 1193 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1194 return f"ON CLUSTER {self.sql(expression, 'this')}" 1195 1196 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1197 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1198 exp.Properties.Location.POST_NAME 1199 ): 1200 this_name = self.sql( 1201 expression.this if isinstance(expression.this, exp.Schema) else expression, 1202 "this", 1203 ) 1204 this_properties = " ".join( 1205 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1206 ) 1207 this_schema = self.schema_columns_sql(expression.this) 1208 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1209 1210 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1211 1212 return super().createable_sql(expression, locations) 1213 1214 def create_sql(self, expression: exp.Create) -> str: 1215 # The comment property comes last in CTAS statements, i.e. after the query 1216 query = expression.expression 1217 if isinstance(query, exp.Query): 1218 comment_prop = expression.find(exp.SchemaCommentProperty) 1219 if comment_prop: 1220 comment_prop.pop() 1221 query.replace(exp.paren(query)) 1222 else: 1223 comment_prop = None 1224 1225 create_sql = super().create_sql(expression) 1226 1227 comment_sql = self.sql(comment_prop) 1228 comment_sql = f" {comment_sql}" if comment_sql else "" 1229 1230 return f"{create_sql}{comment_sql}" 1231 1232 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1233 this = self.indent(self.sql(expression, "this")) 1234 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1235 1236 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1237 this = self.sql(expression, "this") 1238 this = f" {this}" if this else "" 1239 expr = self.sql(expression, "expression") 1240 expr = f" {expr}" if expr else "" 1241 index_type = self.sql(expression, "index_type") 1242 index_type = f" TYPE {index_type}" if index_type else "" 1243 granularity = self.sql(expression, "granularity") 1244 granularity = f" GRANULARITY {granularity}" if granularity else "" 1245 1246 return f"INDEX{this}{expr}{index_type}{granularity}" 1247 1248 def partition_sql(self, expression: exp.Partition) -> str: 1249 return f"PARTITION {self.expressions(expression, flat=True)}" 1250 1251 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1252 return f"ID {self.sql(expression.this)}" 1253 1254 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1255 return ( 1256 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1257 ) 1258 1259 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1260 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1261 1262 def is_sql(self, expression: exp.Is) -> str: 1263 is_sql = super().is_sql(expression) 1264 1265 if isinstance(expression.parent, exp.Not): 1266 # value IS NOT NULL -> NOT (value IS NULL) 1267 is_sql = self.wrap(is_sql) 1268 1269 return is_sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1062 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1063 strtodate_sql = self.function_fallback_sql(expression) 1064 1065 if not isinstance(expression.parent, exp.Cast): 1066 # StrToDate returns DATEs in other dialects (eg. postgres), so 1067 # this branch aims to improve the transpilation to clickhouse 1068 return f"CAST({strtodate_sql} AS DATE)" 1069 1070 return strtodate_sql
1072 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1073 this = expression.this 1074 1075 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1076 return self.sql(this) 1077 1078 return super().cast_sql(expression, safe_prefix=safe_prefix)
1080 def trycast_sql(self, expression: exp.TryCast) -> str: 1081 dtype = expression.to 1082 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1083 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1084 dtype.set("nullable", True) 1085 1086 return super().cast_sql(expression)
1123 def datatype_sql(self, expression: exp.DataType) -> str: 1124 # String is the standard ClickHouse type, every other variant is just an alias. 1125 # Additionally, any supplied length parameter will be ignored. 1126 # 1127 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1128 if expression.this in self.STRING_TYPE_MAPPING: 1129 dtype = "String" 1130 else: 1131 dtype = super().datatype_sql(expression) 1132 1133 # This section changes the type to `Nullable(...)` if the following conditions hold: 1134 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1135 # and change their semantics 1136 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1137 # constraint: "Type of Map key must be a type, that can be represented by integer or 1138 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1139 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1140 parent = expression.parent 1141 nullable = expression.args.get("nullable") 1142 if nullable is True or ( 1143 nullable is None 1144 and not ( 1145 isinstance(parent, exp.DataType) 1146 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1147 and expression.index in (None, 0) 1148 ) 1149 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1150 ): 1151 dtype = f"Nullable({dtype})" 1152 1153 return dtype
1163 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1164 return super().after_limit_modifiers(expression) + [ 1165 ( 1166 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1167 if expression.args.get("settings") 1168 else "" 1169 ), 1170 ( 1171 self.seg("FORMAT ") + self.sql(expression, "format") 1172 if expression.args.get("format") 1173 else "" 1174 ), 1175 ]
1196 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1197 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1198 exp.Properties.Location.POST_NAME 1199 ): 1200 this_name = self.sql( 1201 expression.this if isinstance(expression.this, exp.Schema) else expression, 1202 "this", 1203 ) 1204 this_properties = " ".join( 1205 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1206 ) 1207 this_schema = self.schema_columns_sql(expression.this) 1208 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1209 1210 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1211 1212 return super().createable_sql(expression, locations)
1214 def create_sql(self, expression: exp.Create) -> str: 1215 # The comment property comes last in CTAS statements, i.e. after the query 1216 query = expression.expression 1217 if isinstance(query, exp.Query): 1218 comment_prop = expression.find(exp.SchemaCommentProperty) 1219 if comment_prop: 1220 comment_prop.pop() 1221 query.replace(exp.paren(query)) 1222 else: 1223 comment_prop = None 1224 1225 create_sql = super().create_sql(expression) 1226 1227 comment_sql = self.sql(comment_prop) 1228 comment_sql = f" {comment_sql}" if comment_sql else "" 1229 1230 return f"{create_sql}{comment_sql}"
1236 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1237 this = self.sql(expression, "this") 1238 this = f" {this}" if this else "" 1239 expr = self.sql(expression, "expression") 1240 expr = f" {expr}" if expr else "" 1241 index_type = self.sql(expression, "index_type") 1242 index_type = f" TYPE {index_type}" if index_type else "" 1243 granularity = self.sql(expression, "granularity") 1244 granularity = f" GRANULARITY {granularity}" if granularity else "" 1245 1246 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql