sqlglot.dialects.clickhouse
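
The module source follows. As a quick orientation, here is a minimal usage sketch (the queries are illustrative, not taken from this file; it assumes only sqlglot's public top-level API):

    import sqlglot

    # Parse ClickHouse-specific syntax: a parameterized quantile call and FINAL.
    ast = sqlglot.parse_one("SELECT quantile(0.5)(x) FROM t FINAL", read="clickhouse")

    # Transpile into ClickHouse; string types collapse to String
    # (see STRING_TYPE_MAPPING and datatype_sql below).
    sql = sqlglot.transpile(
        "SELECT CAST(a AS TEXT) FROM t", read="postgres", write="clickhouse"
    )[0]
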
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("timezone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }
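
    # For illustration: with a case-sensitive strategy, identifier normalization
    # is effectively a no-op for this dialect. A sketch using sqlglot's
    # normalize_identifiers helper:
    #
    #   from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
    #   normalize_identifiers(sqlglot.parse_one("SELECT Foo FROM Bar"), dialect="clickhouse")
    #
    # keeps "Foo" and "Bar" as-is, unlike dialects that lower/uppercase them.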

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Testing in ClickHouse's playground suggests that the following two queries do the same thing:
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }
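
        # For illustration: these overrides drive parsing; a sketch using the
        # public API:
        #
        #   sqlglot.parse_one("SELECT uniq(x) FROM t", read="clickhouse")
        #
        # yields an exp.ApproxDistinct node, which the generator's TRANSFORMS
        # below turn back into uniq(x) when writing ClickHouse.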
"avg", 215 "any", 216 "stddevPop", 217 "stddevSamp", 218 "varPop", 219 "varSamp", 220 "corr", 221 "covarPop", 222 "covarSamp", 223 "entropy", 224 "exponentialMovingAverage", 225 "intervalLengthSum", 226 "kolmogorovSmirnovTest", 227 "mannWhitneyUTest", 228 "median", 229 "rankCorr", 230 "sumKahan", 231 "studentTTest", 232 "welchTTest", 233 "anyHeavy", 234 "anyLast", 235 "boundingRatio", 236 "first_value", 237 "last_value", 238 "argMin", 239 "argMax", 240 "avgWeighted", 241 "topK", 242 "topKWeighted", 243 "deltaSum", 244 "deltaSumTimestamp", 245 "groupArray", 246 "groupArrayLast", 247 "groupUniqArray", 248 "groupArrayInsertAt", 249 "groupArrayMovingAvg", 250 "groupArrayMovingSum", 251 "groupArraySample", 252 "groupBitAnd", 253 "groupBitOr", 254 "groupBitXor", 255 "groupBitmap", 256 "groupBitmapAnd", 257 "groupBitmapOr", 258 "groupBitmapXor", 259 "sumWithOverflow", 260 "sumMap", 261 "minMap", 262 "maxMap", 263 "skewSamp", 264 "skewPop", 265 "kurtSamp", 266 "kurtPop", 267 "uniq", 268 "uniqExact", 269 "uniqCombined", 270 "uniqCombined64", 271 "uniqHLL12", 272 "uniqTheta", 273 "quantile", 274 "quantiles", 275 "quantileExact", 276 "quantilesExact", 277 "quantileExactLow", 278 "quantilesExactLow", 279 "quantileExactHigh", 280 "quantilesExactHigh", 281 "quantileExactWeighted", 282 "quantilesExactWeighted", 283 "quantileTiming", 284 "quantilesTiming", 285 "quantileTimingWeighted", 286 "quantilesTimingWeighted", 287 "quantileDeterministic", 288 "quantilesDeterministic", 289 "quantileTDigest", 290 "quantilesTDigest", 291 "quantileTDigestWeighted", 292 "quantilesTDigestWeighted", 293 "quantileBFloat16", 294 "quantilesBFloat16", 295 "quantileBFloat16Weighted", 296 "quantilesBFloat16Weighted", 297 "simpleLinearRegression", 298 "stochasticLinearRegression", 299 "stochasticLogisticRegression", 300 "categoricalInformationValue", 301 "contingency", 302 "cramersV", 303 "cramersVBiasCorrected", 304 "theilsU", 305 "maxIntersections", 306 "maxIntersectionsPosition", 307 "meanZTest", 308 "quantileInterpolatedWeighted", 309 "quantilesInterpolatedWeighted", 310 "quantileGK", 311 "quantilesGK", 312 "sparkBar", 313 "sumCount", 314 "largestTriangleThreeBuckets", 315 "histogram", 316 "sequenceMatch", 317 "sequenceCount", 318 "windowFunnel", 319 "retention", 320 "uniqUpTo", 321 "sequenceNextNode", 322 "exponentialTimeDecayedAvg", 323 } 324 325 AGG_FUNCTIONS_SUFFIXES = [ 326 "If", 327 "Array", 328 "ArrayIf", 329 "Map", 330 "SimpleState", 331 "State", 332 "Merge", 333 "MergeState", 334 "ForEach", 335 "Distinct", 336 "OrDefault", 337 "OrNull", 338 "Resample", 339 "ArgMin", 340 "ArgMax", 341 ] 342 343 FUNC_TOKENS = { 344 *parser.Parser.FUNC_TOKENS, 345 TokenType.SET, 346 } 347 348 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 349 350 ID_VAR_TOKENS = { 351 *parser.Parser.ID_VAR_TOKENS, 352 TokenType.LIKE, 353 } 354 355 AGG_FUNC_MAPPING = ( 356 lambda functions, suffixes: { 357 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 358 } 359 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 360 361 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 362 363 FUNCTION_PARSERS = { 364 **parser.Parser.FUNCTION_PARSERS, 365 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 366 "QUANTILE": lambda self: self._parse_quantile(), 367 } 368 369 FUNCTION_PARSERS.pop("MATCH") 370 371 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 372 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 373 374 RANGE_PARSERS = { 375 

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }
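
        # For illustration: the SETTINGS / FORMAT modifier parsers above allow
        # trailing query modifiers such as
        #
        #   SELECT * FROM t SETTINGS max_threads = 8 FORMAT JSONEachRow
        #
        # to round-trip; the generator re-emits them in after_limit_modifiers.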

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join
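
        # For illustration: the GLOBAL range parser and the join logic above
        # together cover both forms of ClickHouse's distributed operators:
        #
        #   x GLOBAL IN (SELECT ...)  -> exp.In with is_global=True
        #   GLOBAL ANY LEFT JOIN ...  -> the GLOBAL token, returned first from
        #                                _parse_join_parts, is moved from the
        #                                join's "method" arg to its "global" arg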

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split into two parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's function was parsed as Anonymous in the base parser; fix its
                    # type to the CH-style CombinedAggFunc / AnonymousAggFunc variants
                    expr.set("this", func)
                elif params:
                    # The params blocked super()._parse_function() from parsing the trailing window
                    # (if one exists), since they sit between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)

            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )
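
        # For illustration: a sketch of how _parse_function and
        # _parse_func_params compose for ClickHouse's two-parenthesis
        # aggregate calls (illustrative inputs):
        #
        #   topK(10)(x)         -> exp.ParameterizedAgg
        #   topKIf(10)(x, cond) -> exp.CombinedParameterizedAgg with
        #                          parts=("topK", "If")
        #   sumIf(x, cond)      -> exp.CombinedAggFunc (no parameter group)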

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }
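
        # For illustration: every entry in STRING_TYPE_MAPPING collapses to
        # plain String (see datatype_sql below), so e.g. transpiling
        # CAST(x AS VARCHAR(10)) into ClickHouse yields CAST(x AS String);
        # the length parameter is intentionally dropped.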

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }
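
        # For illustration: the exp.MD5 transform above targets dialects whose
        # MD5() returns a hex string (e.g. MySQL): MD5(x) is rewritten to
        # LOWER(HEX(MD5(x))), since ClickHouse's MD5 returns raw bytes
        # (a FixedString(16)).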
"DATABASE", 847 "TABLE", 848 "VIEW", 849 "DICTIONARY", 850 "INDEX", 851 "FUNCTION", 852 "NAMED COLLECTION", 853 } 854 855 def strtodate_sql(self, expression: exp.StrToDate) -> str: 856 strtodate_sql = self.function_fallback_sql(expression) 857 858 if not isinstance(expression.parent, exp.Cast): 859 # StrToDate returns DATEs in other dialects (eg. postgres), so 860 # this branch aims to improve the transpilation to clickhouse 861 return f"CAST({strtodate_sql} AS DATE)" 862 863 return strtodate_sql 864 865 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 866 this = expression.this 867 868 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 869 return self.sql(this) 870 871 return super().cast_sql(expression, safe_prefix=safe_prefix) 872 873 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 874 this = self.json_path_part(expression.this) 875 return str(int(this) + 1) if is_int(this) else this 876 877 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 878 return f"AS {self.sql(expression, 'this')}" 879 880 def _any_to_has( 881 self, 882 expression: exp.EQ | exp.NEQ, 883 default: t.Callable[[t.Any], str], 884 prefix: str = "", 885 ) -> str: 886 if isinstance(expression.left, exp.Any): 887 arr = expression.left 888 this = expression.right 889 elif isinstance(expression.right, exp.Any): 890 arr = expression.right 891 this = expression.left 892 else: 893 return default(expression) 894 895 return prefix + self.func("has", arr.this.unnest(), this) 896 897 def eq_sql(self, expression: exp.EQ) -> str: 898 return self._any_to_has(expression, super().eq_sql) 899 900 def neq_sql(self, expression: exp.NEQ) -> str: 901 return self._any_to_has(expression, super().neq_sql, "NOT ") 902 903 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 904 # Manually add a flag to make the search case-insensitive 905 regex = self.func("CONCAT", "'(?i)'", expression.expression) 906 return self.func("match", expression.this, regex) 907 908 def datatype_sql(self, expression: exp.DataType) -> str: 909 # String is the standard ClickHouse type, every other variant is just an alias. 910 # Additionally, any supplied length parameter will be ignored. 

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type; every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )
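
        # For illustration: placeholder_sql round-trips ClickHouse query
        # parameters, e.g. SELECT {abc: UInt32} parses via
        # Parser._parse_placeholder above and generates back unchanged.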
'source')}" 1000 ) 1001 1002 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1003 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
'source')}" 1001 ) 1002 1003 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1004 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
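To make the mapping tables above concrete, here is a minimal sketch using the public sqlglot API; the outputs shown in comments are indicative and may vary slightly across sqlglot versions:

    import sqlglot
    from sqlglot import exp

    # TRANSFORMS renames exp.ApproxDistinct to uniq and exp.Explode to arrayJoin
    print(exp.ApproxDistinct(this=exp.column("x")).sql("clickhouse"))  # uniq(x)
    print(exp.Explode(this=exp.column("xs")).sql("clickhouse"))  # arrayJoin(xs)

    # strtodate_sql wraps a bare StrToDate in a cast so the result stays a DATE
    sql = "SELECT STR_TO_DATE('2020-01-01', '%Y-%m-%d')"
    print(sqlglot.transpile(sql, read="mysql", write="clickhouse")[0])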
NORMALIZE_FUNCTIONS
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
LOG_BASE_FIRST
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
FORCE_EARLY_ALIAS_REF_EXPANSION
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:

    WITH data AS (
      SELECT
        1 AS id,
        2 AS my_id
    )
    SELECT
      id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects "my_id" would refer to "data.my_id" (which is done in _qualify_columns()) across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
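The ClickHouse behavior can be reproduced with the optimizer's qualifier (a minimal sketch; the exact qualified SQL depends on the sqlglot version):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id
    """

    # under ClickHouse, my_id in WHERE/GROUP BY resolves to the alias target id
    ast = sqlglot.parse_one(sql, read="clickhouse")
    print(qualify(ast, dialect="clickhouse").sql("clickhouse"))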
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
ESCAPED_SEQUENCES
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
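Since the normalization strategy above is case-sensitive, identifier normalization is effectively a no-op for this dialect. A small sketch (assuming a recent sqlglot version, where Dialect.get_or_raise returns a dialect instance):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    clickhouse = Dialect.get_or_raise("clickhouse")
    # a case-sensitive dialect leaves the identifier's casing intact
    print(clickhouse.normalize_identifier(exp.to_identifier("MyCol")).name)  # MyCol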
class Tokenizer(tokens.Tokenizer):
    COMMENTS = ["--", "#", "#!", ("/*", "*/")]
    IDENTIFIERS = ['"', "`"]
    STRING_ESCAPES = ["'", "\\"]
    BIT_STRINGS = [("0b", "")]
    HEX_STRINGS = [("0x", ""), ("0X", "")]
    HEREDOC_STRINGS = ["$"]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ATTACH": TokenType.COMMAND,
        "DATE32": TokenType.DATE32,
        "DATETIME64": TokenType.DATETIME64,
        "DICTIONARY": TokenType.DICTIONARY,
        "ENUM8": TokenType.ENUM8,
        "ENUM16": TokenType.ENUM16,
        "FINAL": TokenType.FINAL,
        "FIXEDSTRING": TokenType.FIXEDSTRING,
        "FLOAT32": TokenType.FLOAT,
        "FLOAT64": TokenType.DOUBLE,
        "GLOBAL": TokenType.GLOBAL,
        "INT256": TokenType.INT256,
        "LOWCARDINALITY": TokenType.LOWCARDINALITY,
        "MAP": TokenType.MAP,
        "NESTED": TokenType.NESTED,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "TUPLE": TokenType.STRUCT,
        "UINT128": TokenType.UINT128,
        "UINT16": TokenType.USMALLINT,
        "UINT256": TokenType.UINT256,
        "UINT32": TokenType.UINT,
        "UINT64": TokenType.UBIGINT,
        "UINT8": TokenType.UTINYINT,
        "IPV4": TokenType.IPV4,
        "IPV6": TokenType.IPV6,
        "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
        "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
        "SYSTEM": TokenType.COMMAND,
        "PREWHERE": TokenType.PREWHERE,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.HEREDOC_STRING,
    }
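A short usage sketch for this tokenizer: "#" starts a line comment, and comments attach to the surrounding tokens rather than appearing as tokens themselves (the output shown is indicative):

    from sqlglot.dialects.clickhouse import ClickHouse

    tokens = ClickHouse().tokenize("SELECT 1 # trailing comment")
    print([token.token_type.name for token in tokens])  # e.g. ['SELECT', 'NUMBER']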
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
    # * select x from t1 union all select x from t2 limit 1;
    # * select x from t1 union all (select x from t2 limit 1);
    MODIFIERS_ATTACHED_TO_SET_OP = False
    INTERVAL_SPANS = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY": exp.AnyValue.from_arg_list,
        "ARRAYSUM": exp.ArraySum.from_arg_list,
        "COUNTIF": _build_count_if,
        "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATE_FORMAT": _build_date_format,
        "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
        "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
        "FORMATDATETIME": _build_date_format,
        "JSONEXTRACTSTRING": build_json_extract_path(
            exp.JSONExtractScalar, zero_based_indexing=False
        ),
        "MAP": parser.build_var_map,
        "MATCH": exp.RegexpLike.from_arg_list,
        "RANDCANONICAL": exp.Rand.from_arg_list,
        "STR_TO_DATE": _build_str_to_date,
        "TUPLE": exp.Struct.from_arg_list,
        "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "UNIQ": exp.ApproxDistinct.from_arg_list,
        "XOR": lambda args: exp.Xor(expressions=args),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    AGG_FUNCTIONS = {
        "count",
        "min",
        "max",
        "sum",
        "avg",
        "any",
        "stddevPop",
        "stddevSamp",
        "varPop",
        "varSamp",
        "corr",
        "covarPop",
        "covarSamp",
        "entropy",
        "exponentialMovingAverage",
        "intervalLengthSum",
        "kolmogorovSmirnovTest",
        "mannWhitneyUTest",
        "median",
        "rankCorr",
        "sumKahan",
        "studentTTest",
        "welchTTest",
        "anyHeavy",
        "anyLast",
        "boundingRatio",
        "first_value",
        "last_value",
        "argMin",
        "argMax",
        "avgWeighted",
        "topK",
        "topKWeighted",
        "deltaSum",
        "deltaSumTimestamp",
        "groupArray",
        "groupArrayLast",
        "groupUniqArray",
        "groupArrayInsertAt",
        "groupArrayMovingAvg",
        "groupArrayMovingSum",
        "groupArraySample",
        "groupBitAnd",
        "groupBitOr",
        "groupBitXor",
        "groupBitmap",
        "groupBitmapAnd",
        "groupBitmapOr",
        "groupBitmapXor",
        "sumWithOverflow",
        "sumMap",
        "minMap",
        "maxMap",
        "skewSamp",
        "skewPop",
        "kurtSamp",
        "kurtPop",
        "uniq",
        "uniqExact",
        "uniqCombined",
        "uniqCombined64",
        "uniqHLL12",
        "uniqTheta",
        "quantile",
        "quantiles",
        "quantileExact",
        "quantilesExact",
        "quantileExactLow",
        "quantilesExactLow",
        "quantileExactHigh",
        "quantilesExactHigh",
        "quantileExactWeighted",
        "quantilesExactWeighted",
        "quantileTiming",
        "quantilesTiming",
        "quantileTimingWeighted",
        "quantilesTimingWeighted",
        "quantileDeterministic",
        "quantilesDeterministic",
        "quantileTDigest",
        "quantilesTDigest",
        "quantileTDigestWeighted",
        "quantilesTDigestWeighted",
        "quantileBFloat16",
        "quantilesBFloat16",
        "quantileBFloat16Weighted",
        "quantilesBFloat16Weighted",
        "simpleLinearRegression",
        "stochasticLinearRegression",
        "stochasticLogisticRegression",
        "categoricalInformationValue",
        "contingency",
        "cramersV",
        "cramersVBiasCorrected",
        "theilsU",
        "maxIntersections",
        "maxIntersectionsPosition",
        "meanZTest",
        "quantileInterpolatedWeighted",
        "quantilesInterpolatedWeighted",
        "quantileGK",
        "quantilesGK",
        "sparkBar",
        "sumCount",
        "largestTriangleThreeBuckets",
        "histogram",
        "sequenceMatch",
        "sequenceCount",
        "windowFunnel",
        "retention",
        "uniqUpTo",
        "sequenceNextNode",
        "exponentialTimeDecayedAvg",
    }

    AGG_FUNCTIONS_SUFFIXES = [
        "If",
        "Array",
        "ArrayIf",
        "Map",
        "SimpleState",
        "State",
        "Merge",
        "MergeState",
        "ForEach",
        "Distinct",
        "OrDefault",
        "OrNull",
        "Resample",
        "ArgMin",
        "ArgMax",
    ]

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.SET,
    }

    RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.LIKE,
    }

    AGG_FUNC_MAPPING = (
        lambda functions, suffixes: {
            f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
        }
    )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
        "QUANTILE": lambda self: self._parse_quantile(),
    }

    FUNCTION_PARSERS.pop("MATCH")

    NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
    NO_PAREN_FUNCTION_PARSERS.pop("ANY")

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
        and self._parse_in(this, is_global=True),
    }

    # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
    # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
    COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
    COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

    JOIN_KINDS = {
        *parser.Parser.JOIN_KINDS,
        TokenType.ANY,
        TokenType.ASOF,
        TokenType.ARRAY,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.ANY,
        TokenType.ARRAY,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.SETTINGS,
    }

    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
        TokenType.FORMAT,
    }

    LOG_DEFAULTS_TO_LN = True

    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        TokenType.SETTINGS: lambda self: (
            "settings",
            self._advance() or self._parse_csv(self._parse_assignment),
        ),
        TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "INDEX": lambda self: self._parse_index_constraint(),
        "CODEC": lambda self: self._parse_compress(),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "REPLACE": lambda self: self._parse_alter_table_replace(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "INDEX",
    }

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        index = self._index
        this = self._parse_bitwise()
        if self._match(TokenType.FROM):
            self._retreat(index)
            return super()._parse_extract()

        # We return Anonymous here because extract and regexpExtract have different semantics,
        # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
        # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
        #
        # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
        self._match(TokenType.COMMA)
        return self.expression(
            exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
        )

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = super()._parse_assignment()

        if self._match(TokenType.PLACEHOLDER):
            return self.expression(
                exp.If,
                this=this,
                true=self._parse_assignment(),
                false=self._match(TokenType.COLON) and self._parse_assignment(),
            )

        return this

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """
        Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
        https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
        """
        if not self._match(TokenType.L_BRACE):
            return None

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        kind = self._parse_types(check_func=False, allow_identifiers=False) or (
            self._match_text_seq("IDENTIFIER") and "Identifier"
        )

        if not kind:
            self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
        elif not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")

        return self.expression(exp.Placeholder, this=this, kind=kind)

    def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
        this = super()._parse_in(this)
        this.set("is_global", is_global)
        return this

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        this = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if self._match(TokenType.FINAL):
            this = self.expression(exp.Final, this=this)

        return this

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        return super()._parse_position(haystack_first=True)

    # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
    def _parse_cte(self) -> exp.CTE:
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            params = self._parse_func_params(func)

            kwargs = {
                "this": func.this,
                "expressions": func.expressions,
            }
            if parts[1]:
                kwargs["parts"] = parts
                exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)

        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()
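As an example of the placeholder handling above, _parse_placeholder produces exp.Placeholder nodes that the Generator's placeholder_sql can round-trip (a minimal sketch):

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse")
    placeholder = ast.find(exp.Placeholder)
    print(placeholder.name)  # abc
    print(ast.sql("clickhouse"))  # SELECT {abc: UInt32}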
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
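These options flow through the top-level API, which forwards them to this parser (a hedged sketch; the invalid SQL below is expected to fail to parse):

    import sqlglot
    from sqlglot.errors import ErrorLevel, ParseError

    try:
        sqlglot.parse_one("SELECT 1 +", read="clickhouse", error_level=ErrorLevel.RAISE)
    except ParseError as e:
        print(f"{len(e.errors)} parse error(s) collected")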
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
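For instance, pretty and identify can be passed to sqlglot.transpile, which hands them to this Generator (a minimal sketch; formatting details vary by version):

    import sqlglot

    sql = "select a, b from t where x > 1"
    print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse", pretty=True, identify=True)[0])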
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql