Import upstream version 0.2.6
Debian Janitor
11 months ago
0 | 0 | [flake8] |
1 | 1 | filename = *.py, *.pyx |
2 | exclude = perf | |
2 | 3 | per-file-ignores = |
3 | 4 | clickhouse_driver/columns/largeint.pyx: E225, E226, E227, E999 |
4 | 5 | clickhouse_driver/bufferedreader.pyx: E225, E226, E227, E999 |
1 | 1 | name: build |
2 | 2 | jobs: |
3 | 3 | tests: |
4 | runs-on: ubuntu-20.04 | |
4 | runs-on: ubuntu-22.04 | |
5 | 5 | strategy: |
6 | 6 | matrix: |
7 | 7 | use-numpy: |
8 | 8 | - 0 |
9 | 9 | python-version: |
10 | - "3.6" | |
11 | 10 | - "3.7" |
12 | 11 | - "3.8" |
13 | 12 | - "3.9" |
16 | 15 | - "pypy-3.6" |
17 | 16 | - "pypy-3.7" |
18 | 17 | clickhouse-version: |
18 | - 23.2.6.34 | |
19 | - 22.9.5.25 | |
19 | 20 | - 22.2.3.5 |
20 | 21 | - 21.12.3.32 |
21 | - 21.9.3.30 | |
22 | 22 | - 21.9.3.30 |
23 | 23 | - 21.4.6.55 |
24 | 24 | - 21.3.10.1 |
117 | 117 | valgrind: |
118 | 118 | name: Valgrind check |
119 | 119 | needs: tests |
120 | runs-on: ubuntu-20.04 | |
120 | runs-on: ubuntu-22.04 | |
121 | 121 | steps: |
122 | 122 | - uses: actions/checkout@v2 |
123 | 123 | - name: Set up Python |
165 | 165 | wheels-linux: |
166 | 166 | name: Wheels for Linux |
167 | 167 | needs: valgrind |
168 | runs-on: ubuntu-20.04 | |
168 | runs-on: ubuntu-22.04 | |
169 | 169 | steps: |
170 | 170 | - uses: actions/checkout@v2 |
171 | 171 | - name: Set up Python |
278 | 278 | wheels-linux-non-x86: |
279 | 279 | name: Wheels for Linux non-x86 |
280 | 280 | needs: valgrind |
281 | runs-on: ubuntu-20.04 | |
281 | runs-on: ubuntu-22.04 | |
282 | 282 | strategy: |
283 | 283 | matrix: |
284 | 284 | arch: |
0 | on: [push, pull_request] | |
1 | name: build-docs | |
2 | jobs: | |
3 | tests: | |
4 | runs-on: ubuntu-22.04 | |
5 | name: Build docs | |
6 | steps: | |
7 | - uses: actions/checkout@v2 | |
8 | - name: Set up Python | |
9 | uses: actions/setup-python@v2 | |
10 | with: | |
11 | python-version: 3.11 | |
12 | architecture: x64 | |
13 | - name: Update tools | |
14 | run: pip install --upgrade pip setuptools wheel | |
15 | - name: Install sphinx | |
16 | run: pip install sphinx | |
17 | - name: Install package | |
18 | run: pip install -e . | |
19 | - name: Build docs | |
20 | run: cd docs && make html |
0 | # Byte-compiled / optimized / DLL files | |
1 | __pycache__/ | |
2 | *.py[cod] | |
3 | *$py.class | |
4 | ||
5 | # C extensions | |
6 | *.so | |
7 | ||
8 | # Distribution / packaging | |
9 | .Python | |
10 | env/ | |
11 | build/ | |
12 | develop-eggs/ | |
13 | dist/ | |
14 | downloads/ | |
15 | eggs/ | |
16 | .eggs/ | |
17 | lib/ | |
18 | lib64/ | |
19 | parts/ | |
20 | sdist/ | |
21 | var/ | |
22 | *.egg-info/ | |
23 | .installed.cfg | |
24 | *.egg | |
25 | ||
26 | # PyInstaller | |
27 | # Usually these files are written by a python script from a template | |
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. | |
29 | *.manifest | |
30 | *.spec | |
31 | ||
32 | # Installer logs | |
33 | pip-log.txt | |
34 | pip-delete-this-directory.txt | |
35 | ||
36 | # Unit test / coverage reports | |
37 | htmlcov/ | |
38 | .tox/ | |
39 | .coverage | |
40 | .coverage.* | |
41 | .cache | |
42 | nosetests.xml | |
43 | coverage.xml | |
44 | *,cover | |
45 | .hypothesis/ | |
46 | ||
47 | # Translations | |
48 | *.mo | |
49 | *.pot | |
50 | ||
51 | # Django stuff: | |
52 | *.log | |
53 | local_settings.py | |
54 | ||
55 | # Flask stuff: | |
56 | instance/ | |
57 | .webassets-cache | |
58 | ||
59 | # Scrapy stuff: | |
60 | .scrapy | |
61 | ||
62 | # Sphinx documentation | |
63 | docs/_build/ | |
64 | ||
65 | # PyBuilder | |
66 | target/ | |
67 | ||
68 | # IPython Notebook | |
69 | .ipynb_checkpoints | |
70 | ||
71 | # pyenv | |
72 | .python-version | |
73 | ||
74 | # celery beat schedule file | |
75 | celerybeat-schedule | |
76 | ||
77 | # dotenv | |
78 | .env | |
79 | ||
80 | # virtualenv | |
81 | venv/ | |
82 | ENV/ | |
83 | ||
84 | # Spyder project settings | |
85 | .spyderproject | |
86 | ||
87 | # Rope project settings | |
88 | .ropeproject | |
89 | ||
90 | # PyCharm project settings | |
91 | .idea/ |
0 | 0 | # Changelog |
1 | 1 | |
2 | 2 | ## Unreleased |
3 | ||
4 | ## [0.2.6] - 2023-05-02 | |
5 | ### Added | |
6 | - JSON type. Solves issue [#320](https://github.com/mymarilyn/clickhouse-driver/issues/320). | 
7 | - On-demand client revision downgrading. | 
8 | - Server-side query templating. | |
9 | - Sparse data type deserialization. | |
10 | - TCP keepalive. | |
11 | - [NumPy] Optional replacement of dataframe column names. Pull request [#361](https://github.com/mymarilyn/clickhouse-driver/pull/361) by [notsovitalik](https://github.com/notsovitalik). | 
12 | - Substitution for parameters with time type. Solves issue [#359](https://github.com/mymarilyn/clickhouse-driver/issues/359). Pull request [#360](https://github.com/mymarilyn/clickhouse-driver/pull/360) by [ghazi-git](https://github.com/ghazi-git). | |
13 | ||
14 | ### Fixed | |
15 | - Client construction with round_robin. Solves issue [#373](https://github.com/mymarilyn/clickhouse-driver/issues/373). | |
16 | - Column state prefixes reading and writing. Solves issue [#372](https://github.com/mymarilyn/clickhouse-driver/issues/372). | |
17 | - Inserts into nullable LowCardinality columns. Solves issue [#363](https://github.com/mymarilyn/clickhouse-driver/issues/363). Pull request [#366](https://github.com/mymarilyn/clickhouse-driver/pull/366) by [Dmitry-k42](https://github.com/Dmitry-k42). | 
18 | - [NumPy] Avoid unnecessary timezone conversion. Solves issue [#354](https://github.com/mymarilyn/clickhouse-driver/issues/354). Pull request [#355](https://github.com/mymarilyn/clickhouse-driver/pull/355) by [edwinwzhe](https://github.com/edwinwzhe). | |
19 | ||
20 | ### Changed | |
21 | - Protocol version bumped to 54459. | |
22 | - [NumPy] Speed-up reading Datetime64. Pull request [#365](https://github.com/mymarilyn/clickhouse-driver/pull/365) by [joelgibson](https://github.com/joelgibson). | |
23 | ||
24 | ### Removed | |
25 | - Python 3.6 support. | |
3 | 26 | |
4 | 27 | ## [0.2.5] - 2022-11-27 |
5 | 28 | ### Added |
419 | 442 | - Date/DateTime types. |
420 | 443 | - String types. |
421 | 444 | |
422 | [Unreleased]: https://github.com/mymarilyn/clickhouse-driver/compare/0.2.5...HEAD | |
445 | [Unreleased]: https://github.com/mymarilyn/clickhouse-driver/compare/0.2.6...HEAD | |
446 | [0.2.6]: https://github.com/mymarilyn/clickhouse-driver/compare/0.2.5...0.2.6 | |
423 | 447 | [0.2.5]: https://github.com/mymarilyn/clickhouse-driver/compare/0.2.4...0.2.5 |
424 | 448 | [0.2.4]: https://github.com/mymarilyn/clickhouse-driver/compare/0.2.3...0.2.4 |
425 | 449 | [0.2.3]: https://github.com/mymarilyn/clickhouse-driver/compare/0.2.2...0.2.3 |
2 | 2 | from .dbapi import connect |
3 | 3 | |
4 | 4 | |
5 | VERSION = (0, 2, 5) | |
5 | VERSION = (0, 2, 6) | |
6 | 6 | __version__ = '.'.join(str(x) for x in VERSION) |
7 | 7 | |
8 | 8 | __all__ = ['Client', 'connect'] |
0 | from .columns.util import get_inner_spec, get_inner_columns_with_types | |
0 | 1 | from .reader import read_varint, read_binary_uint8, read_binary_int32 |
1 | 2 | from .varint import write_varint |
2 | 3 | from .writer import write_binary_uint8, write_binary_int32 |
3 | from .columns import nestedcolumn | |
4 | 4 | |
5 | 5 | |
6 | 6 | class BlockInfo(object): |
171 | 171 | for name, type_ in columns_with_types: |
172 | 172 | cwt = None |
173 | 173 | if type_.startswith('Nested'): |
174 | cwt = nestedcolumn.get_columns_with_types(type_) | |
174 | inner_spec = get_inner_spec('Nested', type_) | |
175 | cwt = get_inner_columns_with_types(inner_spec) | |
175 | 176 | columns_with_cwt.append((name, cwt)) |
176 | 177 | |
177 | 178 | for i, row in enumerate(data): |
56 | 56 | * ``round_robin`` -- If ``alt_hosts`` are provided the query will be |
57 | 57 | executed on a host picked with the round-robin algorithm.
58 | 58 | New in version *0.2.5*. |
59 | * ``namedtuple_as_json`` -- Controls named tuple and nested type | 
60 | deserialization. To interpret these columns as plain | 
61 | Python tuples when ``allow_experimental_object_type=1`` | 
62 | is enabled, set ``namedtuple_as_json`` to ``False``. | 
63 | Default: True. | |
64 | New in version *0.2.6*. | |
59 | 65 | """ |
60 | 66 | |
61 | 67 | available_client_settings = ( |
66 | 72 | 'opentelemetry_traceparent', |
67 | 73 | 'opentelemetry_tracestate', |
68 | 74 | 'quota_key', |
69 | 'input_format_null_as_default' | |
75 | 'input_format_null_as_default', | |
76 | 'namedtuple_as_json' | |
70 | 77 | ) |
71 | 78 | |
72 | 79 | def __init__(self, *args, **kwargs): |
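The hunk above introduces the ``namedtuple_as_json`` client setting. A minimal usage sketch (hypothetical local server; ``allow_experimental_object_type`` is a server-side setting, while ``namedtuple_as_json`` is passed as a client setting):

    from clickhouse_driver import Client

    # Named Tuple columns are returned as dicts by default once
    # allow_experimental_object_type is enabled; set namedtuple_as_json=False
    # to get plain Python tuples back instead.
    client = Client(
        'localhost',
        settings={'allow_experimental_object_type': 1},
        namedtuple_as_json=False,
    )
    rows = client.execute("SELECT CAST((1, 'a'), 'Tuple(i UInt8, s String)')")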
96 | 103 | ), |
97 | 104 | 'input_format_null_as_default': self.settings.pop( |
98 | 105 | 'input_format_null_as_default', False |
106 | ), | |
107 | 'namedtuple_as_json': self.settings.pop( | |
108 | 'namedtuple_as_json', True | |
99 | 109 | ) |
100 | 110 | } |
101 | 111 | |
124 | 134 | url = urlparse('clickhouse://' + host) |
125 | 135 | |
126 | 136 | connection_kwargs = kwargs.copy() |
127 | if len(args) > 2: | |
128 | # port as positional argument | |
137 | num_args = len(args) | |
138 | if num_args >= 2: | |
139 | # host and port as positional arguments | |
129 | 140 | connection_args = (url.hostname, url.port) + args[2:] |
141 | elif num_args >= 1: | |
142 | # host as positional and port as keyword argument | |
143 | connection_args = (url.hostname, ) + args[1:] | |
144 | connection_kwargs['port'] = url.port | |
130 | 145 | else: |
131 | # port as keyword argument | |
132 | connection_args = (url.hostname, ) + args[1:] | |
146 | # host and port as keyword arguments | |
147 | connection_args = tuple() | |
148 | connection_kwargs['host'] = url.hostname | |
133 | 149 | connection_kwargs['port'] = url.port |
134 | 150 | |
135 | 151 | connection = Connection(*connection_args, **connection_kwargs) |
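The reworked argument handling above is the fix for Client construction with ``round_robin`` (issue #373): host and port may now be passed positionally or as keywords in any combination when the per-host connections are built. A sketch of the supported call forms (hypothetical hosts):

    from clickhouse_driver import Client

    Client('host1', 9000, alt_hosts='host2:9000', round_robin=True)       # both positional
    Client('host1', port=9000, alt_hosts='host2:9000', round_robin=True)  # port as keyword
    Client(host='host1', port=9000, alt_hosts='host2:9000', round_robin=True)  # both keywords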
437 | 453 | |
438 | 454 | def query_dataframe( |
439 | 455 | self, query, params=None, external_tables=None, query_id=None, |
440 | settings=None): | |
456 | settings=None, replace_nonwords=True): | |
441 | 457 | """ |
442 | 458 | *New in version 0.2.0.* |
443 | 459 | |
452 | 468 | ClickHouse server will generate it. |
453 | 469 | :param settings: dictionary of query settings. |
454 | 470 | Defaults to ``None`` (no additional settings). |
471 | :param replace_nonwords: boolean that controls replacing non-word | 
472 | characters in column names with underscores. Defaults to ``True``. | 
455 | 473 | :return: pandas DataFrame. |
456 | 474 | """ |
457 | 475 | |
466 | 484 | settings=settings |
467 | 485 | ) |
468 | 486 | |
469 | columns = [re.sub(r'\W', '_', name) for name, type_ in columns] | |
487 | columns = [name for name, type_ in columns] | |
488 | if replace_nonwords: | |
489 | columns = [re.sub(r'\W', '_', x) for x in columns] | |
490 | ||
470 | 491 | return pd.DataFrame( |
471 | 492 | {col: d for d, col in zip(data, columns)}, columns=columns |
472 | 493 | ) |
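With the new ``replace_nonwords`` flag the column-name sanitizing shown above becomes optional. A short sketch (hypothetical query):

    # By default "count()" becomes "count__"; pass replace_nonwords=False
    # to keep the original ClickHouse column names in the DataFrame.
    df = client.query_dataframe(
        'SELECT number, count() FROM numbers(10) GROUP BY number',
        replace_nonwords=False,
    )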
528 | 549 | query, params, self.connection.context |
529 | 550 | ) |
530 | 551 | |
531 | self.connection.send_query(query, query_id=query_id) | |
552 | self.connection.send_query(query, query_id=query_id, params=params) | |
532 | 553 | self.connection.send_external_tables(external_tables, |
533 | 554 | types_check=types_check) |
534 | 555 | return self.receive_result(with_column_types=with_column_types, |
543 | 564 | query = self.substitute_params( |
544 | 565 | query, params, self.connection.context |
545 | 566 | ) |
546 | ||
547 | self.connection.send_query(query, query_id=query_id) | |
567 | self.connection.send_query(query, query_id=query_id, params=params) | |
548 | 568 | self.connection.send_external_tables(external_tables, |
549 | 569 | types_check=types_check) |
550 | 570 | return self.receive_result(with_column_types=with_column_types, |
560 | 580 | query, params, self.connection.context |
561 | 581 | ) |
562 | 582 | |
563 | self.connection.send_query(query, query_id=query_id) | |
583 | self.connection.send_query(query, query_id=query_id, params=params) | |
564 | 584 | self.connection.send_external_tables(external_tables, |
565 | 585 | types_check=types_check) |
566 | 586 | return self.iter_receive_result(with_column_types=with_column_types) |
771 | 791 | |
772 | 792 | elif name == 'settings_is_important': |
773 | 793 | kwargs[name] = asbool(value) |
794 | ||
795 | elif name == 'tcp_keepalive': | |
796 | try: | |
797 | kwargs[name] = asbool(value) | |
798 | except ValueError: | |
799 | parts = value.split(',') | |
800 | kwargs[name] = ( | |
801 | float(parts[0]), float(parts[1]), int(parts[2]) | |
802 | ) | |
803 | elif name == 'client_revision': | |
804 | kwargs[name] = int(value) | |
774 | 805 | |
775 | 806 | # ssl |
776 | 807 | elif name == 'verify': |
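The URL parsing above lets both new connection options be supplied through ``Client.from_url``: ``tcp_keepalive`` accepts a boolean or an ``idle,interval,probes`` triple, and ``client_revision`` is parsed as an integer. An illustrative sketch (values are examples only):

    from clickhouse_driver import Client

    client = Client.from_url(
        'clickhouse://localhost?tcp_keepalive=true&client_revision=54458'
    )
    # Custom keepalive timings: idle time and interval as floats, probes as int.
    client = Client.from_url('clickhouse://localhost?tcp_keepalive=60,5,3')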
27 | 27 | client_version_major = defines.CLIENT_VERSION_MAJOR |
28 | 28 | client_version_minor = defines.CLIENT_VERSION_MINOR |
29 | 29 | client_version_patch = defines.CLIENT_VERSION_PATCH |
30 | client_revision = defines.CLIENT_REVISION | |
31 | 30 | interface = Interface.TCP |
32 | 31 | |
33 | 32 | initial_user = '' |
34 | 33 | initial_query_id = '' |
35 | 34 | initial_address = '0.0.0.0:0' |
36 | 35 | |
37 | def __init__(self, client_name, context): | |
36 | def __init__(self, client_name, context, client_revision): | |
38 | 37 | self.query_kind = ClientInfo.QueryKind.NO_QUERY |
39 | 38 | |
40 | 39 | try: |
43 | 42 | self.os_user = '' |
44 | 43 | self.client_hostname = socket.gethostname() |
45 | 44 | self.client_name = client_name |
45 | self.client_revision = client_revision | |
46 | 46 | |
47 | 47 | self.client_trace_context = OpenTelemetryTraceContext( |
48 | 48 | context.client_settings['opentelemetry_traceparent'], |
27 | 27 | py_types = (list, tuple) |
28 | 28 | |
29 | 29 | def __init__(self, nested_column, **kwargs): |
30 | self.size_column = UInt64Column() | |
30 | self.init_kwargs = kwargs | |
31 | self.size_column = UInt64Column(**kwargs) | |
31 | 32 | self.nested_column = nested_column |
32 | 33 | self._write_depth_0_size = True |
33 | 34 | super(ArrayColumn, self).__init__(**kwargs) |
36 | 37 | def write_data(self, data, buf): |
37 | 38 | # Column of Array(T) is stored in "compact" format and passed to server |
38 | 39 | # wrapped into another Array without size of wrapper array. |
39 | self.nested_column = ArrayColumn(self.nested_column) | |
40 | self.nested_column = ArrayColumn( | |
41 | self.nested_column, **self.init_kwargs | |
42 | ) | |
40 | 43 | self.nested_column.nullable = self.nullable |
41 | 44 | self.nullable = False |
42 | 45 | self._write_depth_0_size = False |
43 | 46 | self._write(data, buf) |
44 | 47 | |
45 | def read_data(self, rows, buf): | |
46 | self.nested_column = ArrayColumn(self.nested_column) | |
48 | def read_data(self, n_rows, buf): | |
49 | self.nested_column = ArrayColumn( | |
50 | self.nested_column, **self.init_kwargs | |
51 | ) | |
47 | 52 | self.nested_column.nullable = self.nullable |
48 | 53 | self.nullable = False |
49 | return self._read(rows, buf)[0] | |
54 | return self._read(n_rows, buf)[0] | |
50 | 55 | |
51 | 56 | def _write_sizes(self, value, buf): |
52 | 57 | nulls_map = [] |
105 | 110 | self._write_data(value, buf) |
106 | 111 | |
107 | 112 | def read_state_prefix(self, buf): |
108 | return self.nested_column.read_state_prefix(buf) | |
113 | super(ArrayColumn, self).read_state_prefix(buf) | |
114 | ||
115 | self.nested_column.read_state_prefix(buf) | |
109 | 116 | |
110 | 117 | def write_state_prefix(self, buf): |
118 | super(ArrayColumn, self).write_state_prefix(buf) | |
119 | ||
111 | 120 | self.nested_column.write_state_prefix(buf) |
112 | 121 | |
113 | 122 | def _read(self, size, buf): |
0 | 0 | from struct import Struct, error as struct_error |
1 | 1 | |
2 | 2 | from . import exceptions |
3 | from ..varint import read_varint | |
4 | ||
5 | ||
6 | class CommonSerialization(object): | |
7 | def __init__(self, column): | |
8 | self.column = column | |
9 | super(CommonSerialization, self).__init__() | |
10 | ||
11 | def read_sparse(self, n_items, buf): | |
12 | return n_items | |
13 | ||
14 | def apply_sparse(self, items): | |
15 | return items | |
16 | ||
17 | ||
18 | class SparseSerialization(CommonSerialization): | |
19 | ||
20 | def __init__(self, *args, **kwargs): | |
21 | self.sparse_indexes = [] | |
22 | self.items_total = None | |
23 | super(SparseSerialization, self).__init__(*args, **kwargs) | |
24 | ||
25 | def read_sparse(self, n_items, buf): | |
26 | sparse_indexes = [] | |
27 | items_total = 0 | |
28 | non_default_items = 0 | |
29 | ||
30 | END_OF_GRANULE_FLAG = 1 << 62 | |
31 | end_of_granule = False | |
32 | ||
33 | while not end_of_granule: | |
34 | group_size = read_varint(buf) | |
35 | end_of_granule = group_size & END_OF_GRANULE_FLAG | |
36 | group_size &= ~END_OF_GRANULE_FLAG | |
37 | ||
38 | items_total += group_size + 1 | |
39 | if not end_of_granule: | |
40 | non_default_items += 1 | |
41 | sparse_indexes.append(items_total) | |
42 | ||
43 | self.sparse_indexes = sparse_indexes | |
44 | self.items_total = items_total | |
45 | ||
46 | return non_default_items | |
47 | ||
48 | def apply_sparse(self, items): | |
49 | default = self.column.null_value | |
50 | if self.column.after_read_items: | |
51 | default = self.column.after_read_items([default])[0] | |
52 | ||
53 | rv = [default] * (self.items_total - 1) | |
54 | for item_number, i in enumerate(self.sparse_indexes): | |
55 | rv[i - 1] = items[item_number] | |
56 | ||
57 | return rv | |
3 | 58 | |
4 | 59 | |
5 | 60 | class Column(object): |
14 | 69 | |
15 | 70 | null_value = 0 |
16 | 71 | |
17 | def __init__(self, types_check=False, **kwargs): | |
72 | def __init__(self, types_check=False, has_custom_serialization=False, | |
73 | **kwargs): | |
18 | 74 | self.nullable = False |
19 | 75 | self.types_check_enabled = types_check |
76 | self.has_custom_serialization = has_custom_serialization | |
77 | self.serialization = CommonSerialization(self) | |
20 | 78 | self.input_null_as_default = False |
21 | if 'context' in kwargs: | |
22 | settings = kwargs['context'].client_settings | |
23 | self.input_null_as_default = settings \ | |
24 | .get('input_format_null_as_default', False) | |
79 | ||
80 | self.context = kwargs['context'] | |
81 | self.input_null_as_default = self.context.client_settings \ | |
82 | .get('input_format_null_as_default', False) | |
25 | 83 | |
26 | 84 | super(Column, self).__init__() |
27 | 85 | |
93 | 151 | raise NotImplementedError |
94 | 152 | |
95 | 153 | def read_data(self, n_items, buf): |
154 | n_items = self.serialization.read_sparse(n_items, buf) | |
155 | ||
96 | 156 | if self.nullable: |
97 | 157 | nulls_map = self._read_nulls_map(n_items, buf) |
98 | 158 | else: |
99 | 159 | nulls_map = None |
100 | 160 | |
101 | return self._read_data(n_items, buf, nulls_map=nulls_map) | |
161 | items = self._read_data(n_items, buf, nulls_map=nulls_map) | |
162 | return self.serialization.apply_sparse(items) | |
102 | 163 | |
103 | 164 | def _read_data(self, n_items, buf, nulls_map=None): |
104 | 165 | items = self.read_items(n_items, buf) |
116 | 177 | raise NotImplementedError |
117 | 178 | |
118 | 179 | def read_state_prefix(self, buf): |
119 | pass | |
180 | if self.has_custom_serialization: | |
181 | use_custom_serialization = read_varint(buf) | |
182 | if use_custom_serialization: | |
183 | self.serialization = SparseSerialization(self) | |
120 | 184 | |
121 | 185 | def write_state_prefix(self, buf): |
122 | 186 | pass |
0 | from .base import Column | |
1 | from .stringcolumn import String | |
2 | from ..reader import read_binary_uint8, read_binary_str | |
3 | from ..util.compat import json | |
4 | from ..writer import write_binary_uint8 | |
5 | ||
6 | ||
7 | class JsonColumn(Column): | |
8 | py_types = (dict, ) | |
9 | ||
10 | # No NULL value actually | |
11 | null_value = {} | |
12 | ||
13 | def __init__(self, column_by_spec_getter, **kwargs): | |
14 | self.column_by_spec_getter = column_by_spec_getter | |
15 | self.string_column = String(**kwargs) | |
16 | super(JsonColumn, self).__init__(**kwargs) | |
17 | ||
18 | def write_state_prefix(self, buf): | |
19 | # Read in binary format. | |
20 | # Write in text format. | |
21 | write_binary_uint8(1, buf) | |
22 | ||
23 | def read_items(self, n_items, buf): | |
24 | read_binary_uint8(buf) | |
25 | spec = read_binary_str(buf) | |
26 | col = self.column_by_spec_getter(spec) | |
27 | col.read_state_prefix(buf) | |
28 | return col.read_data(n_items, buf) | |
29 | ||
30 | def write_items(self, items, buf): | |
31 | items = [x if isinstance(x, str) else json.dumps(x) for x in items] | |
32 | self.string_column.write_items(items, buf) | |
33 | ||
34 | ||
35 | def create_json_column(spec, column_by_spec_getter, column_options): | |
36 | return JsonColumn(column_by_spec_getter, **column_options) |
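A usage sketch for the new JSON column support (hypothetical table name; requires a server that supports ``Object('json')`` with ``allow_experimental_object_type`` enabled):

    from clickhouse_driver import Client

    client = Client('localhost', settings={'allow_experimental_object_type': 1})
    client.execute("CREATE TABLE t_json (obj Object('json')) ENGINE = Memory")
    # Dicts are serialized with json.dumps and written in text format.
    client.execute('INSERT INTO t_json (obj) VALUES', [{'obj': {'a': 1, 'b': 'x'}}])
    # Values are read back in binary format, as dicts by default.
    print(client.execute('SELECT obj FROM t_json'))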
34 | 34 | serialization_type = has_additional_keys_bit | need_update_dictionary |
35 | 35 | |
36 | 36 | def __init__(self, nested_column, **kwargs): |
37 | self.init_kwargs = kwargs | |
37 | 38 | self.nested_column = nested_column |
38 | 39 | super(LowCardinalityColumn, self).__init__(**kwargs) |
39 | 40 | |
40 | 41 | def read_state_prefix(self, buf): |
41 | return read_binary_uint64(buf) | |
42 | super(LowCardinalityColumn, self).read_state_prefix(buf) | |
43 | ||
44 | read_binary_uint64(buf) | |
42 | 45 | |
43 | 46 | def write_state_prefix(self, buf): |
47 | super(LowCardinalityColumn, self).write_state_prefix(buf) | |
48 | ||
44 | 49 | # KeysSerializationVersion. See ClickHouse docs. |
45 | 50 | write_binary_int64(1, buf) |
46 | 51 | |
47 | 52 | def _write_data(self, items, buf): |
48 | 53 | index, keys = [], [] |
49 | 54 | key_by_index_element = {} |
55 | nested_is_nullable = False | |
50 | 56 | |
51 | 57 | if self.nested_column.nullable: |
52 | 58 | # First element represents NULL if column is nullable. |
53 | 59 | index.append(self.nested_column.null_value) |
54 | 60 | # Prevent null map writing. Reset nested column nullable flag. |
55 | 61 | self.nested_column.nullable = False |
62 | nested_is_nullable = True | |
56 | 63 | |
57 | 64 | for x in items: |
58 | 65 | if x is None: |
86 | 93 | return |
87 | 94 | |
88 | 95 | int_type = int(log(len(index), 2) / 8) |
89 | int_column = self.int_types[int_type]() | |
96 | int_column = self.int_types[int_type](**self.init_kwargs) | |
90 | 97 | |
91 | 98 | serialization_type = self.serialization_type | int_type |
92 | 99 | |
93 | 100 | write_binary_int64(serialization_type, buf) |
94 | 101 | write_binary_int64(len(index), buf) |
95 | 102 | |
96 | self.nested_column.write_data(index, buf) | |
103 | if nested_is_nullable: | |
104 | # Since we reset the nested column's nullable flag above, | 
105 | # we have to write the null map manually. Invoking the | 
106 | # write_data method would raise an exception, because | 
107 | # `prepare_data` may not be able to handle the null | 
108 | # value correctly. | 
109 | self.nested_column.write_items( | |
110 | [self.nested_column.null_value], buf) | |
111 | # Remove null map from index, because it is already written. | |
112 | index_to_write = index[1:] | |
113 | self.nested_column.write_data(index_to_write, buf) | |
114 | else: | |
115 | self.nested_column.write_data(index, buf) | |
97 | 116 | write_binary_int64(len(items), buf) |
98 | 117 | int_column.write_items(keys, buf) |
99 | 118 | |
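The branch above is the fix for issue #363: when the nested column is nullable, its null value is written into the dictionary manually and excluded from ``write_data``. A sketch of the insert that previously failed (hypothetical table):

    client.execute(
        'CREATE TABLE lc_null (s LowCardinality(Nullable(String))) ENGINE = Memory'
    )
    # Inserting None into a nullable LowCardinality column now works.
    client.execute('INSERT INTO lc_null (s) VALUES', [('a',), (None,), ('a',)])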
105 | 124 | |
106 | 125 | # Lowest byte contains info about key type. |
107 | 126 | key_type = serialization_type & 0xf |
108 | keys_column = self.int_types[key_type]() | |
127 | keys_column = self.int_types[key_type](**self.init_kwargs) | |
109 | 128 | |
110 | 129 | nullable = self.nested_column.nullable |
111 | 130 | # Prevent null map reading. Reset nested column nullable flag. |
12 | 12 | null_value = {} |
13 | 13 | |
14 | 14 | def __init__(self, key_column, value_column, **kwargs): |
15 | self.offset_column = UInt64Column() | |
15 | self.offset_column = UInt64Column(**kwargs) | |
16 | 16 | self.key_column = key_column |
17 | 17 | self.value_column = value_column |
18 | 18 | super(MapColumn, self).__init__(**kwargs) |
19 | 19 | |
20 | 20 | def read_state_prefix(self, buf): |
21 | super(MapColumn, self).read_state_prefix(buf) | |
22 | ||
21 | 23 | self.key_column.read_state_prefix(buf) |
22 | 24 | self.value_column.read_state_prefix(buf) |
23 | 25 | |
24 | 26 | def write_state_prefix(self, buf): |
27 | super(MapColumn, self).write_state_prefix(buf) | |
28 | ||
25 | 29 | self.key_column.write_state_prefix(buf) |
26 | 30 | self.value_column.write_state_prefix(buf) |
27 | 31 | |
56 | 60 | |
57 | 61 | |
58 | 62 | def create_map_column(spec, column_by_spec_getter, column_options): |
59 | # Match commas outside of parentheses so we don't match the comma in | |
63 | # Match commas outside of parentheses, so we don't match the comma in | |
60 | 64 | # Decimal types. |
61 | 65 | key, value = comma_re.split(spec[4:-1]) |
62 | 66 | key_column = column_by_spec_getter(key.strip()) |
0 | 0 | |
1 | 1 | from .arraycolumn import create_array_column |
2 | from .util import get_inner_spec, get_inner_columns, \ | |
3 | get_inner_columns_with_types | |
2 | from .util import get_inner_spec | |
4 | 3 | |
5 | 4 | |
6 | 5 | def create_nested_column(spec, column_by_spec_getter, column_options): |
7 | 6 | return create_array_column( |
8 | 'Array(Tuple({}))'.format(','.join(get_nested_columns(spec))), | |
7 | 'Array(Tuple({}))'.format(get_inner_spec('Nested', spec)), | |
9 | 8 | column_by_spec_getter, column_options |
10 | 9 | ) |
11 | ||
12 | ||
13 | def get_nested_columns(spec): | |
14 | inner_spec = get_inner_spec('Nested', spec) | |
15 | return get_inner_columns(inner_spec) | |
16 | ||
17 | ||
18 | def get_columns_with_types(spec): | |
19 | inner_spec = get_inner_spec('Nested', spec) | |
20 | return get_inner_columns_with_types(inner_spec) |
20 | 20 | def apply_timezones_after_read(self, dt): |
21 | 21 | timezone = self.timezone if self.timezone else self.local_timezone |
22 | 22 | |
23 | ts = pd.to_datetime(dt, utc=True).tz_convert(timezone) | |
23 | if self.offset_naive and timezone.zone != 'UTC': | |
24 | ts = pd.to_datetime(dt, utc=True).tz_convert(timezone) | |
25 | ts = ts.tz_localize(None) | |
26 | return ts.to_numpy(self.datetime_dtype) | |
24 | 27 | |
25 | if self.offset_naive: | |
26 | ts = ts.tz_localize(None) | |
27 | ||
28 | return ts.to_numpy(self.datetime_dtype) | |
28 | return dt | |
29 | 29 | |
30 | 30 | def apply_timezones_before_write(self, items): |
31 | 31 | if isinstance(items, pd.DatetimeIndex): |
64 | 64 | |
65 | 65 | |
66 | 66 | class NumpyDateTime64Column(NumpyDateTimeColumnBase): |
67 | dtype = np.dtype(np.uint64) | |
67 | dtype = np.dtype(np.int64) | |
68 | 68 | datetime_dtype = 'datetime64[ns]' |
69 | 69 | |
70 | 70 | max_scale = 9 |
74 | 74 | super(NumpyDateTime64Column, self).__init__(**kwargs) |
75 | 75 | |
76 | 76 | def read_items(self, n_items, buf): |
77 | scale = 10 ** self.scale | |
78 | frac_scale = 10 ** (self.max_scale - self.scale) | |
79 | ||
77 | # Clickhouse: t seconds is represented as t * 10^scale. | |
78 | # datetime64[ns]: t seconds is represented as t * 10^9. | |
79 | # Since 0 <= scale <= 9, multiply by the integer 10^(9 - scale). | |
80 | 80 | items = super(NumpyDateTime64Column, self).read_items(n_items, buf) |
81 | 81 | |
82 | seconds = (items // scale).astype('datetime64[s]') | |
83 | microseconds = ((items % scale) * frac_scale).astype('timedelta64[ns]') | |
82 | tmp = np.copy(items) | |
83 | tmp *= 10 ** (9 - self.scale) | |
84 | dt = tmp.view(dtype='datetime64[ns]') | |
84 | 85 | |
85 | dt = seconds + microseconds | |
86 | 86 | return self.apply_timezones_after_read(dt) |
87 | 87 | |
88 | 88 | def write_items(self, items, buf): |
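The rewritten ``read_items`` relies on DateTime64 values being int64 counts of 10^-scale seconds, so multiplying by 10^(9 - scale) yields nanoseconds that can be viewed directly as ``datetime64[ns]``. A small worked sketch of the same arithmetic:

    import numpy as np

    # DateTime64(3) stores milliseconds since the epoch (scale = 3).
    raw = np.array([1_600_000_000_123], dtype=np.int64)
    ns = raw * 10 ** (9 - 3)            # scale up to nanoseconds
    print(ns.view('datetime64[ns]'))    # ['2020-09-13T12:26:40.123000000']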
119 | 119 | |
120 | 120 | context = column_options['context'] |
121 | 121 | |
122 | tz_name = timezone = None | |
122 | tz_name = None | |
123 | 123 | offset_naive = True |
124 | 124 | |
125 | 125 | # As NumPy does not use the local timezone when converting timestamps to
126 | 126 | # datetime, we always need to detect the local timezone for manual conversion.
127 | local_timezone = get_localzone_name_compat() | |
127 | local_tz_name = get_localzone_name_compat() | |
128 | 128 | |
129 | 129 | # Use column's timezone if it's specified. |
130 | 130 | if spec and spec[-1] == ')': |
132 | 132 | offset_naive = False |
133 | 133 | else: |
134 | 134 | if not context.settings.get('use_client_time_zone', False): |
135 | if local_timezone != context.server_info.timezone: | |
135 | if local_tz_name != context.server_info.timezone: | |
136 | 136 | tz_name = context.server_info.timezone |
137 | 137 | |
138 | if tz_name: | |
139 | timezone = get_timezone(tz_name) | |
138 | timezone = get_timezone(tz_name) if tz_name else None | |
139 | local_timezone = get_timezone(local_tz_name) if local_tz_name else None | |
140 | 140 | |
141 | 141 | return cls(timezone=timezone, offset_naive=offset_naive, |
142 | 142 | local_timezone=local_timezone, **column_options) |
36 | 36 | c = pd.Categorical(items) |
37 | 37 | |
38 | 38 | int_type = int(log(len(c.codes), 2) / 8) |
39 | int_column = self.int_types[int_type]() | |
39 | int_column = self.int_types[int_type](**self.init_kwargs) | |
40 | 40 | |
41 | 41 | serialization_type = self.serialization_type | int_type |
42 | 42 | |
65 | 65 | |
66 | 66 | # Lowest byte contains info about key type. |
67 | 67 | key_type = serialization_type & 0xf |
68 | keys_column = self.int_types[key_type]() | |
68 | keys_column = self.int_types[key_type](**self.init_kwargs) | |
69 | 69 | |
70 | 70 | nullable = self.nested_column.nullable |
71 | 71 | # Prevent null map reading. Reset nested column nullable flag. |
14 | 14 | UInt8Column, UInt16Column, UInt32Column, UInt64Column |
15 | 15 | ) |
16 | 16 | from .lowcardinalitycolumn import create_low_cardinality_column |
17 | from .jsoncolumn import create_json_column | |
17 | 18 | from .mapcolumn import create_map_column |
18 | 19 | from .nothingcolumn import NothingColumn |
19 | 20 | from .nullcolumn import NullColumn |
121 | 122 | spec, create_column_with_options, column_options |
122 | 123 | ) |
123 | 124 | |
125 | elif spec.startswith("Object('json')"): | |
126 | return create_json_column( | |
127 | spec, create_column_with_options, column_options | |
128 | ) | |
129 | ||
124 | 130 | else: |
125 | 131 | for alias, primitive in aliases: |
126 | 132 | if spec.startswith(alias): |
136 | 142 | raise errors.UnknownTypeError('Unknown type {}'.format(spec)) |
137 | 143 | |
138 | 144 | |
139 | def read_column(context, column_spec, n_items, buf, use_numpy=None): | |
140 | column_options = {'context': context} | |
145 | def read_column(context, column_spec, n_items, buf, use_numpy=None, | |
146 | has_custom_serialization=False): | |
147 | column_options = { | |
148 | 'context': context, | |
149 | 'has_custom_serialization': has_custom_serialization | |
150 | } | |
141 | 151 | col = get_column_by_spec(column_spec, column_options, use_numpy=use_numpy) |
142 | 152 | col.read_state_prefix(buf) |
143 | 153 | return col.read_data(n_items, buf) |
0 | 0 | |
1 | 1 | from .base import Column |
2 | from .util import get_inner_spec, get_inner_columns | |
2 | from .util import get_inner_spec, get_inner_columns_with_types | |
3 | 3 | |
4 | 4 | |
5 | 5 | class TupleColumn(Column): |
6 | 6 | py_types = (list, tuple) |
7 | 7 | |
8 | def __init__(self, nested_columns, **kwargs): | |
8 | def __init__(self, names, nested_columns, **kwargs): | |
9 | self.names = names | |
9 | 10 | self.nested_columns = nested_columns |
11 | client_settings = kwargs['context'].client_settings | |
12 | settings = kwargs['context'].settings | |
13 | self.namedtuple_as_json = ( | |
14 | settings.get('allow_experimental_object_type', False) and | |
15 | client_settings.get('namedtuple_as_json', True) | |
16 | ) | |
17 | ||
10 | 18 | super(TupleColumn, self).__init__(**kwargs) |
11 | 19 | self.null_value = tuple(x.null_value for x in nested_columns) |
12 | 20 | |
22 | 30 | |
23 | 31 | def read_data(self, n_items, buf): |
24 | 32 | rv = [x.read_data(n_items, buf) for x in self.nested_columns] |
25 | return list(zip(*rv)) | |
33 | rv = list(zip(*rv)) | |
34 | ||
35 | if self.names[0] and self.namedtuple_as_json: | |
36 | return [dict(zip(self.names, x)) for x in rv] | |
37 | else: | |
38 | return rv | |
26 | 39 | |
27 | 40 | def read_items(self, n_items, buf): |
28 | 41 | return self.read_data(n_items, buf) |
29 | 42 | |
43 | def read_state_prefix(self, buf): | |
44 | super(TupleColumn, self).read_state_prefix(buf) | |
45 | ||
46 | for x in self.nested_columns: | |
47 | x.read_state_prefix(buf) | |
48 | ||
49 | def write_state_prefix(self, buf): | |
50 | super(TupleColumn, self).write_state_prefix(buf) | |
51 | ||
52 | for x in self.nested_columns: | |
53 | x.write_state_prefix(buf) | |
54 | ||
30 | 55 | |
31 | 56 | def create_tuple_column(spec, column_by_spec_getter, column_options): |
32 | 57 | inner_spec = get_inner_spec('Tuple', spec) |
33 | columns = get_inner_columns(inner_spec) | |
58 | columns_with_types = get_inner_columns_with_types(inner_spec) | |
59 | names, types = zip(*columns_with_types) | |
34 | 60 | |
35 | return TupleColumn([column_by_spec_getter(x) for x in columns], | |
61 | return TupleColumn(names, [column_by_spec_getter(x) for x in types], | |
36 | 62 | **column_options) |
2 | 2 | import ssl |
3 | 3 | from collections import deque |
4 | 4 | from contextlib import contextmanager |
5 | from sys import platform | |
5 | 6 | from time import time |
6 | 7 | from urllib.parse import urlparse |
7 | 8 | |
20 | 21 | from .queryprocessingstage import QueryProcessingStage |
21 | 22 | from .reader import read_binary_str |
22 | 23 | from .readhelpers import read_exception |
23 | from .settings.writer import write_settings | |
24 | from .settings.writer import write_settings, SettingsFlags | |
24 | 25 | from .streams.native import BlockInputStream, BlockOutputStream |
25 | 26 | from .util.compat import threading |
27 | from .util.escape import escape_params | |
26 | 28 | from .varint import write_varint, read_varint |
27 | 29 | from .writer import write_binary_str |
28 | 30 | |
43 | 45 | |
44 | 46 | class ServerInfo(object): |
45 | 47 | def __init__(self, name, version_major, version_minor, version_patch, |
46 | revision, timezone, display_name): | |
48 | revision, timezone, display_name, used_revision): | |
47 | 49 | self.name = name |
48 | 50 | self.version_major = version_major |
49 | 51 | self.version_minor = version_minor |
51 | 53 | self.revision = revision |
52 | 54 | self.timezone = timezone |
53 | 55 | self.display_name = display_name |
56 | self.used_revision = used_revision | |
54 | 57 | |
55 | 58 | super(ServerInfo, self).__init__() |
56 | 59 | |
65 | 68 | ('name', self.name), |
66 | 69 | ('version', version), |
67 | 70 | ('revision', self.revision), |
71 | ('used revision', self.used_revision), | |
68 | 72 | ('timezone', self.timezone), |
69 | 73 | ('display_name', self.display_name) |
70 | 74 | ] |
123 | 127 | ignored, ``True`` means that the query will |
124 | 128 | fail with UNKNOWN_SETTING error. |
125 | 129 | Defaults to ``False``. |
130 | :param tcp_keepalive: enables `TCP keepalive <https://tldp.org/HOWTO/ | |
131 | TCP-Keepalive-HOWTO/overview.html>`_ on the established | 
132 | connection. If set to ``True``, system keepalive | 
133 | settings are used. You can also specify custom | 
134 | keepalive settings with a tuple: | 
135 | ``(idle_time_sec, interval_sec, probes)``. | |
136 | Defaults to ``False``. | |
137 | :param client_revision: can be used for client version downgrading. | |
138 | Defaults to ``None``. | |
126 | 139 | """ |
127 | 140 | |
128 | 141 | def __init__( |
142 | 155 | server_hostname=None, |
143 | 156 | alt_hosts=None, |
144 | 157 | settings_is_important=False, |
158 | tcp_keepalive=False, | |
159 | client_revision=None | |
145 | 160 | ): |
146 | 161 | if secure: |
147 | 162 | default_port = defines.DEFAULT_SECURE_PORT |
163 | 178 | self.send_receive_timeout = send_receive_timeout |
164 | 179 | self.sync_request_timeout = sync_request_timeout |
165 | 180 | self.settings_is_important = settings_is_important |
181 | self.tcp_keepalive = tcp_keepalive | |
182 | self.client_revision = min( | |
183 | client_revision or defines.CLIENT_REVISION, defines.CLIENT_REVISION | |
184 | ) | |
166 | 185 | |
167 | 186 | self.secure_socket = secure |
168 | 187 | self.verify_cert = verify |
309 | 328 | |
310 | 329 | # performance tweak |
311 | 330 | self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) |
331 | if self.tcp_keepalive: | |
332 | self._set_keepalive() | |
312 | 333 | |
313 | 334 | self.fin = BufferedSocketReader(self.socket, defines.BUFFER_SIZE) |
314 | 335 | self.fout = BufferedSocketWriter(self.socket, defines.BUFFER_SIZE) |
315 | 336 | |
316 | 337 | self.send_hello() |
317 | 338 | self.receive_hello() |
339 | ||
340 | revision = self.server_info.used_revision | |
341 | if revision >= defines.DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM: | |
342 | self.send_addendum() | |
318 | 343 | |
319 | 344 | self.block_in = self.get_block_in_stream() |
320 | 345 | self.block_in_raw = BlockInputStream(self.fin, self.context) |
321 | 346 | self.block_out = self.get_block_out_stream() |
347 | ||
348 | def _set_keepalive(self): | |
349 | self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) | |
350 | ||
351 | if not isinstance(self.tcp_keepalive, tuple): | |
352 | return | |
353 | ||
354 | idle_time_sec, interval_sec, probes = self.tcp_keepalive | |
355 | ||
356 | if platform == 'linux' or platform == 'win32': | |
357 | # This should also work for Windows | |
358 | # starting with Windows 10, version 1709. | |
359 | self.socket.setsockopt( | |
360 | socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, idle_time_sec | |
361 | ) | |
362 | self.socket.setsockopt( | |
363 | socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval_sec | |
364 | ) | |
365 | self.socket.setsockopt( | |
366 | socket.IPPROTO_TCP, socket.TCP_KEEPCNT, probes | |
367 | ) | |
368 | ||
369 | elif platform == 'darwin': | |
370 | TCP_KEEPALIVE = 0x10 | |
371 | # Only the interval is configurable on macOS. | 
372 | self.socket.setsockopt( | |
373 | socket.IPPROTO_TCP, TCP_KEEPALIVE, interval_sec | |
374 | ) | |
322 | 375 | |
323 | 376 | def _format_connection_error(self, e, host, port): |
324 | 377 | err = (e.strerror + ' ') if e.strerror else '' |
409 | 462 | write_varint(defines.CLIENT_VERSION_MINOR, self.fout) |
410 | 463 | # NOTE For backward compatibility of the protocol, |
411 | 464 | # client cannot send its version_patch. |
412 | write_varint(defines.CLIENT_REVISION, self.fout) | |
465 | write_varint(self.client_revision, self.fout) | |
413 | 466 | write_binary_str(self.database, self.fout) |
414 | 467 | write_binary_str(self.user, self.fout) |
415 | 468 | write_binary_str(self.password, self.fout) |
425 | 478 | server_version_minor = read_varint(self.fin) |
426 | 479 | server_revision = read_varint(self.fin) |
427 | 480 | |
481 | used_revision = min(self.client_revision, server_revision) | |
482 | ||
428 | 483 | server_timezone = None |
429 | if server_revision >= \ | |
484 | if used_revision >= \ | |
430 | 485 | defines.DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE: |
431 | 486 | server_timezone = read_binary_str(self.fin) |
432 | 487 | |
433 | 488 | server_display_name = '' |
434 | if server_revision >= \ | |
489 | if used_revision >= \ | |
435 | 490 | defines.DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME: |
436 | 491 | server_display_name = read_binary_str(self.fin) |
437 | 492 | |
438 | 493 | server_version_patch = server_revision |
439 | if server_revision >= \ | |
494 | if used_revision >= \ | |
440 | 495 | defines.DBMS_MIN_REVISION_WITH_VERSION_PATCH: |
441 | 496 | server_version_patch = read_varint(self.fin) |
442 | 497 | |
443 | 498 | self.server_info = ServerInfo( |
444 | 499 | server_name, server_version_major, server_version_minor, |
445 | 500 | server_version_patch, server_revision, |
446 | server_timezone, server_display_name | |
501 | server_timezone, server_display_name, used_revision | |
447 | 502 | ) |
448 | 503 | self.context.server_info = self.server_info |
449 | 504 | |
461 | 516 | packet_type) |
462 | 517 | self.disconnect() |
463 | 518 | raise errors.UnexpectedPacketFromServerError(message) |
519 | ||
520 | def send_addendum(self): | |
521 | revision = self.server_info.used_revision | |
522 | ||
523 | if revision >= defines.DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY: | |
524 | write_binary_str( | |
525 | self.context.client_settings['quota_key'], self.fout | |
526 | ) | |
464 | 527 | |
465 | 528 | def ping(self): |
466 | 529 | timeout = self.sync_request_timeout |
566 | 629 | return BlockOutputStream(self.fout, self.context) |
567 | 630 | |
568 | 631 | def receive_data(self, may_be_compressed=True, may_be_use_numpy=False): |
569 | revision = self.server_info.revision | |
632 | revision = self.server_info.used_revision | |
570 | 633 | |
571 | 634 | if revision >= defines.DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES: |
572 | 635 | read_binary_str(self.fin) |
580 | 643 | |
581 | 644 | def receive_progress(self): |
582 | 645 | progress = Progress() |
583 | progress.read(self.server_info.revision, self.fin) | |
646 | progress.read(self.server_info, self.fin) | |
584 | 647 | return progress |
585 | 648 | |
586 | 649 | def receive_profile_info(self): |
596 | 659 | start = time() |
597 | 660 | write_varint(ClientPacketTypes.DATA, self.fout) |
598 | 661 | |
599 | revision = self.server_info.revision | |
662 | revision = self.server_info.used_revision | |
600 | 663 | if revision >= defines.DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES: |
601 | 664 | write_binary_str(table_name, self.fout) |
602 | 665 | |
603 | 666 | self.block_out.write(block) |
604 | 667 | logger.debug('Block "%s" send time: %f', table_name, time() - start) |
605 | 668 | |
606 | def send_query(self, query, query_id=None): | |
669 | def send_query(self, query, query_id=None, params=None): | |
607 | 670 | if not self.connected: |
608 | 671 | self.connect() |
609 | 672 | |
611 | 674 | |
612 | 675 | write_binary_str(query_id or '', self.fout) |
613 | 676 | |
614 | revision = self.server_info.revision | |
677 | revision = self.server_info.used_revision | |
615 | 678 | if revision >= defines.DBMS_MIN_REVISION_WITH_CLIENT_INFO: |
616 | client_info = ClientInfo(self.client_name, self.context) | |
679 | client_info = ClientInfo(self.client_name, self.context, | |
680 | client_revision=self.client_revision) | |
617 | 681 | client_info.query_kind = ClientInfo.QueryKind.INITIAL_QUERY |
618 | 682 | |
619 | 683 | client_info.write(revision, self.fout) |
622 | 686 | revision >= defines |
623 | 687 | .DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS |
624 | 688 | ) |
689 | settings_flags = 0 | |
690 | if self.settings_is_important: | |
691 | settings_flags |= SettingsFlags.IMPORTANT | |
625 | 692 | write_settings(self.context.settings, self.fout, settings_as_strings, |
626 | self.settings_is_important) | |
693 | settings_flags) | |
627 | 694 | |
628 | 695 | if revision >= defines.DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET: |
629 | 696 | write_binary_str('', self.fout) |
632 | 699 | write_varint(self.compression, self.fout) |
633 | 700 | |
634 | 701 | write_binary_str(query, self.fout) |
702 | ||
703 | if revision >= defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS: | |
704 | # Always settings_as_strings = True | |
705 | escaped = escape_params( | |
706 | params or {}, self.context, for_server=True | |
707 | ) | |
708 | write_settings(escaped, self.fout, True, SettingsFlags.CUSTOM) | |
635 | 709 | |
636 | 710 | logger.debug('Query: %s', query) |
637 | 711 |
24 | 24 | DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME = 54449 |
25 | 25 | DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS = 54451 |
26 | 26 | DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453 |
27 | DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION = 54454 | |
28 | DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM = 54458 | |
29 | DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY = 54458 | |
30 | DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS = 54459 | |
27 | 31 | |
28 | 32 | # Timeouts |
29 | 33 | DBMS_DEFAULT_CONNECT_TIMEOUT_SEC = 10 |
39 | 43 | CLIENT_VERSION_MAJOR = 20 |
40 | 44 | CLIENT_VERSION_MINOR = 10 |
41 | 45 | CLIENT_VERSION_PATCH = 2 |
42 | CLIENT_REVISION = 54453 | |
46 | CLIENT_REVISION = DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS | |
43 | 47 | |
44 | 48 | BUFFER_SIZE = 1048576 |
45 | 49 |
11 | 11 | |
12 | 12 | super(Progress, self).__init__() |
13 | 13 | |
14 | def read(self, server_revision, fin): | |
14 | def read(self, server_info, fin): | |
15 | 15 | self.rows = read_varint(fin) |
16 | 16 | self.bytes = read_varint(fin) |
17 | 17 | |
18 | revision = server_revision | |
18 | revision = server_info.used_revision | |
19 | 19 | if revision >= defines.DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS: |
20 | 20 | self.total_rows = read_varint(fin) |
21 | 21 |
6 | 6 | logger = logging.getLogger(__name__) |
7 | 7 | |
8 | 8 | |
9 | def write_settings(settings, buf, settings_as_strings, is_important=False): | |
9 | class SettingsFlags: | |
10 | IMPORTANT = 0x1 | |
11 | CUSTOM = 0x2 | |
12 | ||
13 | ||
14 | def write_settings(settings, buf, settings_as_strings, flags): | |
10 | 15 | for setting, value in (settings or {}).items(): |
11 | 16 | # If the server supports settings as strings we do not need to know
12 | 17 | # anything about them, so we can write any setting. |
13 | 18 | if settings_as_strings: |
14 | 19 | write_binary_str(setting, buf) |
15 | write_binary_uint8(int(is_important), buf) | |
20 | write_binary_uint8(flags, buf) | |
16 | 21 | write_binary_str(str(value), buf) |
17 | 22 | |
18 | 23 | else: |
0 | 0 | from ..block import ColumnOrientedBlock, BlockInfo |
1 | 1 | from ..columns.service import read_column, write_column |
2 | from ..reader import read_binary_str | |
2 | from ..reader import read_binary_str, read_binary_uint8 | |
3 | 3 | from ..varint import write_varint, read_varint |
4 | from ..writer import write_binary_str | |
4 | from ..writer import write_binary_str, write_binary_uint8 | |
5 | 5 | from .. import defines |
6 | 6 | |
7 | 7 | |
13 | 13 | super(BlockOutputStream, self).__init__() |
14 | 14 | |
15 | 15 | def write(self, block): |
16 | revision = self.context.server_info.revision | |
16 | revision = self.context.server_info.used_revision | |
17 | 17 | if revision >= defines.DBMS_MIN_REVISION_WITH_BLOCK_INFO: |
18 | 18 | block.info.write(self.fout) |
19 | 19 | |
34 | 34 | except IndexError: |
35 | 35 | raise ValueError('Different rows length') |
36 | 36 | |
37 | if revision >= \ | |
38 | defines.DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION: | |
39 | # We always write data without custom (sparse) serialization. | 
40 | write_binary_uint8(0, self.fout) | |
41 | ||
37 | 42 | write_column(self.context, col_name, col_type, items, |
38 | 43 | self.fout, types_check=block.types_check) |
39 | 44 | |
53 | 58 | def read(self, use_numpy=None): |
54 | 59 | info = BlockInfo() |
55 | 60 | |
56 | revision = self.context.server_info.revision | |
61 | revision = self.context.server_info.used_revision | |
57 | 62 | if revision >= defines.DBMS_MIN_REVISION_WITH_BLOCK_INFO: |
58 | 63 | info.read(self.fin) |
59 | 64 | |
69 | 74 | names.append(column_name) |
70 | 75 | types.append(column_type) |
71 | 76 | |
77 | has_custom_serialization = False | |
78 | if revision >= defines.DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION: | |
79 | has_custom_serialization = bool(read_binary_uint8(self.fin)) | |
80 | ||
72 | 81 | if n_rows: |
73 | column = read_column(self.context, column_type, n_rows, | |
74 | self.fin, use_numpy=use_numpy) | |
82 | column = read_column( | |
83 | self.context, column_type, n_rows, | |
84 | self.fin, use_numpy=use_numpy, | |
85 | has_custom_serialization=has_custom_serialization | |
86 | ) | |
75 | 87 | data.append(column) |
76 | 88 | |
77 | 89 | if self.context.client_settings['use_numpy']: |
3 | 3 | import threading |
4 | 4 | except ImportError: |
5 | 5 | import dummy_threading as threading # noqa: F401 |
6 | ||
7 | import json # noqa: F401 | |
8 | try: | |
9 | import orjson as json # noqa: F811 | |
10 | except ImportError: | |
11 | pass | |
12 | ||
13 | try: | |
14 | import ujson as json # noqa: F811,F401 | |
15 | except ImportError: | |
16 | pass | |
17 | ||
6 | 18 | |
7 | 19 | try: |
8 | 20 | # since tzlocal 4.0+ |
0 | from datetime import date, datetime | |
0 | from datetime import date, datetime, time | |
1 | 1 | from enum import Enum |
2 | from functools import wraps | |
2 | 3 | from uuid import UUID |
3 | 4 | |
4 | 5 | from pytz import timezone |
27 | 28 | return "'%s'" % item.strftime('%Y-%m-%d %H:%M:%S') |
28 | 29 | |
29 | 30 | |
30 | def escape_param(item, context): | |
31 | def maybe_enquote_for_server(f): | |
32 | @wraps(f) | |
33 | def wrapper(*args, **kwargs): | |
34 | rv = f(*args, **kwargs) | |
35 | ||
36 | if kwargs.get('for_server'): | |
37 | is_str = isinstance(rv, str) | |
38 | ||
39 | if not is_str or (is_str and not rv.startswith("'")): | |
40 | rv = "'%s'" % rv | |
41 | ||
42 | return rv | |
43 | ||
44 | return wrapper | |
45 | ||
46 | ||
47 | @maybe_enquote_for_server | |
48 | def escape_param(item, context, for_server=False): | |
31 | 49 | if item is None: |
32 | 50 | return 'NULL' |
33 | 51 | |
37 | 55 | elif isinstance(item, date): |
38 | 56 | return "'%s'" % item.strftime('%Y-%m-%d') |
39 | 57 | |
58 | elif isinstance(item, time): | |
59 | return "'%s'" % item.strftime('%H:%M:%S') | |
60 | ||
40 | 61 | elif isinstance(item, str): |
62 | # We need double escaping for server-side parameters. | |
63 | if for_server: | |
64 | item = ''.join(escape_chars_map.get(c, c) for c in item) | |
41 | 65 | return "'%s'" % ''.join(escape_chars_map.get(c, c) for c in item) |
42 | 66 | |
43 | 67 | elif isinstance(item, list): |
44 | return "[%s]" % ', '.join(str(escape_param(x, context)) for x in item) | |
68 | return "[%s]" % ', '.join( | |
69 | str(escape_param(x, context, for_server=for_server)) for x in item | |
70 | ) | |
45 | 71 | |
46 | 72 | elif isinstance(item, tuple): |
47 | return "(%s)" % ', '.join(str(escape_param(x, context)) for x in item) | |
73 | return "(%s)" % ', '.join( | |
74 | str(escape_param(x, context, for_server=for_server)) for x in item | |
75 | ) | |
48 | 76 | |
49 | 77 | elif isinstance(item, Enum): |
50 | return escape_param(item.value, context) | |
78 | return escape_param(item.value, context, for_server=for_server) | |
51 | 79 | |
52 | 80 | elif isinstance(item, UUID): |
53 | 81 | return "'%s'" % str(item) |
56 | 84 | return item |
57 | 85 | |
58 | 86 | |
59 | def escape_params(params, context): | |
87 | def escape_params(params, context, for_server=False): | |
60 | 88 | escaped = {} |
61 | 89 | |
62 | 90 | for key, value in params.items(): |
63 | escaped[key] = escape_param(value, context) | |
91 | escaped[key] = escape_param(value, context, for_server=for_server) | |
64 | 92 | |
65 | 93 | return escaped |
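With ``for_server=True`` every substituted value is sent as a quoted string, and string values are escaped twice because the server unescapes them once more when parsing query parameters. A rough illustration (the context argument is unused for these value types, so ``None`` is passed; the output shape is described in comments rather than asserted exactly):

    from clickhouse_driver.util.escape import escape_params

    params = escape_params({'limit': 5, 'name': "O'Brien"}, None, for_server=True)
    # Both values come back wrapped in single quotes; the quote inside
    # "O'Brien" is escaped twice so server-side unescaping restores it.
    print(params)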
1 | 1 | # |
2 | 2 | |
3 | 3 | # You can set these variables from the command line. |
4 | SPHINXOPTS = | |
4 | SPHINXOPTS = -W | |
5 | 5 | SPHINXBUILD = sphinx-build |
6 | 6 | SOURCEDIR = . |
7 | 7 | BUILDDIR = _build |
65 | 65 | # |
66 | 66 | # This is also used if you do content translation via gettext catalogs. |
67 | 67 | # Usually you set "language" from the command line for these cases. |
68 | language = None | |
68 | language = 'en' | |
69 | 69 | |
70 | 70 | # List of patterns, relative to source directory, that match files and |
71 | 71 | # directories to ignore when looking for source files. |
44 | 44 | |
45 | 45 | .. code-block:: bash |
46 | 46 | |
47 | docker run --rm -e "TZ=Europe/Moscow" -p 127.0.0.1:9000:9000 --name test-clickhouse-server yandex/clickhouse-server:$VERSION | |
47 | docker run --rm -e "TZ=Europe/Moscow" -p 127.0.0.1:9000:9000 --name test-clickhouse-server clickhouse/clickhouse-server:$VERSION | |
48 | 48 | |
49 | 49 | Create container with the same version of ``clickhouse-client``: |
50 | 50 | |
51 | 51 | .. code-block:: bash |
52 | 52 | |
53 | docker run --rm --entrypoint "/bin/sh" --name test-clickhouse-client --link test-clickhouse-server:clickhouse-server yandex/clickhouse-client:$VERSION -c 'while :; do sleep 1; done' | |
53 | docker run --rm --entrypoint "/bin/sh" --name test-clickhouse-client --link test-clickhouse-server:clickhouse-server clickhouse/clickhouse-client:$VERSION -c 'while :; do sleep 1; done' | |
54 | 54 | |
55 | 55 | Create ``clickhouse-client`` script on your host machine: |
56 | 56 |
278 | 278 | |
279 | 279 | All queries within established connection will be sent to the same host. |
280 | 280 | |
281 | *New in version 0.2.5.* | |
282 | ||
281 | 283 | You can specify the `round_robin` parameter alongside `alt_hosts`. The host
282 | 284 | for query execution will be picked with a round-robin algorithm.
283 | 285 | |
543 | 545 | |
544 | 546 | Upon exit, any established connection to the ClickHouse server will be closed |
545 | 547 | automatically. |
548 | ||
549 | ||
550 | TCP keepalive | |
551 | ------------- | |
552 | ||
553 | *New in version 0.2.6.* | |
554 | ||
555 | You can enable `TCP keepalive | |
556 | <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html>`_ on the connection | 
557 | to the ClickHouse server. This setting is disabled by default. When the | 
558 | ``tcp_keepalive`` parameter is set to ``True``, system TCP keepalive settings are used. | 
559 | ||
560 | .. code-block:: python | |
561 | ||
562 | >>> client = Client('localhost', tcp_keepalive=True) | |
563 | ||
564 | ||
565 | On Linux, the default TCP keepalive settings can be found in: | 
566 | ||
567 | .. code-block:: bash | |
568 | ||
569 | idle_time_sec - /proc/sys/net/ipv4/tcp_keepalive_time | |
570 | interval_sec - /proc/sys/net/ipv4/tcp_keepalive_intvl | |
571 | probes - /proc/sys/net/ipv4/tcp_keepalive_probes | |
572 | ||
573 | You can also specify custom keepalive settings with a tuple | 
574 | ``(idle_time_sec, interval_sec, probes)``: | |
575 | ||
576 | .. code-block:: python | |
577 | ||
578 | >>> client = Client('localhost', tcp_keepalive=(60.5, 5.1, 2)) | |
579 | ||
580 | .. note:: | |
581 | ||
582 | On Linux and Windows all parameters (idle time, interval and probes) | 
583 | can be changed for the socket. | 
584 | ||
585 | On macOS only the second parameter, ``interval_sec``, can be changed | 
586 | for the socket. ``idle_time_sec`` and ``probes`` are not used, but should | 
587 | be specified for uniformity. | 
5 | 5 | Python Version |
6 | 6 | -------------- |
7 | 7 | |
8 | Clickhouse-driver supports Python 3.4 and newer and PyPy. | |
8 | Clickhouse-driver supports Python 3.7 and newer and PyPy. | |
9 | 9 | |
10 | 10 | Build Dependencies |
11 | 11 | ------------------ |
20 | 20 | |
21 | 21 | By default there are wheels for Linux, Mac OS X and Windows. |
22 | 22 | |
23 | Packages for Linux and Mac OS X are available for python: 3.6 -- 3.11. | |
23 | Packages for Linux and Mac OS X are available for python: 3.7 -- 3.11. | |
24 | 24 | |
25 | Packages for Windows are available for python: 3.6 -- 3.11. | |
25 | Packages for Windows are available for python: 3.7 -- 3.11. | |
26 | 26 | |
27 | 27 | Starting from version *0.2.3* there are wheels for musl-based Linux distributions. |
28 | 28 |
138 | 138 | |
139 | 139 | >>> settings = {'input_format_null_as_default': True} |
140 | 140 | >>> client = Client('localhost', settings=settings) |
141 | ||
142 | ||
143 | Client revision downgrading | |
144 | --------------------------- | |
145 | ||
146 | *New in version 0.2.6.* | |
147 | ||
148 | For various purposes the client revision can be downgraded with the | 
149 | ``client_revision`` parameter. | 
150 | ||
151 | .. code-block:: python | |
152 | ||
153 | >>> from clickhouse_driver import Client, defines | |
154 | >>> | |
155 | >>> client = Client('localhost', client_revision=defines.DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME) | |
156 | >>> client.execute('SELECT version()') |
163 | 163 | |
164 | 164 | .. code-block:: bash |
165 | 165 | |
166 | pip install clickhouse-driver requests | |
166 | pip install clickhouse-driver requests clickhouse-connect | |
167 | 167 | |
168 | 168 | For fast json parsing we'll use ``ujson`` package: |
169 | 169 | |
174 | 174 | Installed packages: :: |
175 | 175 | |
176 | 176 | $ pip freeze |
177 | certifi==2020.4.5.1 | |
178 | chardet==3.0.4 | |
179 | clickhouse-driver==0.1.3 | |
180 | idna==2.9 | |
181 | pytz==2019.3 | |
182 | requests==2.23.0 | |
183 | tzlocal==2.0.0 | |
184 | ujson==2.0.3 | |
185 | urllib3==1.25.9 | |
177 | backports.zoneinfo==0.2.1 | |
178 | certifi==2022.12.7 | |
179 | charset-normalizer==3.0.1 | |
180 | clickhouse-connect==0.5.0 | |
181 | clickhouse-driver==0.2.5 | |
182 | idna==3.4 | |
183 | lz4==4.3.2 | |
184 | pytz==2022.7.1 | |
185 | pytz-deprecation-shim==0.1.0.post0 | |
186 | requests==2.28.2 | |
187 | tzdata==2022.7 | |
188 | tzlocal==4.2 | |
189 | ujson==5.7.0 | |
190 | urllib3==1.26.14 | |
191 | zstandard==0.19.0 | |
192 | ||
193 | For ``clickhouse-connect`` we need to turn off compression with | |
194 | ``compress=False`` to eliminate decompression overhead. This package also | |
195 | adds a ``LIMIT`` clause to the query by default. | |
196 | Let's disable it with ``query_limit=None``. | |
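For reference, this is roughly how the ``clickhouse-connect`` client is created in the
benchmark scripts below (a minimal sketch; the host and options simply mirror those scripts):

.. code-block:: python

    import clickhouse_connect

    # query_limit=None removes the implicit LIMIT clause and compress=False
    # disables response compression, keeping results comparable with the
    # clickhouse-driver scripts.
    client = clickhouse_connect.get_client(
        host='localhost', query_limit=None, compress=False
    )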
186 | 197 | |
187 | 198 | Versions |
188 | 199 | -------- |
189 | 200 | |
190 | Machine: Linux ThinkPad-T460 4.4.0-177-generic #207-Ubuntu SMP Mon Mar 16 01:16:10 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux | |
191 | ||
192 | Python: CPython 3.6.5 (default, May 30 2019, 14:48:31) [GCC 5.4.0 20160609] | |
201 | Machine: Linux klebedev-ThinkPad-T460 5.15.0-57-generic #63-Ubuntu SMP Thu Nov 24 13:43:17 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux | |
202 | ||
203 | Python: Python 3.8.12 (default, Apr 13 2022, 21:16:23) [GCC 11.2.0] | |
193 | 204 | |
194 | 205 | |
195 | 206 | Benchmarking |
246 | 257 | |
247 | 258 | Fetching not parsed data with pure requests (1) |
248 | 259 | |
249 | .. code-block:: python | |
250 | ||
251 | import sys | |
252 | import requests | |
253 | ||
254 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT {}".format(sys.argv[1], sys.argv[2]) | |
255 | data = requests.get('http://localhost:8123/', params={'query': query}) | |
260 | .. literalinclude:: ../perf/script_01.py | |
261 | :language: python | |
256 | 262 | |
257 | 263 | |
258 | 264 | Parsed rows |
260 | 266 | |
261 | 267 | A line split into elements will be considered "parsed" for the TSV format (2)
262 | 268 | |
263 | .. code-block:: python | |
264 | ||
265 | import sys | |
266 | import requests | |
267 | ||
268 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(sys.argv[1]) | |
269 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
270 | ||
271 | data = [line.decode('utf-8').split('\t') for line in resp.iter_lines(chunk_size=10000)] | |
272 | ||
269 | .. literalinclude:: ../perf/script_02.py | |
270 | :language: python | |
273 | 271 | |
274 | 272 | Now we cast each element to its data type (2.5)
275 | 273 | |
276 | .. code-block:: python | |
277 | ||
278 | from datetime import date | |
279 | import sys | |
280 | import requests | |
281 | ||
282 | ||
283 | def get_python_type(ch_type): | |
284 | if ch_type.startswith('Int') or ch_type.startswith('UInt'): | |
285 | return int | |
286 | ||
287 | elif ch_type == 'String' or ch_type.startswith('FixedString'): | |
288 | return None | |
289 | ||
290 | elif ch_type == 'Date': | |
291 | return lambda value: date(*[int(x) for x in value.split('-')]) | |
292 | ||
293 | raise ValueError(f'Unsupported type: "{ch_type}"') | |
294 | ||
295 | ||
296 | resp = requests.get('http://localhost:8123', params={'query': 'describe table perftest.ontime FORMAT TSV'}) | |
297 | ch_types = [x.split('\t')[1] for x in resp.text.split('\n') if x] | |
298 | python_types = [get_python_type(x) for x in ch_types] | |
299 | ||
300 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(sys.argv[1]) | |
301 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
302 | ||
303 | data = [] | |
304 | ||
305 | for line in resp.iter_lines(chunk_size=10000): | |
306 | data.append([cls(x) if cls else x for x, cls in zip(line.decode('utf-8').split('\t'), python_types)]) | |
307 | ||
274 | .. literalinclude:: ../perf/script_02_5.py | |
275 | :language: python | |
308 | 276 | |
309 | 277 | JSONEachRow format can be loaded with json loads (3) |
310 | 278 | |
311 | .. code-block:: python | |
312 | ||
313 | import sys | |
314 | import requests | |
315 | from ujson import loads | |
316 | ||
317 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT JSONEachRow".format(sys.argv[1]) | |
318 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
319 | ||
320 | data = [list(loads(line).values()) for line in resp.iter_lines(chunk_size=10000)] | |
321 | ||
279 | .. literalinclude:: ../perf/script_03.py | |
280 | :language: python | |
322 | 281 | |
323 | 282 | Get fully parsed rows with ``clickhouse-driver`` in Native format (4) |
324 | 283 | |
325 | .. code-block:: python | |
326 | ||
327 | import sys | |
328 | from clickhouse_driver import Client | |
329 | ||
330 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}'".format(sys.argv[1]) | |
331 | client = Client.from_url('clickhouse://localhost') | |
332 | ||
333 | data = client.execute(query) | |
284 | .. literalinclude:: ../perf/script_04.py | |
285 | :language: python | |
286 | ||
287 | Get fully parsed rows with ``clickhouse-connect`` (14) | |
288 | ||
289 | .. literalinclude:: ../perf/script_14.py | |
290 | :language: python | |
334 | 291 | |
335 | 292 | |
336 | 293 | Iteration over rows |
338 | 295 | |
339 | 296 | Iteration over TSV (5) |
340 | 297 | |
341 | .. code-block:: python | |
342 | ||
343 | import sys | |
344 | import requests | |
345 | ||
346 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(sys.argv[1]) | |
347 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
348 | ||
349 | for line in resp.iter_lines(chunk_size=10000): | |
350 | line = line.decode('utf-8').split('\t') | |
351 | ||
298 | .. literalinclude:: ../perf/script_05.py | |
299 | :language: python | |
352 | 300 | |
353 | 301 | Now we cast each element to its data type (5.5)
354 | 302 | |
355 | .. code-block:: python | |
356 | ||
357 | from datetime import date | |
358 | import sys | |
359 | import requests | |
360 | ||
361 | ||
362 | def get_python_type(ch_type): | |
363 | if ch_type.startswith('Int') or ch_type.startswith('UInt'): | |
364 | return int | |
365 | ||
366 | elif ch_type == 'String' or ch_type.startswith('FixedString'): | |
367 | return None | |
368 | ||
369 | elif ch_type == 'Date': | |
370 | return lambda value: date(*[int(x) for x in value.split('-')]) | |
371 | ||
372 | raise ValueError(f'Unsupported type: "{ch_type}"') | |
373 | ||
374 | ||
375 | resp = requests.get('http://localhost:8123', params={'query': 'describe table perftest.ontime FORMAT TSV'}) | |
376 | ch_types = [x.split('\t')[1] for x in resp.text.split('\n') if x] | |
377 | python_types = [get_python_type(x) for x in ch_types] | |
378 | ||
379 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(sys.argv[1]) | |
380 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
381 | ||
382 | for line in resp.iter_lines(chunk_size=10000): | |
383 | line = [cls(x) if cls else x for x, cls in zip(line.decode('utf-8').split('\t'), python_types)] | |
384 | ||
303 | .. literalinclude:: ../perf/script_05_5.py | |
304 | :language: python | |
385 | 305 | |
386 | 306 | Iteration over JSONEachRow (6) |
387 | 307 | |
388 | .. code-block:: python | |
389 | ||
390 | import sys | |
391 | import requests | |
392 | from ujson import loads | |
393 | ||
394 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT JSONEachRow".format(sys.argv[1]) | |
395 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
396 | ||
397 | for line in resp.iter_lines(chunk_size=10000): | |
398 | line = list(loads(line).values()) | |
399 | ||
308 | .. literalinclude:: ../perf/script_06.py | |
309 | :language: python | |
400 | 310 | |
401 | 311 | Iteration over rows with ``clickhouse-driver`` in Native format (7) |
402 | 312 | |
403 | .. code-block:: python | |
404 | ||
405 | import sys | |
406 | from clickhouse_driver import Client | |
407 | ||
408 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}'".format(sys.argv[1]) | |
409 | client = Client.from_url('clickhouse://localhost') | |
410 | ||
411 | for row in client.execute_iter(query): | |
412 | pass | |
313 | .. literalinclude:: ../perf/script_07.py | |
314 | :language: python | |
315 | ||
316 | Iteration over rows with ``clickhouse-connect`` (17) | |
317 | ||
318 | .. literalinclude:: ../perf/script_17.py | |
319 | :language: python | |
413 | 320 | |
414 | 321 | |
415 | 322 | Iteration over string rows |
419 | 326 | |
420 | 327 | Iteration over TSV (8) |
421 | 328 | |
422 | .. code-block:: python | |
423 | ||
424 | import sys | |
425 | import requests | |
426 | ||
427 | cols = [ | |
428 | 'UniqueCarrier', 'Carrier', 'TailNum', 'FlightNum', 'Origin', 'OriginCityName', 'OriginState', | |
429 | 'OriginStateFips', 'OriginStateName', 'Dest', 'DestCityName', 'DestState', 'DestStateFips', | |
430 | 'DestStateName', 'DepartureDelayGroups', 'DepTimeBlk', 'ArrTimeBlk', 'CancellationCode', | |
431 | 'FirstDepTime', 'TotalAddGTime', 'LongestAddGTime', 'DivAirportLandings', 'DivReachedDest', | |
432 | 'DivActualElapsedTime', 'DivArrDelay', 'DivDistance', 'Div1Airport', 'Div1WheelsOn', 'Div1TotalGTime', | |
433 | 'Div1LongestGTime', 'Div1WheelsOff', 'Div1TailNum', 'Div2Airport', 'Div2WheelsOn', 'Div2TotalGTime', | |
434 | 'Div2LongestGTime', 'Div2WheelsOff', 'Div2TailNum', 'Div3Airport', 'Div3WheelsOn', 'Div3TotalGTime', | |
435 | 'Div3LongestGTime', 'Div3WheelsOff', 'Div3TailNum', 'Div4Airport', 'Div4WheelsOn', 'Div4TotalGTime', | |
436 | 'Div4LongestGTime', 'Div4WheelsOff', 'Div4TailNum', 'Div5Airport', 'Div5WheelsOn', 'Div5TotalGTime', | |
437 | 'Div5LongestGTime', 'Div5WheelsOff', 'Div5TailNum' | |
438 | ] | |
439 | ||
440 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(', '.join(cols), sys.argv[1]) | |
441 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
442 | ||
443 | for line in resp.iter_lines(chunk_size=10000): | |
444 | line = line.decode('utf-8').split('\t') | |
445 | ||
329 | .. literalinclude:: ../perf/script_08.py | |
330 | :language: python | |
446 | 331 | |
447 | 332 | Iteration over JSONEachRow (9) |
448 | 333 | |
449 | .. code-block:: python | |
450 | ||
451 | import sys | |
452 | import requests | |
453 | from ujson import loads | |
454 | ||
455 | cols = [ | |
456 | 'UniqueCarrier', 'Carrier', 'TailNum', 'FlightNum', 'Origin', 'OriginCityName', 'OriginState', | |
457 | 'OriginStateFips', 'OriginStateName', 'Dest', 'DestCityName', 'DestState', 'DestStateFips', | |
458 | 'DestStateName', 'DepartureDelayGroups', 'DepTimeBlk', 'ArrTimeBlk', 'CancellationCode', | |
459 | 'FirstDepTime', 'TotalAddGTime', 'LongestAddGTime', 'DivAirportLandings', 'DivReachedDest', | |
460 | 'DivActualElapsedTime', 'DivArrDelay', 'DivDistance', 'Div1Airport', 'Div1WheelsOn', 'Div1TotalGTime', | |
461 | 'Div1LongestGTime', 'Div1WheelsOff', 'Div1TailNum', 'Div2Airport', 'Div2WheelsOn', 'Div2TotalGTime', | |
462 | 'Div2LongestGTime', 'Div2WheelsOff', 'Div2TailNum', 'Div3Airport', 'Div3WheelsOn', 'Div3TotalGTime', | |
463 | 'Div3LongestGTime', 'Div3WheelsOff', 'Div3TailNum', 'Div4Airport', 'Div4WheelsOn', 'Div4TotalGTime', | |
464 | 'Div4LongestGTime', 'Div4WheelsOff', 'Div4TailNum', 'Div5Airport', 'Div5WheelsOn', 'Div5TotalGTime', | |
465 | 'Div5LongestGTime', 'Div5WheelsOff', 'Div5TailNum' | |
466 | ] | |
467 | ||
468 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}' FORMAT JSONEachRow".format(', '.join(cols), sys.argv[1]) | |
469 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
470 | ||
471 | for line in resp.iter_lines(chunk_size=10000): | |
472 | line = list(loads(line).values()) | |
473 | ||
334 | .. literalinclude:: ../perf/script_09.py | |
335 | :language: python | |
474 | 336 | |
475 | 337 | Iteration over string rows with ``clickhouse-driver`` in Native format (10) |
476 | 338 | |
477 | .. code-block:: python | |
478 | ||
479 | import sys | |
480 | from clickhouse_driver import Client | |
481 | ||
482 | cols = [ | |
483 | 'UniqueCarrier', 'Carrier', 'TailNum', 'FlightNum', 'Origin', 'OriginCityName', 'OriginState', | |
484 | 'OriginStateFips', 'OriginStateName', 'Dest', 'DestCityName', 'DestState', 'DestStateFips', | |
485 | 'DestStateName', 'DepartureDelayGroups', 'DepTimeBlk', 'ArrTimeBlk', 'CancellationCode', | |
486 | 'FirstDepTime', 'TotalAddGTime', 'LongestAddGTime', 'DivAirportLandings', 'DivReachedDest', | |
487 | 'DivActualElapsedTime', 'DivArrDelay', 'DivDistance', 'Div1Airport', 'Div1WheelsOn', 'Div1TotalGTime', | |
488 | 'Div1LongestGTime', 'Div1WheelsOff', 'Div1TailNum', 'Div2Airport', 'Div2WheelsOn', 'Div2TotalGTime', | |
489 | 'Div2LongestGTime', 'Div2WheelsOff', 'Div2TailNum', 'Div3Airport', 'Div3WheelsOn', 'Div3TotalGTime', | |
490 | 'Div3LongestGTime', 'Div3WheelsOff', 'Div3TailNum', 'Div4Airport', 'Div4WheelsOn', 'Div4TotalGTime', | |
491 | 'Div4LongestGTime', 'Div4WheelsOff', 'Div4TailNum', 'Div5Airport', 'Div5WheelsOn', 'Div5TotalGTime', | |
492 | 'Div5LongestGTime', 'Div5WheelsOff', 'Div5TailNum' | |
493 | ] | |
494 | ||
495 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}'".format(', '.join(cols), sys.argv[1]) | |
496 | client = Client.from_url('clickhouse://localhost') | |
497 | ||
498 | for row in client.execute_iter(query): | |
499 | pass | |
339 | .. literalinclude:: ../perf/script_10.py | |
340 | :language: python | |
341 | ||
342 | Iteration over string rows with ``clickhouse-connect`` (15) | |
343 | ||
344 | .. literalinclude:: ../perf/script_15.py | |
345 | :language: python | |
500 | 346 | |
501 | 347 | |
502 | 348 | Iteration over int rows |
504 | 350 | |
505 | 351 | Iteration over TSV (11) |
506 | 352 | |
507 | .. code-block:: python | |
508 | ||
509 | import sys | |
510 | import requests | |
511 | ||
512 | cols = [ | |
513 | 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'AirlineID', 'OriginAirportID', 'OriginAirportSeqID', | |
514 | 'OriginCityMarketID', 'OriginWac', 'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestWac', | |
515 | 'CRSDepTime', 'DepTime', 'DepDelay', 'DepDelayMinutes', 'DepDel15', 'TaxiOut', 'WheelsOff', 'WheelsOn', | |
516 | 'TaxiIn', 'CRSArrTime', 'ArrTime', 'ArrDelay', 'ArrDelayMinutes', 'ArrDel15', 'ArrivalDelayGroups', | |
517 | 'Cancelled', 'Diverted', 'CRSElapsedTime', 'ActualElapsedTime', 'AirTime', 'Flights', 'Distance', | |
518 | 'DistanceGroup', 'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay', | |
519 | 'Div1AirportID', 'Div1AirportSeqID', 'Div2AirportID', 'Div2AirportSeqID', 'Div3AirportID', | |
520 | 'Div3AirportSeqID', 'Div4AirportID', 'Div4AirportSeqID', 'Div5AirportID', 'Div5AirportSeqID' | |
521 | ] | |
522 | ||
523 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(', '.join(cols), sys.argv[1]) | |
524 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
525 | ||
526 | for line in resp.iter_lines(chunk_size=10000): | |
527 | line = [int(x) for x in line.split(b'\t')] | |
528 | ||
353 | .. literalinclude:: ../perf/script_11.py | |
354 | :language: python | |
529 | 355 | |
530 | 356 | Iteration over JSONEachRow (12) |
531 | 357 | |
532 | .. code-block:: python | |
533 | ||
534 | import sys | |
535 | import requests | |
536 | from ujson import loads | |
537 | ||
538 | cols = [ | |
539 | 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'AirlineID', 'OriginAirportID', 'OriginAirportSeqID', | |
540 | 'OriginCityMarketID', 'OriginWac', 'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestWac', | |
541 | 'CRSDepTime', 'DepTime', 'DepDelay', 'DepDelayMinutes', 'DepDel15', 'TaxiOut', 'WheelsOff', 'WheelsOn', | |
542 | 'TaxiIn', 'CRSArrTime', 'ArrTime', 'ArrDelay', 'ArrDelayMinutes', 'ArrDel15', 'ArrivalDelayGroups', | |
543 | 'Cancelled', 'Diverted', 'CRSElapsedTime', 'ActualElapsedTime', 'AirTime', 'Flights', 'Distance', | |
544 | 'DistanceGroup', 'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay', | |
545 | 'Div1AirportID', 'Div1AirportSeqID', 'Div2AirportID', 'Div2AirportSeqID', 'Div3AirportID', | |
546 | 'Div3AirportSeqID', 'Div4AirportID', 'Div4AirportSeqID', 'Div5AirportID', 'Div5AirportSeqID' | |
547 | ] | |
548 | ||
549 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}' FORMAT JSONEachRow".format(', '.join(cols), sys.argv[1]) | |
550 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
551 | ||
552 | for line in resp.iter_lines(chunk_size=10000): | |
553 | line = list(loads(line).values()) | |
554 | ||
358 | .. literalinclude:: ../perf/script_12.py | |
359 | :language: python | |
555 | 360 | |
556 | 361 | Iteration over int rows with ``clickhouse-driver`` in Native format (13) |
557 | 362 | |
558 | .. code-block:: python | |
559 | ||
560 | import sys | |
561 | from clickhouse_driver import Client | |
562 | ||
563 | cols = [ | |
564 | 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'AirlineID', 'OriginAirportID', 'OriginAirportSeqID', | |
565 | 'OriginCityMarketID', 'OriginWac', 'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestWac', | |
566 | 'CRSDepTime', 'DepTime', 'DepDelay', 'DepDelayMinutes', 'DepDel15', 'TaxiOut', 'WheelsOff', 'WheelsOn', | |
567 | 'TaxiIn', 'CRSArrTime', 'ArrTime', 'ArrDelay', 'ArrDelayMinutes', 'ArrDel15', 'ArrivalDelayGroups', | |
568 | 'Cancelled', 'Diverted', 'CRSElapsedTime', 'ActualElapsedTime', 'AirTime', 'Flights', 'Distance', | |
569 | 'DistanceGroup', 'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay', | |
570 | 'Div1AirportID', 'Div1AirportSeqID', 'Div2AirportID', 'Div2AirportSeqID', 'Div3AirportID', | |
571 | 'Div3AirportSeqID', 'Div4AirportID', 'Div4AirportSeqID', 'Div5AirportID', 'Div5AirportSeqID' | |
572 | ] | |
573 | ||
574 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}'".format(', '.join(cols), sys.argv[1]) | |
575 | client = Client.from_url('clickhouse://localhost') | |
576 | ||
577 | for row in client.execute_iter(query): | |
578 | pass | |
363 | .. literalinclude:: ../perf/script_13.py | |
364 | :language: python | |
365 | ||
366 | Iteration over int rows with ``clickhouse-connect`` (16) | |
367 | ||
368 | .. literalinclude:: ../perf/script_16.py | |
369 | :language: python | |
579 | 370 | |
580 | 371 | |
581 | 372 | Results |
594 | 385 | +==================================+===========+===========+===========+===========+===========+ |
595 | 386 | |**Plain text without parsing: timing** | |
596 | 387 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
597 | |Naive requests.get TSV (1) | 0.40 s | 0.67 s | 0.95 s | 1.67 s | 2.52 s | | |
598 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
599 | |Naive requests.get JSON (1) | 0.61 s | 1.23 s | 2.09 s | 3.52 s | 5.20 s | | |
388 | |Naive requests.get TSV (1) | 0.35 s | 0.56 s | 0.83 s | 1.15 s | 1.72 s | | |
389 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
390 | |Naive requests.get JSON (1) | 0.99 s | 1.80 s | 2.77 s | 5.15 s | 7.80 s | | |
600 | 391 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
601 | 392 | |**Plain text without parsing: memory** | |
602 | 393 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
603 | |Naive requests.get TSV (1) | 49 MB | 107 MB | 165 MB | 322 MB | 488 MB | | |
604 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
605 | |Naive requests.get JSON (1) | 206 MB | 564 MB | 916 MB | 1.83 GB | 2.83 GB | | |
394 | |Naive requests.get TSV (1) | 52 MB | 110 MB | 167 MB | 323 MB | 489 MB | | |
395 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
396 | |Naive requests.get JSON (1) | 263 MB | 726 MB | 1.88 GB | 2.42 GB | 3.75 GB | | |
606 | 397 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
607 | 398 | |**Parsed rows: timing** | |
608 | 399 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
609 | |requests.get TSV (2) | 0.81 s | 1.81 s | 3.09 s | 7.22 s | 11.87 s | | |
610 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
611 | |requests.get TSV with cast (2.5) | 1.78 s | 4.58 s | 7.42 s | 16.12 s | 25.52 s | | |
612 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
613 | |requests.get JSON (3) | 2.14 s | 5.65 s | 9.20 s | 20.43 s | 31.72 s | | |
614 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
615 | |clickhouse-driver Native (4) | 0.73 s | 1.40 s | 2.08 s | 4.03 s | 6.20 s | | |
400 | |requests.get TSV (2) | 0.83 s | 1.97 s | 3.32 s | 7.90 s | 13.13 s | | |
401 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
402 | |requests.get TSV with cast (2.5) | 1.59 s | 4.31 s | 6.99 s | 15.60 s | 25.89 s | | |
403 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
404 | |requests.get JSON (3) | 2.78 s | 5.55 s | 9.23 s | 21.45 s | 31.50 s | | |
405 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
406 | |clickhouse-driver Native (4) | 0.87 s | 1.50 s | 2.21 s | 4.20 s | 6.32 s | | |
407 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
408 | |clickhouse-connect (14) | 0.89 s | 1.72 s | 2.46 s | 4.85 s | 7.19 s | | |
616 | 409 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
617 | 410 | |**Parsed rows: memory** | |
618 | 411 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
619 | |requests.get TSV (2) | 171 MB | 462 MB | 753 MB | 1.51 GB | 2.33 GB | | |
620 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
621 | |requests.get TSV with cast (2.5) | 135 MB | 356 MB | 576 MB | 1.15 GB | 1.78 GB | | |
622 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
623 | |requests.get JSON (3) | 139 MB | 366 MB | 591 MB | 1.18 GB | 1.82 GB | | |
624 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
625 | |clickhouse-driver Native (4) | 135 MB | 337 MB | 535 MB | 1.05 GB | 1.62 GB | | |
412 | |requests.get TSV (2) | 182 MB | 487 MB | 794 MB | 1.63 GB | 2.51 GB | | |
413 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
414 | |requests.get TSV with cast (2.5) | 138 MB | 359 MB | 579 MB | 1.18 GB | 1.82 GB | | |
415 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
416 | |requests.get JSON (3) | 136 MB | 351 MB | 565 MB | 1.15 GB | 1.77 GB | | |
417 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
418 | |clickhouse-driver Native (4) | 155 MB | 343 MB | 530 MB | 1.04 GB | 1.58 GB | | |
419 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
420 | |clickhouse-connect (14) | 139 MB | 333 MB | 524 MB | 1.05 GB | 1.61 GB | | |
626 | 421 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
627 | 422 | |**Iteration over rows: timing** | |
628 | 423 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
629 | |requests.get TSV (5) | 0.49 s | 0.99 s | 1.34 s | 2.58 s | 4.00 s | | |
630 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
631 | |requests.get TSV with cast (5.5) | 1.38 s | 3.38 s | 5.40 s | 10.89 s | 16.59 s | | |
632 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
633 | |requests.get JSON (6) | 1.89 s | 4.73 s | 7.63 s | 15.63 s | 24.60 s | | |
634 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
635 | |clickhouse-driver Native (7) | 0.62 s | 1.28 s | 1.93 s | 3.68 s | 5.54 s | | |
424 | |requests.get TSV (5) | 0.48 s | 0.91 s | 1.28 s | 2.57 s | 3.72 s | | |
425 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
426 | |requests.get TSV with cast (5.5) | 1.25 s | 3.05 s | 4.77 s | 9.67 s | 15.04 s | | |
427 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
428 | |requests.get JSON (6) | 1.80 s | 4.48 s | 7.10 s | 14.45 s | 22.17 s | | |
429 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
430 | |clickhouse-driver Native (7) | 0.72 s | 1.38 s | 2.01 s | 3.65 s | 5.45 s | | |
431 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
432 | |clickhouse-connect (17) | 0.85 s | 1.62 s | 2.12 s | 4.12 s | 6.05 s | | |
636 | 433 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
637 | 434 | |**Iteration over rows: memory** | |
638 | 435 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
639 | |requests.get TSV (5) | 19 MB | 19 MB | 19 MB | 19 MB | 19 MB | | |
640 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
641 | |requests.get TSV with cast (5.5) | 19 MB | 19 MB | 19 MB | 19 MB | 19 MB | | |
642 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
643 | |requests.get JSON (6) | 20 MB | 20 MB | 20 MB | 20 MB | 20 MB | | |
644 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
645 | |clickhouse-driver Native (7) | 56 MB | 70 MB | 71 MB | 71 MB | 71 MB | | |
436 | |requests.get TSV (5) | 22 MB | 22 MB | 22 MB | 22 MB | 22 MB | | |
437 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
438 | |requests.get TSV with cast (5.5) | 22 MB | 22 MB | 22 MB | 22 MB | 22 MB | | |
439 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
440 | |requests.get JSON (6) | 24 MB | 24 MB | 24 MB | 24 MB | 24 MB | | |
441 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
442 | |clickhouse-driver Native (7) | 91 MB | 93 MB | 93 MB | 94 MB | 94 MB | | |
443 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
444 | |clickhouse-connect (17) | 68 MB | 68 MB | 68 MB | 68 MB | 68 MB | | |
646 | 445 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
647 | 446 | |**Iteration over string rows: timing** | |
648 | 447 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
649 | |requests.get TSV (8) | 0.40 s | 0.67 s | 0.80 s | 1.55 s | 2.18 s | | |
650 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
651 | |requests.get JSON (9) | 1.14 s | 2.64 s | 4.22 s | 8.48 s | 12.96 s | | |
652 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
653 | |clickhouse-driver Native (10) | 0.46 s | 0.91 s | 1.35 s | 2.49 s | 3.67 s | | |
448 | |requests.get TSV (8) | 0.44 s | 0.57 s | 0.77 s | 1.40 s | 1.94 s | | |
449 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
450 | |requests.get JSON (9) | 1.03 s | 2.46 s | 3.87 s | 7.76 s | 11.96 s | | |
451 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
452 | |clickhouse-driver Native (10) | 0.63 s | 1.06 s | 1.44 s | 2.45 s | 3.57 s | | |
453 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
454 | |clickhouse-connect (15) | 0.62 s | 1.13 s | 1.53 s | 2.84 s | 4.00 s | | |
654 | 455 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
655 | 456 | |**Iteration over string rows: memory** | |
656 | 457 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
657 | |requests.get TSV (8) | 19 MB | 19 MB | 19 MB | 19 MB | 19 MB | | |
658 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
659 | |requests.get JSON (9) | 20 MB | 20 MB | 20 MB | 20 MB | 20 MB | | |
660 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
661 | |clickhouse-driver Native (10) | 46 MB | 56 MB | 57 MB | 57 MB | 57 MB | | |
458 | |requests.get TSV (8) | 22 MB | 22 MB | 22 MB | 22 MB | 22 MB | | |
459 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
460 | |requests.get JSON (9) | 24 MB | 24 MB | 24 MB | 24 MB | 24 MB | | |
461 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
462 | |clickhouse-driver Native (10) | 77 MB | 79 MB | 79 MB | 79 MB | 79 MB | | |
463 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
464 | |clickhouse-connect (15) | 60 MB | 60 MB | 60 MB | 60 MB | 60 MB | | |
662 | 465 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
663 | 466 | |**Iteration over int rows: timing** | |
664 | 467 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
665 | |requests.get TSV (11) | 0.84 s | 2.06 s | 3.22 s | 6.27 s | 10.06 s | | |
666 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
667 | |requests.get JSON (12) | 0.95 s | 2.15 s | 3.55 s | 6.93 s | 10.82 s | | |
668 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
669 | |clickhouse-driver Native (13) | 0.43 s | 0.61 s | 0.86 s | 1.53 s | 2.27 s | | |
468 | |requests.get TSV (11) | 0.81 s | 1.66 s | 2.61 s | 5.08 s | 7.91 s | | |
469 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
470 | |requests.get JSON (12) | 0.97 s | 2.02 s | 3.29 s | 6.50 s | 10.00 s | | |
471 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
472 | |clickhouse-driver Native (13) | 0.55 s | 0.78 s | 1.02 s | 1.73 s | 2.44 s | | |
473 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
474 | |clickhouse-connect (16) | 0.54 s | 0.79 s | 1.01 s | 1.68 s | 2.20 s | | |
670 | 475 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
671 | 476 | |**Iteration over int rows: memory** | |
672 | 477 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
673 | |requests.get TSV (11) | 19 MB | 19 MB | 19 MB | 19 MB | 19 MB | | |
674 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
675 | |requests.get JSON (12) | 20 MB | 20 MB | 20 MB | 20 MB | 20 MB | | |
676 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
677 | |clickhouse-driver Native (13) | 41 MB | 48 MB | 48 MB | 48 MB | 49 MB | | |
478 | |requests.get TSV (11) | 22 MB | 22 MB | 22 MB | 22 MB | 22 MB | | |
479 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
480 | |requests.get JSON (12) | 24 MB | 24 MB | 24 MB | 24 MB | 24 MB | | |
481 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
482 | |clickhouse-driver Native (13) | 71 MB | 72 MB | 72 MB | 73 MB | 73 MB | | |
483 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ | |
484 | |clickhouse-connect (16) | 41 MB | 41 MB | 41 MB | 41 MB | 41 MB | | |
678 | 485 | +----------------------------------+-----------+-----------+-----------+-----------+-----------+ |
679 | 486 | |
680 | 487 |
67 | 67 | |
68 | 68 | NOTE: formatting queries using Python's f-strings or concatenation can lead to SQL injections. |
69 | 69 | Use ``%(myvar)s`` parameters instead. |
70 | ||
71 | Server-side parameters are supported since version 0.2.6. | |
72 | You can mix client-side and server-side formatting in one query: | |
73 | ||
74 | .. code-block:: python | |
75 | ||
76 | >>> client.execute( | |
77 | ... "SELECT 'test' like '%%es%%', %(myvar)s, {myvar:Int32}", | |
78 | ... {'myvar': 1} | |
79 | ... ) | |
70 | 80 | |
71 | 81 | Customisation of ``SELECT`` output with the ``FORMAT`` clause is not supported.
72 | 82 |
55 | 55 | String/FixedString(N) |
56 | 56 | --------------------- |
57 | 57 | |
58 | INSERT types: :class:`str`/:func:`basestring <basestring>`, :class:`bytes`. See note below. | |
59 | ||
60 | SELECT type: :class:`str`/:func:`basestring <basestring>`, :class:`bytes`. See note below. | |
58 | INSERT types: :class:`str`, :class:`bytes`. See note below. | |
59 | ||
60 | SELECT type: :class:`str`, :class:`bytes`. See note below. | |
61 | 61 | |
62 | 62 | String column is encoded/decoded with encoding specified by ``strings_encoding`` setting. Default encoding is UTF-8. |
63 | 63 | |
96 | 96 | Enum8/16 |
97 | 97 | -------- |
98 | 98 | |
99 | INSERT types: :class:`~enum.Enum`, :class:`int`, :class:`long`, :class:`str`/:func:`basestring <basestring>`. | |
100 | ||
101 | SELECT type: :class:`str`/:func:`basestring <basestring>`. | |
99 | INSERT types: :class:`~enum.Enum`, :class:`int`, :class:`long`, :class:`str`. | |
100 | ||
101 | SELECT type: :class:`str`. | |
102 | 102 | |
103 | 103 | .. code-block:: python |
104 | 104 | |
179 | 179 | UUID |
180 | 180 | ---- |
181 | 181 | |
182 | INSERT types: :class:`str`/:func:`basestring <basestring>`, :class:`~uuid.UUID`. | |
182 | INSERT types: :class:`str`, :class:`~uuid.UUID`. | |
183 | 183 | |
184 | 184 | SELECT type: :class:`~uuid.UUID`. |
185 | 185 | |
206 | 206 | |
207 | 207 | *New in version 0.0.19.* |
208 | 208 | |
209 | INSERT types: :class:`~ipaddress.IPv4Address`/:class:`~ipaddress.IPv6Address`, :class:`int`, :class:`long`, :class:`str`/:func:`basestring <basestring>`. | |
209 | INSERT types: :class:`~ipaddress.IPv4Address`/:class:`~ipaddress.IPv6Address`, :class:`int`, :class:`long`, :class:`str`. | |
210 | 210 | |
211 | 211 | SELECT type: :class:`~ipaddress.IPv4Address`/:class:`~ipaddress.IPv6Address`. |
212 | 212 | |
280 | 280 | INSERT types: :class:`list`, :class:`tuple`. |
281 | 281 | |
282 | 282 | SELECT type: :class:`tuple`. |
283 | ||
284 | .. note:: | |
285 | ||
286 | Currently, for ClickHouse server 23.3.1, a JSON column ``Object('json')`` | |
287 | and a **namedtuple** column ``Tuple(b Int8)`` have the same binary | |
288 | representation. There is no way to distinguish one column from another | |
289 | without additional inspection, such as a ``DESCRIBE TABLE`` `query | |
290 | <https://github.com/ClickHouse/ClickHouse/issues/48822>`_, and even that | |
291 | will not work for complicated queries with joins. | |
292 | ||
293 | To interpret a ClickHouse named tuple column as a Python tuple when | |
294 | ``allow_experimental_object_type=1`` is set, set the | |
295 | ``namedtuple_as_json`` setting to ``False``. | |
296 | ||
297 | .. code-block:: python | |
298 | ||
299 | client.execute(..., settings={'namedtuple_as_json': False}) | |
300 | ||
301 | .. code-block:: sql | |
302 | ||
303 | CREATE TABLE test ( | |
304 | a Tuple(b Int8), | |
305 | c Object('json') | |
306 | ) ENGINE = Memory | |
307 | ||
308 | INSERT INTO test VALUES ((1), '{"x": 2}'); | |
309 | ||
310 | .. code-block:: python | |
311 | ||
312 | >>> client.execute('SELECT * FROM test') | |
313 | [((1,), (2,))] | |
314 | ||
315 | >>> client.execute( | |
316 | ... 'SELECT * FROM test', | |
317 | ... settings={'allow_experimental_object_type': 1} | |
318 | ... ) | |
319 | [({'b': 1}, {'x': 2})] | |
320 | ||
321 | >>> client.execute( | |
322 | ... 'SELECT * FROM test', | |
323 | ... settings={ | |
324 | ... 'allow_experimental_object_type': 1, | |
325 | ... 'namedtuple_as_json': False | |
326 | ... } | |
327 | ... ) | |
328 | [((1,), (2,))] | |
283 | 329 | |
284 | 330 | |
285 | 331 | Nested(flatten_nested=1, default) |
421 | 467 | * Ring: Array(Point) |
422 | 468 | * Polygon: Array(Ring) |
423 | 469 | * MultiPolygon: Array(Polygon) |
470 | ||
471 | ||
472 | Object('json') | |
473 | -------------- | |
474 | ||
475 | *New in version 0.2.6.* | |
476 | ||
477 | INSERT types: :class:`dict`. | |
478 | ||
479 | SELECT type: :class:`dict`, :class:`str`. | |
480 | ||
481 | ``orjson`` and ``ujson`` implementations are supported for dumping data into | |
482 | JSON during ``INSERT``. | |
483 | ||
484 | Set ``allow_experimental_object_type=1`` to enable JSON support. | |
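A minimal sketch of working with an ``Object('json')`` column from Python, based on the
test suite in this release (assumes a local server with experimental JSON support; the
``test_json`` table name, the inserted keys and the echoed result are illustrative):

.. code-block:: python

    >>> from clickhouse_driver import Client
    >>> # The experimental setting is passed as a client-level setting.
    >>> client = Client(
    ...     'localhost',
    ...     settings={'allow_experimental_object_type': 1}
    ... )
    >>> client.execute(
    ...     "CREATE TABLE test_json (a Object('json')) ENGINE = Memory"
    ... )
    >>> # Rows are plain Python dicts on INSERT...
    >>> client.execute(
    ...     'INSERT INTO test_json (a) VALUES',
    ...     [({'key1': 1, 'key2': {'nested': 'key'}}, )]
    ... )
    >>> # ...and come back as dicts on SELECT.
    >>> client.execute('SELECT * FROM test_json')
    [({'key1': 1, 'key2': {'nested': 'key'}},)]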
0 | import sys | |
1 | import requests | |
2 | ||
3 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT {}".format(sys.argv[1], sys.argv[2]) | |
4 | data = requests.get('http://localhost:8123/', params={'query': query}) |
0 | import sys | |
1 | import requests | |
2 | ||
3 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(sys.argv[1]) | |
4 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
5 | ||
6 | data = [line.decode('utf-8').split('\t') for line in resp.iter_lines(chunk_size=10000)] |
0 | from datetime import date | |
1 | import sys | |
2 | import requests | |
3 | ||
4 | ||
5 | def get_python_type(ch_type): | |
6 | if ch_type.startswith('Int') or ch_type.startswith('UInt'): | |
7 | return int | |
8 | ||
9 | elif ch_type == 'String' or ch_type.startswith('FixedString'): | |
10 | return None | |
11 | ||
12 | elif ch_type == 'Date': | |
13 | return lambda value: date(*[int(x) for x in value.split('-')]) | |
14 | ||
15 | raise ValueError(f'Unsupported type: "{ch_type}"') | |
16 | ||
17 | ||
18 | resp = requests.get('http://localhost:8123', params={'query': 'describe table perftest.ontime FORMAT TSV'}) | |
19 | ch_types = [x.split('\t')[1] for x in resp.text.split('\n') if x] | |
20 | python_types = [get_python_type(x) for x in ch_types] | |
21 | ||
22 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(sys.argv[1]) | |
23 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
24 | ||
25 | data = [] | |
26 | ||
27 | for line in resp.iter_lines(chunk_size=10000): | |
28 | data.append([cls(x) if cls else x for x, cls in zip(line.decode('utf-8').split('\t'), python_types)]) |
0 | import sys | |
1 | import requests | |
2 | from ujson import loads | |
3 | ||
4 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT JSONEachRow".format(sys.argv[1]) | |
5 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
6 | ||
7 | data = [list(loads(line).values()) for line in resp.iter_lines(chunk_size=10000)] |
0 | import sys | |
1 | from clickhouse_driver import Client | |
2 | ||
3 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}'".format(sys.argv[1]) | |
4 | client = Client.from_url('clickhouse://localhost') | |
5 | ||
6 | data = client.execute(query) |
0 | import sys | |
1 | import requests | |
2 | ||
3 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(sys.argv[1]) | |
4 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
5 | ||
6 | for line in resp.iter_lines(chunk_size=10000): | |
7 | line = line.decode('utf-8').split('\t') |
0 | from datetime import date | |
1 | import sys | |
2 | import requests | |
3 | ||
4 | ||
5 | def get_python_type(ch_type): | |
6 | if ch_type.startswith('Int') or ch_type.startswith('UInt'): | |
7 | return int | |
8 | ||
9 | elif ch_type == 'String' or ch_type.startswith('FixedString'): | |
10 | return None | |
11 | ||
12 | elif ch_type == 'Date': | |
13 | return lambda value: date(*[int(x) for x in value.split('-')]) | |
14 | ||
15 | raise ValueError(f'Unsupported type: "{ch_type}"') | |
16 | ||
17 | ||
18 | resp = requests.get('http://localhost:8123', params={'query': 'describe table perftest.ontime FORMAT TSV'}) | |
19 | ch_types = [x.split('\t')[1] for x in resp.text.split('\n') if x] | |
20 | python_types = [get_python_type(x) for x in ch_types] | |
21 | ||
22 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(sys.argv[1]) | |
23 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
24 | ||
25 | for line in resp.iter_lines(chunk_size=10000): | |
26 | line = [cls(x) if cls else x for x, cls in zip(line.decode('utf-8').split('\t'), python_types)] |
0 | import sys | |
1 | import requests | |
2 | from ujson import loads | |
3 | ||
4 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}' FORMAT JSONEachRow".format(sys.argv[1]) | |
5 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
6 | ||
7 | for line in resp.iter_lines(chunk_size=10000): | |
8 | line = list(loads(line).values()) |
0 | import sys | |
1 | from clickhouse_driver import Client | |
2 | ||
3 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}'".format(sys.argv[1]) | |
4 | client = Client.from_url('clickhouse://localhost') | |
5 | ||
6 | for row in client.execute_iter(query): | |
7 | pass |
0 | import sys | |
1 | import requests | |
2 | ||
3 | cols = [ | |
4 | 'UniqueCarrier', 'Carrier', 'TailNum', 'FlightNum', 'Origin', 'OriginCityName', 'OriginState', | |
5 | 'OriginStateFips', 'OriginStateName', 'Dest', 'DestCityName', 'DestState', 'DestStateFips', | |
6 | 'DestStateName', 'DepartureDelayGroups', 'DepTimeBlk', 'ArrTimeBlk', 'CancellationCode', | |
7 | 'FirstDepTime', 'TotalAddGTime', 'LongestAddGTime', 'DivAirportLandings', 'DivReachedDest', | |
8 | 'DivActualElapsedTime', 'DivArrDelay', 'DivDistance', 'Div1Airport', 'Div1WheelsOn', 'Div1TotalGTime', | |
9 | 'Div1LongestGTime', 'Div1WheelsOff', 'Div1TailNum', 'Div2Airport', 'Div2WheelsOn', 'Div2TotalGTime', | |
10 | 'Div2LongestGTime', 'Div2WheelsOff', 'Div2TailNum', 'Div3Airport', 'Div3WheelsOn', 'Div3TotalGTime', | |
11 | 'Div3LongestGTime', 'Div3WheelsOff', 'Div3TailNum', 'Div4Airport', 'Div4WheelsOn', 'Div4TotalGTime', | |
12 | 'Div4LongestGTime', 'Div4WheelsOff', 'Div4TailNum', 'Div5Airport', 'Div5WheelsOn', 'Div5TotalGTime', | |
13 | 'Div5LongestGTime', 'Div5WheelsOff', 'Div5TailNum' | |
14 | ] | |
15 | ||
16 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(', '.join(cols), sys.argv[1]) | |
17 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
18 | ||
19 | for line in resp.iter_lines(chunk_size=10000): | |
20 | line = line.decode('utf-8').split('\t') |
0 | import sys | |
1 | import requests | |
2 | from ujson import loads | |
3 | ||
4 | cols = [ | |
5 | 'UniqueCarrier', 'Carrier', 'TailNum', 'FlightNum', 'Origin', 'OriginCityName', 'OriginState', | |
6 | 'OriginStateFips', 'OriginStateName', 'Dest', 'DestCityName', 'DestState', 'DestStateFips', | |
7 | 'DestStateName', 'DepartureDelayGroups', 'DepTimeBlk', 'ArrTimeBlk', 'CancellationCode', | |
8 | 'FirstDepTime', 'TotalAddGTime', 'LongestAddGTime', 'DivAirportLandings', 'DivReachedDest', | |
9 | 'DivActualElapsedTime', 'DivArrDelay', 'DivDistance', 'Div1Airport', 'Div1WheelsOn', 'Div1TotalGTime', | |
10 | 'Div1LongestGTime', 'Div1WheelsOff', 'Div1TailNum', 'Div2Airport', 'Div2WheelsOn', 'Div2TotalGTime', | |
11 | 'Div2LongestGTime', 'Div2WheelsOff', 'Div2TailNum', 'Div3Airport', 'Div3WheelsOn', 'Div3TotalGTime', | |
12 | 'Div3LongestGTime', 'Div3WheelsOff', 'Div3TailNum', 'Div4Airport', 'Div4WheelsOn', 'Div4TotalGTime', | |
13 | 'Div4LongestGTime', 'Div4WheelsOff', 'Div4TailNum', 'Div5Airport', 'Div5WheelsOn', 'Div5TotalGTime', | |
14 | 'Div5LongestGTime', 'Div5WheelsOff', 'Div5TailNum' | |
15 | ] | |
16 | ||
17 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}' FORMAT JSONEachRow".format(', '.join(cols), sys.argv[1]) | |
18 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
19 | ||
20 | for line in resp.iter_lines(chunk_size=10000): | |
21 | line = list(loads(line).values()) |
0 | import sys | |
1 | from clickhouse_driver import Client | |
2 | ||
3 | cols = [ | |
4 | 'UniqueCarrier', 'Carrier', 'TailNum', 'FlightNum', 'Origin', 'OriginCityName', 'OriginState', | |
5 | 'OriginStateFips', 'OriginStateName', 'Dest', 'DestCityName', 'DestState', 'DestStateFips', | |
6 | 'DestStateName', 'DepartureDelayGroups', 'DepTimeBlk', 'ArrTimeBlk', 'CancellationCode', | |
7 | 'FirstDepTime', 'TotalAddGTime', 'LongestAddGTime', 'DivAirportLandings', 'DivReachedDest', | |
8 | 'DivActualElapsedTime', 'DivArrDelay', 'DivDistance', 'Div1Airport', 'Div1WheelsOn', 'Div1TotalGTime', | |
9 | 'Div1LongestGTime', 'Div1WheelsOff', 'Div1TailNum', 'Div2Airport', 'Div2WheelsOn', 'Div2TotalGTime', | |
10 | 'Div2LongestGTime', 'Div2WheelsOff', 'Div2TailNum', 'Div3Airport', 'Div3WheelsOn', 'Div3TotalGTime', | |
11 | 'Div3LongestGTime', 'Div3WheelsOff', 'Div3TailNum', 'Div4Airport', 'Div4WheelsOn', 'Div4TotalGTime', | |
12 | 'Div4LongestGTime', 'Div4WheelsOff', 'Div4TailNum', 'Div5Airport', 'Div5WheelsOn', 'Div5TotalGTime', | |
13 | 'Div5LongestGTime', 'Div5WheelsOff', 'Div5TailNum' | |
14 | ] | |
15 | ||
16 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}'".format(', '.join(cols), sys.argv[1]) | |
17 | client = Client.from_url('clickhouse://localhost') | |
18 | ||
19 | for row in client.execute_iter(query): | |
20 | pass |
0 | import sys | |
1 | import requests | |
2 | ||
3 | cols = [ | |
4 | 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'AirlineID', 'OriginAirportID', 'OriginAirportSeqID', | |
5 | 'OriginCityMarketID', 'OriginWac', 'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestWac', | |
6 | 'CRSDepTime', 'DepTime', 'DepDelay', 'DepDelayMinutes', 'DepDel15', 'TaxiOut', 'WheelsOff', 'WheelsOn', | |
7 | 'TaxiIn', 'CRSArrTime', 'ArrTime', 'ArrDelay', 'ArrDelayMinutes', 'ArrDel15', 'ArrivalDelayGroups', | |
8 | 'Cancelled', 'Diverted', 'CRSElapsedTime', 'ActualElapsedTime', 'AirTime', 'Flights', 'Distance', | |
9 | 'DistanceGroup', 'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay', | |
10 | 'Div1AirportID', 'Div1AirportSeqID', 'Div2AirportID', 'Div2AirportSeqID', 'Div3AirportID', | |
11 | 'Div3AirportSeqID', 'Div4AirportID', 'Div4AirportSeqID', 'Div5AirportID', 'Div5AirportSeqID' | |
12 | ] | |
13 | ||
14 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}' FORMAT TSV".format(', '.join(cols), sys.argv[1]) | |
15 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
16 | ||
17 | for line in resp.iter_lines(chunk_size=10000): | |
18 | line = [int(x) for x in line.split(b'\t')] |
0 | import sys | |
1 | import requests | |
2 | from ujson import loads | |
3 | ||
4 | cols = [ | |
5 | 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'AirlineID', 'OriginAirportID', 'OriginAirportSeqID', | |
6 | 'OriginCityMarketID', 'OriginWac', 'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestWac', | |
7 | 'CRSDepTime', 'DepTime', 'DepDelay', 'DepDelayMinutes', 'DepDel15', 'TaxiOut', 'WheelsOff', 'WheelsOn', | |
8 | 'TaxiIn', 'CRSArrTime', 'ArrTime', 'ArrDelay', 'ArrDelayMinutes', 'ArrDel15', 'ArrivalDelayGroups', | |
9 | 'Cancelled', 'Diverted', 'CRSElapsedTime', 'ActualElapsedTime', 'AirTime', 'Flights', 'Distance', | |
10 | 'DistanceGroup', 'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay', | |
11 | 'Div1AirportID', 'Div1AirportSeqID', 'Div2AirportID', 'Div2AirportSeqID', 'Div3AirportID', | |
12 | 'Div3AirportSeqID', 'Div4AirportID', 'Div4AirportSeqID', 'Div5AirportID', 'Div5AirportSeqID' | |
13 | ] | |
14 | ||
15 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}' FORMAT JSONEachRow".format(', '.join(cols), sys.argv[1]) | |
16 | resp = requests.get('http://localhost:8123/', stream=True, params={'query': query}) | |
17 | ||
18 | for line in resp.iter_lines(chunk_size=10000): | |
19 | line = list(loads(line).values()) |
0 | import sys | |
1 | from clickhouse_driver import Client | |
2 | ||
3 | cols = [ | |
4 | 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'AirlineID', 'OriginAirportID', 'OriginAirportSeqID', | |
5 | 'OriginCityMarketID', 'OriginWac', 'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestWac', | |
6 | 'CRSDepTime', 'DepTime', 'DepDelay', 'DepDelayMinutes', 'DepDel15', 'TaxiOut', 'WheelsOff', 'WheelsOn', | |
7 | 'TaxiIn', 'CRSArrTime', 'ArrTime', 'ArrDelay', 'ArrDelayMinutes', 'ArrDel15', 'ArrivalDelayGroups', | |
8 | 'Cancelled', 'Diverted', 'CRSElapsedTime', 'ActualElapsedTime', 'AirTime', 'Flights', 'Distance', | |
9 | 'DistanceGroup', 'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay', | |
10 | 'Div1AirportID', 'Div1AirportSeqID', 'Div2AirportID', 'Div2AirportSeqID', 'Div3AirportID', | |
11 | 'Div3AirportSeqID', 'Div4AirportID', 'Div4AirportSeqID', 'Div5AirportID', 'Div5AirportSeqID' | |
12 | ] | |
13 | ||
14 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}'".format(', '.join(cols), sys.argv[1]) | |
15 | client = Client.from_url('clickhouse://localhost') | |
16 | ||
17 | for row in client.execute_iter(query): | |
18 | pass |
0 | import sys | |
1 | import clickhouse_connect | |
2 | ||
3 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}'".format(sys.argv[1]) | |
4 | client = clickhouse_connect.get_client(host='localhost', query_limit=None, compress=False) | |
5 | ||
6 | data = client.query(query).result_rows |
0 | import sys | |
1 | import clickhouse_connect | |
2 | ||
3 | cols = [ | |
4 | 'UniqueCarrier', 'Carrier', 'TailNum', 'FlightNum', 'Origin', 'OriginCityName', 'OriginState', | |
5 | 'OriginStateFips', 'OriginStateName', 'Dest', 'DestCityName', 'DestState', 'DestStateFips', | |
6 | 'DestStateName', 'DepartureDelayGroups', 'DepTimeBlk', 'ArrTimeBlk', 'CancellationCode', | |
7 | 'FirstDepTime', 'TotalAddGTime', 'LongestAddGTime', 'DivAirportLandings', 'DivReachedDest', | |
8 | 'DivActualElapsedTime', 'DivArrDelay', 'DivDistance', 'Div1Airport', 'Div1WheelsOn', 'Div1TotalGTime', | |
9 | 'Div1LongestGTime', 'Div1WheelsOff', 'Div1TailNum', 'Div2Airport', 'Div2WheelsOn', 'Div2TotalGTime', | |
10 | 'Div2LongestGTime', 'Div2WheelsOff', 'Div2TailNum', 'Div3Airport', 'Div3WheelsOn', 'Div3TotalGTime', | |
11 | 'Div3LongestGTime', 'Div3WheelsOff', 'Div3TailNum', 'Div4Airport', 'Div4WheelsOn', 'Div4TotalGTime', | |
12 | 'Div4LongestGTime', 'Div4WheelsOff', 'Div4TailNum', 'Div5Airport', 'Div5WheelsOn', 'Div5TotalGTime', | |
13 | 'Div5LongestGTime', 'Div5WheelsOff', 'Div5TailNum' | |
14 | ] | |
15 | ||
16 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}'".format(', '.join(cols), sys.argv[1]) | |
17 | client = clickhouse_connect.get_client(host='localhost', query_limit=None, compress=False) | |
18 | ||
19 | rv = client.query(query) | |
20 | with rv: | |
21 | for row in rv.stream_rows(): | |
22 | pass |
0 | import sys | |
1 | import clickhouse_connect | |
2 | ||
3 | cols = [ | |
4 | 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek', 'AirlineID', 'OriginAirportID', 'OriginAirportSeqID', | |
5 | 'OriginCityMarketID', 'OriginWac', 'DestAirportID', 'DestAirportSeqID', 'DestCityMarketID', 'DestWac', | |
6 | 'CRSDepTime', 'DepTime', 'DepDelay', 'DepDelayMinutes', 'DepDel15', 'TaxiOut', 'WheelsOff', 'WheelsOn', | |
7 | 'TaxiIn', 'CRSArrTime', 'ArrTime', 'ArrDelay', 'ArrDelayMinutes', 'ArrDel15', 'ArrivalDelayGroups', | |
8 | 'Cancelled', 'Diverted', 'CRSElapsedTime', 'ActualElapsedTime', 'AirTime', 'Flights', 'Distance', | |
9 | 'DistanceGroup', 'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay', | |
10 | 'Div1AirportID', 'Div1AirportSeqID', 'Div2AirportID', 'Div2AirportSeqID', 'Div3AirportID', | |
11 | 'Div3AirportSeqID', 'Div4AirportID', 'Div4AirportSeqID', 'Div5AirportID', 'Div5AirportSeqID' | |
12 | ] | |
13 | ||
14 | query = "SELECT {} FROM perftest.ontime WHERE FlightDate < '{}'".format(', '.join(cols), sys.argv[1]) | |
15 | client = clickhouse_connect.get_client(host='localhost', query_limit=None, compress=False) | |
16 | ||
17 | rv = client.query(query) | |
18 | with rv: | |
19 | for row in rv.stream_rows(): | |
20 | pass |
0 | import sys | |
1 | import clickhouse_connect | |
2 | ||
3 | query = "SELECT * FROM perftest.ontime WHERE FlightDate < '{}'".format(sys.argv[1]) | |
4 | client = clickhouse_connect.get_client(host='localhost', query_limit=None, compress=False) | |
5 | ||
6 | rv = client.query(query) | |
7 | with rv: | |
8 | for row in rv.stream_rows(): | |
9 | pass |
96 | 96 | |
97 | 97 | 'Programming Language :: SQL', |
98 | 98 | 'Programming Language :: Python :: 3', |
99 | 'Programming Language :: Python :: 3.6', | |
100 | 99 | 'Programming Language :: Python :: 3.7', |
101 | 100 | 'Programming Language :: Python :: 3.8', |
102 | 101 | 'Programming Language :: Python :: 3.9', |
119 | 118 | 'Changes': github_url + '/blob/master/CHANGELOG.md' |
120 | 119 | }, |
121 | 120 | packages=find_packages('.', exclude=['tests*']), |
122 | python_requires='>=3.6, <4', | |
121 | python_requires='>=3.7, <4', | |
123 | 122 | install_requires=[ |
124 | 123 | 'pytz', |
125 | 124 | 'tzlocal' |
0 | import json | |
1 | ||
2 | from tests.testcase import BaseTestCase | |
3 | ||
4 | ||
5 | class JSONTestCase(BaseTestCase): | |
6 | required_server_version = (22, 3, 2) | |
7 | ||
8 | def client_kwargs(self, version): | |
9 | return {'settings': {'allow_experimental_object_type': True}} | |
10 | ||
11 | def cli_client_kwargs(self): | |
12 | return {'allow_experimental_object_type': 1} | |
13 | ||
14 | def test_simple(self): | |
15 | rv = self.client.execute("SELECT '{\"bb\": {\"cc\": [255, 1]}}'::JSON") | |
16 | self.assertEqual(rv, [({'bb': {'cc': [255, 1]}},)]) | |
17 | ||
18 | def test_from_table(self): | |
19 | with self.create_table('a JSON'): | |
20 | data = [ | |
21 | ({},), | |
22 | ({'key1': 1}, ), | |
23 | ({'key1': 2.1, 'key2': {'nested': 'key'}}, ), | |
24 | ({'key1': 3, 'key3': ['test'], 'key4': [10, 20]}, ) | |
25 | ] | |
26 | self.client.execute('INSERT INTO test (a) VALUES', data) | |
27 | query = 'SELECT * FROM test' | |
28 | inserted = self.emit_cli(query) | |
29 | self.assertEqual( | |
30 | inserted, | |
31 | "(0,(''),[],[])\n" | |
32 | "(1,(''),[],[])\n" | |
33 | "(2.1,('key'),[],[])\n" | |
34 | "(3,(''),['test'],[10,20])\n" | |
35 | ) | |
36 | inserted = self.client.execute(query) | |
37 | data_with_all_keys = [ | |
38 | ({'key1': 0, 'key2': {'nested': ''}, 'key3': [], 'key4': []},), | |
39 | ({'key1': 1, 'key2': {'nested': ''}, 'key3': [], 'key4': []},), | |
40 | ({'key1': 2.1, 'key2': {'nested': 'key'}, 'key3': [], | |
41 | 'key4': []},), | |
42 | ({'key1': 3, 'key2': {'nested': ''}, 'key3': ['test'], | |
43 | 'key4': [10, 20]},) | |
44 | ] | |
45 | self.assertEqual(inserted, data_with_all_keys) | |
46 | ||
47 | def test_insert_json_strings(self): | |
48 | with self.create_table('a JSON'): | |
49 | data = [ | |
50 | (json.dumps({'i-am': 'dumped json'}),), | |
51 | ] | |
52 | self.client.execute('INSERT INTO test (a) VALUES', data) | |
53 | query = 'SELECT * FROM test' | |
54 | inserted = self.emit_cli(query) | |
55 | self.assertEqual( | |
56 | inserted, | |
57 | "('dumped json')\n" | |
58 | ) | |
59 | inserted = self.client.execute(query) | |
60 | data_with_all_keys = [ | |
61 | ({'`i-am`': 'dumped json'},) | |
62 | ] | |
63 | self.assertEqual(inserted, data_with_all_keys) | |
64 | ||
65 | def test_json_as_named_tuple(self): | |
66 | settings = {'namedtuple_as_json': False} | |
67 | query = 'SELECT * FROM test' | |
68 | ||
69 | with self.create_table('a JSON'): | |
70 | data = [ | |
71 | ({'key': 'value'}, ), | |
72 | ] | |
73 | self.client.execute('INSERT INTO test (a) VALUES', data) | |
74 | inserted = self.client.execute(query) | |
75 | self.assertEqual(inserted, data) | |
76 | ||
77 | with self.created_client(settings=settings) as client: | |
78 | inserted = client.execute(query) | |
79 | self.assertEqual(inserted, [(('value',),)]) |
0 | 0 | from datetime import date, timedelta |
1 | 1 | from decimal import Decimal |
2 | from uuid import UUID | |
2 | 3 | |
3 | 4 | from tests.testcase import BaseTestCase |
5 | from tests.util import require_server_version | |
4 | 6 | |
5 | 7 | |
6 | 8 | class LowCardinalityTestCase(BaseTestCase): |
58 | 60 | with self.create_table('a LowCardinality(Date)'): |
59 | 61 | start = date(1970, 1, 1) |
60 | 62 | data = [(start + timedelta(x), ) for x in range(300)] |
63 | self.client.execute('INSERT INTO test (a) VALUES', data) | |
64 | ||
65 | query = 'SELECT * FROM test' | |
66 | inserted = self.client.execute(query) | |
67 | self.assertEqual(inserted, data) | |
68 | ||
69 | def test_nullable_date(self): | |
70 | with self.create_table('a LowCardinality(Nullable(Date))'): | |
71 | data = [(date(2023, 4, 1), ), (None, ), (date(1970, 1, 1), )] | |
72 | self.client.execute('INSERT INTO test (a) VALUES', data) | |
73 | ||
74 | query = 'SELECT * FROM test' | |
75 | inserted = self.client.execute(query) | |
76 | self.assertEqual(inserted, data) | |
77 | ||
78 | @require_server_version(21, 6) | |
79 | def test_nullable_uuid(self): | |
80 | with self.create_table('a LowCardinality(Nullable(UUID))'): | |
81 | data = [(UUID('2efcead4-ff55-4db5-bdb4-6b36a308d8e0'), ), (None, )] | |
61 | 82 | self.client.execute('INSERT INTO test (a) VALUES', data) |
62 | 83 | |
63 | 84 | query = 'SELECT * FROM test' |
0 | 0 | from tests.testcase import BaseTestCase |
1 | from tests.util import require_server_version | |
2 | from clickhouse_driver.columns import nestedcolumn | |
3 | 1 | |
4 | 2 | |
5 | 3 | class NestedTestCase(BaseTestCase): |
4 | required_server_version = (21, 3, 13) | |
5 | ||
6 | 6 | def entuple(self, lst): |
7 | 7 | return tuple( |
8 | 8 | self.entuple(x) if isinstance(x, list) else x for x in lst |
9 | 9 | ) |
10 | 10 | |
11 | @require_server_version(21, 3, 13) | |
12 | 11 | def test_simple(self): |
13 | 12 | columns = 'n Nested(i Int32, s String)' |
14 | 13 | |
39 | 38 | [(['a', 'b'],)] |
40 | 39 | ) |
41 | 40 | |
42 | @require_server_version(21, 3, 13) | |
43 | 41 | def test_multiple_rows(self): |
44 | 42 | columns = 'n Nested(i Int32, s String)' |
45 | 43 | |
60 | 58 | inserted = self.client.execute(query) |
61 | 59 | self.assertEqual(inserted, data) |
62 | 60 | |
63 | @require_server_version(21, 3, 13) | |
64 | 61 | def test_dict(self): |
65 | 62 | columns = 'n Nested(i Int32, s String)' |
66 | 63 | |
87 | 84 | [([(0, 'a'), (1, 'b')],), ([(3, 'd'), (4, 'e')],)] |
88 | 85 | ) |
89 | 86 | |
90 | def test_get_nested_columns(self): | |
91 | self.assertEqual( | |
92 | nestedcolumn.get_nested_columns( | |
93 | 'Nested(a Tuple(Array(Int8)),\n b Nullable(String))', | |
94 | ), | |
95 | ['Tuple(Array(Int8))', 'Nullable(String)'] | |
96 | ) | |
87 | def test_nested_side_effect_as_json(self): | |
88 | client_settings = { | |
89 | 'allow_experimental_object_type': True | |
90 | } | |
91 | columns = 'n Nested(i Int32, s String)' | |
97 | 92 | |
98 | def test_get_columns_with_types(self): | |
99 | self.assertEqual( | |
100 | nestedcolumn.get_columns_with_types( | |
101 | 'Nested(a Tuple(Array(Int8)),\n b Nullable(String))', | |
102 | ), | |
103 | [('a', 'Tuple(Array(Int8))'), ('b', 'Nullable(String)')] | |
104 | ) | |
93 | data = [([(0, 'a'), (1, 'b')],)] | |
94 | ||
95 | with self.create_table(columns, flatten_nested=0): | |
96 | with self.created_client(settings=client_settings) as client: | |
97 | client.execute( | |
98 | 'INSERT INTO test (n) VALUES', data | |
99 | ) | |
100 | ||
101 | inserted = client.execute('SELECT * FROM test') | |
102 | self.assertEqual( | |
103 | inserted, | |
104 | [([{'i': 0, 's': 'a'}, {'i': 1, 's': 'b'}],)] | |
105 | ) |
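The test_nested_side_effect_as_json case above documents a side effect of the object type: when the table is created with flatten_nested=0 and the client enables allow_experimental_object_type, a Nested column round-trips as a list of dicts instead of a tuple per sub-column. A rough sketch under the same assumptions (host and table names are illustrative):

    from clickhouse_driver import Client

    client = Client('localhost',
                    settings={'allow_experimental_object_type': True})

    # flatten_nested=0 keeps the Nested column as one structured column
    # instead of separate n.i / n.s array columns.
    client.execute(
        'CREATE TABLE nested_demo (n Nested(i Int32, s String)) '
        'ENGINE = Memory',
        settings={'flatten_nested': 0}
    )
    client.execute('INSERT INTO nested_demo (n) VALUES',
                   [([(0, 'a'), (1, 'b')],)])
    # -> [([{'i': 0, 's': 'a'}, {'i': 1, 's': 'b'}],)]
    print(client.execute('SELECT * FROM nested_demo'))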
0 | from datetime import date | |
1 | ||
2 | from tests.testcase import BaseTestCase | |
3 | from clickhouse_driver import errors | |
4 | ||
5 | ErrorCodes = errors.ErrorCodes | |
6 | ||
7 | ||
8 | class SparseTestCase(BaseTestCase): | |
9 | required_server_version = (22, 1) | |
10 | ||
11 | create_table_template = ( | |
12 | 'CREATE TABLE test ({}) ' | |
13 | 'ENGINE = MergeTree ' | |
14 | 'ORDER BY tuple() ' | |
15 | 'SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5' | |
16 | ) | |
17 | ||
18 | def test_int_all_defaults(self): | |
19 | columns = 'a Int32' | |
20 | ||
21 | data = [(0, ), (0, ), (0, )] | |
22 | with self.create_table(columns): | |
23 | self.client.execute( | |
24 | 'INSERT INTO test (a) VALUES', data | |
25 | ) | |
26 | ||
27 | query = 'SELECT * FROM test' | |
28 | inserted = self.emit_cli(query) | |
29 | self.assertEqual(inserted, '0\n0\n0\n') | |
30 | ||
31 | inserted = self.client.execute(query) | |
32 | self.assertEqual(inserted, data) | |
33 | ||
34 | data = [(0,)] | |
35 | with self.create_table(columns): | |
36 | self.client.execute( | |
37 | 'INSERT INTO test (a) VALUES', data | |
38 | ) | |
39 | ||
40 | query = 'SELECT * FROM test' | |
41 | inserted = self.emit_cli(query) | |
42 | self.assertEqual(inserted, '0\n') | |
43 | ||
44 | inserted = self.client.execute(query) | |
45 | self.assertEqual(inserted, data) | |
46 | ||
47 | def test_int_borders_cases(self): | |
48 | columns = 'a Int32' | |
49 | ||
50 | data = [(1, ), (0, ), (0, ), (1, ), (0, ), (0, ), (1, )] | |
51 | with self.create_table(columns): | |
52 | self.client.execute( | |
53 | 'INSERT INTO test (a) VALUES', data | |
54 | ) | |
55 | ||
56 | query = 'SELECT * FROM test' | |
57 | inserted = self.emit_cli(query) | |
58 | self.assertEqual(inserted, '1\n0\n0\n1\n0\n0\n1\n') | |
59 | ||
60 | inserted = self.client.execute(query) | |
61 | self.assertEqual(inserted, data) | |
62 | ||
63 | def test_int_default_last(self): | |
64 | columns = 'a Int32' | |
65 | ||
66 | data = [(1, ), (0, ), (0, )] | |
67 | with self.create_table(columns): | |
68 | self.client.execute( | |
69 | 'INSERT INTO test (a) VALUES', data | |
70 | ) | |
71 | ||
72 | query = 'SELECT * FROM test' | |
73 | inserted = self.emit_cli(query) | |
74 | self.assertEqual(inserted, '1\n0\n0\n') | |
75 | ||
76 | inserted = self.client.execute(query) | |
77 | self.assertEqual(inserted, data) | |
78 | ||
79 | def test_sparse_tuples(self): | |
80 | columns = 'a Int32, b Tuple(Int32, Tuple(Int32, Int32))' | |
81 | ||
82 | data = [ | |
83 | (1, (1, (1, 0))), | |
84 | (0, (0, (0, 0))), | |
85 | (0, (0, (0, 0))) | |
86 | ] | |
87 | with self.create_table(columns): | |
88 | self.client.execute( | |
89 | 'INSERT INTO test VALUES', data | |
90 | ) | |
91 | ||
92 | query = 'SELECT * FROM test' | |
93 | inserted = self.emit_cli(query) | |
94 | self.assertEqual( | |
95 | inserted, | |
96 | '1\t(1,(1,0))\n' | |
97 | '0\t(0,(0,0))\n' | |
98 | '0\t(0,(0,0))\n' | |
99 | ) | |
100 | ||
101 | inserted = self.client.execute(query) | |
102 | self.assertEqual(inserted, data) | |
103 | ||
104 | def test_sparse_dates(self): | |
105 | columns = 'a Date32' | |
106 | ||
107 | data = [ | |
108 | (date(1970, 1, 1), ), | |
109 | (date(1970, 1, 1), ), | |
110 | ] | |
111 | with self.create_table(columns): | |
112 | self.client.execute( | |
113 | 'INSERT INTO test VALUES', data | |
114 | ) | |
115 | ||
116 | query = 'SELECT * FROM test' | |
117 | inserted = self.emit_cli(query) | |
118 | self.assertEqual( | |
119 | inserted, | |
120 | '1970-01-01\n' | |
121 | '1970-01-01\n' | |
122 | ) | |
123 | ||
124 | inserted = self.client.execute(query) | |
125 | self.assertEqual(inserted, data) |
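For context on the new sparse-serialization tests above (gated on server 22.1): when the share of default values in a column part is high enough relative to the MergeTree setting ratio_of_defaults_for_sparse_serialization, the server stores and streams that column sparsely, and the driver has to expand it back to dense rows on read. The tests pin the ratio at 0.5 so a few zero rows are enough to trigger it; an equivalent illustrative table created from client code might look like this (host and table name are illustrative):

    from clickhouse_driver import Client

    client = Client('localhost')
    client.execute(
        'CREATE TABLE sparse_demo (a Int32) ENGINE = MergeTree '
        'ORDER BY tuple() '
        'SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5'
    )
    # Mostly-default data is written in sparse format,
    # but reads still return ordinary dense rows.
    client.execute('INSERT INTO sparse_demo (a) VALUES', [(1,), (0,), (0,)])
    print(client.execute('SELECT * FROM sparse_demo'))  # [(1,), (0,), (0,)]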
177 | 177 | with self.create_table(columns): |
178 | 178 | with self.assertRaises(errors.TypeMismatchError): |
179 | 179 | self.client.execute('INSERT INTO test (a) VALUES', data) |
180 | ||
181 | def test_tuple_of_low_cardinality(self): | |
182 | data = [((1, 2), )] | |
183 | columns = 'a Tuple(LowCardinality(Int32), LowCardinality(Int32))' | |
184 | ||
185 | with self.create_table(columns): | |
186 | self.client.execute( | |
187 | 'INSERT INTO test (a) VALUES', data | |
188 | ) | |
189 | ||
190 | query = 'SELECT * FROM test' | |
191 | inserted = self.emit_cli(query) | |
192 | self.assertEqual( | |
193 | inserted, | |
194 | '(1,2)\n' | |
195 | ) | |
196 | ||
197 | inserted = self.client.execute(query) | |
198 | self.assertEqual(inserted, data) |
225 | 225 | inserted = self.emit_cli(query) |
226 | 226 | self.assertEqual(inserted, '0\n1\n1500000000\n4294967295\n') |
227 | 227 | |
228 | @require_server_version(20, 1, 2) | |
229 | def test_negative_timestamps(self): | |
230 | with self.create_table("a DateTime64(3, 'UTC')"): | |
231 | times = np.array(['1900-01-01 00:00'], dtype='datetime64[ns]') | |
232 | self.client.execute( | |
233 | 'INSERT INTO test(a) VALUES', | |
234 | [times], | |
235 | columnar=True, | |
236 | ) | |
237 | ||
238 | inserted = self.client.execute('SELECT * FROM test', columnar=True) | |
239 | self.assertArraysEqual(inserted[0], times) | |
240 | ||
228 | 241 | |
229 | 242 | class DateTimeTimezonesTestCase(BaseDateTimeTestCase): |
230 | 243 | dt_type = 'DateTime' |
569 | 582 | inserted[0], self.make_tz_numpy_array(dt, self.col_tz_name) |
570 | 583 | ) |
571 | 584 | |
585 | @require_server_version(1, 1, 54337) | |
586 | def test_read_tz_aware_column(self): | |
587 | # read data from tz aware column Asia/Novosibirsk | |
588 | # offset_naive is False -> tz convert not needed | |
589 | ||
590 | with self.create_table(self.table_columns(with_tz=True)): | |
591 | with patch.object( | |
592 | pd, 'to_datetime', wraps=pd.to_datetime | |
593 | ) as to_datetime_spy: | |
594 | self.client.execute( | |
595 | 'INSERT INTO test (a) VALUES', [self.dt_arr], columnar=True | |
596 | ) | |
597 | ||
598 | self.emit_cli( | |
599 | "INSERT INTO test (a) VALUES ('2017-07-14 05:40:00')", | |
600 | ) | |
601 | ||
602 | to_datetime_calls_before_read = to_datetime_spy.call_count | |
603 | ||
604 | query = 'SELECT * FROM test' | |
605 | inserted = self.client.execute(query, columnar=True) | |
606 | ||
607 | self.assertEqual( | |
608 | to_datetime_calls_before_read, | |
609 | to_datetime_spy.call_count | |
610 | ) | |
611 | ||
612 | self.assertArraysEqual( | |
613 | inserted[0], | |
614 | self.make_tz_numpy_array(self.dt, self.col_tz_name) | |
615 | ) | |
616 | ||
617 | @require_server_version(1, 1, 54337) | |
618 | def test_read_tz_naive_column_with_client_timezone(self): | |
619 | # read data from column without timezone | |
620 | # client timezone = Asia/Novosibirsk | |
621 | # offset_naive is True and timezone is not UTC -> tz convert needed | |
622 | ||
623 | settings = {'use_client_time_zone': True} | |
624 | ||
625 | with patch_env_tz('Asia/Novosibirsk'): | |
626 | with self.create_table(self.table_columns()): | |
627 | with patch.object( | |
628 | pd, 'to_datetime', wraps=pd.to_datetime | |
629 | ) as to_datetime_spy: | |
630 | self.client.execute( | |
631 | 'INSERT INTO test (a) VALUES', [self.dt_arr], | |
632 | settings=settings, columnar=True | |
633 | ) | |
634 | ||
635 | self.emit_cli( | |
636 | "INSERT INTO test (a) VALUES ('2017-07-14 05:40:00')", | |
637 | use_client_time_zone=1 | |
638 | ) | |
639 | ||
640 | to_datetime_calls_before_read = to_datetime_spy.call_count | |
641 | ||
642 | query = 'SELECT * FROM test' | |
643 | inserted = self.client.execute( | |
644 | query, settings=settings, columnar=True | |
645 | ) | |
646 | ||
647 | self.assertEqual( | |
648 | to_datetime_calls_before_read + 2, | |
649 | to_datetime_spy.call_count | |
650 | ) | |
651 | ||
652 | self.assertArraysEqual( | |
653 | inserted[0], | |
654 | self.make_numpy_d64ns([self.dt_str] * 2) | |
655 | ) | |
656 | ||
572 | 657 | |
573 | 658 | class DateTime64TimezonesTestCase(DateTimeTimezonesTestCase): |
574 | 659 | dt_type = 'DateTime64' |
0 | from tests.numpy.testcase import NumpyBaseTestCase | |
1 | ||
2 | ||
3 | class ColumnsNamesTestCase(NumpyBaseTestCase): | |
4 | ||
5 | def test_columns_names_replace_nonwords(self): | |
6 | columns = ( | |
7 | 'regular Int64, ' | |
8 | 'CamelCase Int64, ' | |
9 | 'With_Underscores Int64, ' | |
10 | '`Any%different.Column?` Int64' | |
11 | ) | |
12 | ||
13 | expected_columns = [ | |
14 | 'regular', 'CamelCase', 'With_Underscores', 'Any%different.Column?' | |
15 | ] | |
16 | ||
17 | with self.create_table(columns): | |
18 | df = self.client.query_dataframe( | |
19 | 'SELECT * FROM test', replace_nonwords=False | |
20 | ) | |
21 | self.assertEqual(expected_columns, list(df.columns)) |
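The DataFrame test above exercises the new replace_nonwords flag of query_dataframe: by default the driver rewrites non-word characters in column names to underscores when naming DataFrame columns, and passing replace_nonwords=False keeps the original names. A small sketch (illustrative host; needs the numpy/pandas extras enabled):

    from clickhouse_driver import Client

    client = Client('localhost', settings={'use_numpy': True})

    # Default behaviour: non-word characters become underscores,
    # e.g. 'Any%different.Column?' -> 'Any_different_Column_'.
    df = client.query_dataframe('SELECT 1 AS `Any%different.Column?`')

    # New flag: keep column names exactly as the server returns them.
    df_raw = client.query_dataframe(
        'SELECT 1 AS `Any%different.Column?`', replace_nonwords=False
    )
    print(list(df.columns), list(df_raw.columns))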
274 | 274 | 'clickhouse://host?round_robin=true&alt_hosts=host2' |
275 | 275 | ) |
276 | 276 | self.assertEqual(len(c.connections), 1) |
277 | ||
278 | def test_tcp_keepalive(self): | |
279 | c = Client.from_url('clickhouse://host?tcp_keepalive=true') | |
280 | self.assertTrue(c.connection.tcp_keepalive) | |
281 | ||
282 | c = Client.from_url('clickhouse://host?tcp_keepalive=10.5,2.5,3') | |
283 | self.assertEqual( | |
284 | c.connection.tcp_keepalive, (10.5, 2.5, 3) | |
285 | ) | |
286 | ||
287 | def test_client_revision(self): | |
288 | c = Client.from_url('clickhouse://host?client_revision=54032') | |
289 | self.assertEqual(c.connection.client_revision, 54032) |
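For reference, the connection-URL forms exercised above, gathered into one DSN (credentials, host names and database are illustrative):

    from clickhouse_driver import Client

    # tcp_keepalive accepts 'true' or an idle,interval,probes triple;
    # client_revision pins the protocol revision reported to the server.
    client = Client.from_url(
        'clickhouse://user:password@host:9000/db'
        '?round_robin=true&alt_hosts=host2'
        '&tcp_keepalive=10.5,2.5,3&client_revision=54032'
    )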
262 | 262 | |
263 | 263 | self.assertFalse(client.connection.connected) |
264 | 264 | self.assertFalse(list(client.connections)[0].connected) |
265 | ||
266 | def test_round_robin_client_construction(self): | |
267 | # host and port as keyword args | |
268 | Client( | |
269 | host='host', | |
270 | port=9000, | |
271 | round_robin=True, | |
272 | alt_hosts='host2' | |
273 | ) | |
274 | ||
275 | # host as positional and port as keyword arg | |
276 | Client( | |
277 | 'host', | |
278 | port=9000, | 
279 | round_robin=True, | |
280 | alt_hosts='host2' | |
281 | ) | |
282 | ||
283 | # host and port as positional args | |
284 | Client( | |
285 | 'host', | |
286 | 9000, | |
287 | round_robin=True, | |
288 | alt_hosts='host2' | |
289 | ) | |
290 | ||
291 | def test_tcp_keepalive(self): | |
292 | self.assertFalse(self.client.connection.tcp_keepalive) | |
293 | ||
294 | with self.created_client(tcp_keepalive=True) as client: | |
295 | self.assertTrue(client.connection.tcp_keepalive) | |
296 | ||
297 | client.execute('SELECT 1') | |
298 | ||
299 | with self.created_client(tcp_keepalive=(100, 20, 2)) as client: | |
300 | self.assertEqual(client.connection.tcp_keepalive, (100, 20, 2)) | |
301 | ||
302 | client.execute('SELECT 1') | |
303 | ||
304 | def test_client_revision(self): | |
305 | with self.created_client(client_revision=54032) as client: | |
306 | client.execute('SELECT 1') | |
265 | 307 | |
266 | 308 | |
267 | 309 | class FakeBufferedReader(BufferedReader): |
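And the keyword-argument form of the same features, as constructed in the round-robin and keepalive tests above (host names are illustrative): round_robin=True plus alt_hosts lets the client rotate queries across the listed hosts, and tcp_keepalive takes either a boolean or an (idle, interval, probes) tuple.

    from clickhouse_driver import Client

    client = Client(
        'host',
        port=9000,
        round_robin=True,            # rotate queries across 'host' and 'host2'
        alt_hosts='host2',
        tcp_keepalive=(100, 20, 2),  # or simply True for system defaults
    )
    client.execute('SELECT 1')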
154 | 154 | data = [{'a': 1}] |
155 | 155 | self.client.execute( |
156 | 156 | "INSERT INTO test (a) " |
157 | "SELECT a from input ('a Int8')", | |
157 | "SELECT a FROM input ('a Int8') FORMAT Native", | |
158 | 158 | data |
159 | 159 | ) |
160 | 160 |
0 | 0 | # coding=utf-8 |
1 | 1 | from __future__ import unicode_literals |
2 | 2 | |
3 | from datetime import date, datetime | |
3 | from datetime import date, datetime, time | |
4 | 4 | from decimal import Decimal |
5 | 5 | from unittest.mock import Mock |
6 | 6 | from uuid import UUID |
51 | 51 | |
52 | 52 | rv = self.client.execute(tpl, params) |
53 | 53 | self.assertEqual(rv, [(d, )]) |
54 | ||
55 | def test_time(self): | |
56 | t = time(8, 20, 15) | |
57 | params = {'x': t} | |
58 | ||
59 | self.assert_subst(self.single_tpl, params, "SELECT '08:20:15'") | |
60 | ||
61 | rv = self.client.execute(self.single_tpl, params) | |
62 | self.assertEqual(rv, [('08:20:15', )]) | |
54 | 63 | |
55 | 64 | def test_datetime(self): |
56 | 65 | dt = datetime(2017, 10, 16, 0, 18, 50) |
230 | 239 | |
231 | 240 | self.assertEqual(e.exception.args[0], |
232 | 241 | 'Parameters are expected in dict form') |
242 | ||
243 | ||
244 | class ServerSideParametersSubstitutionTestCase(BaseTestCase): | |
245 | required_server_version = (22, 8) | |
246 | ||
247 | def test_int(self): | |
248 | rv = self.client.execute('SELECT {x:Int32}', {'x': 123}) | |
249 | self.assertEqual(rv, [(123, )]) | |
250 | ||
251 | def test_str(self): | |
252 | rv = self.client.execute('SELECT {x:Int32}', {'x': '123'}) | |
253 | self.assertEqual(rv, [(123, )]) | |
254 | ||
255 | def test_escaped_str(self): | |
256 | rv = self.client.execute( | |
257 | 'SELECT {x:String}, length({x:String})', {'x': '\t'} | |
258 | ) | |
259 | self.assertEqual(rv, [('\t', 1)]) | |
260 | ||
261 | rv = self.client.execute( | |
262 | 'SELECT {x:String}, length({x:String})', {'x': '\\'} | |
263 | ) | |
264 | self.assertEqual(rv, [('\\', 1)]) | |
265 | ||
266 | rv = self.client.execute( | |
267 | 'SELECT {x:String}, length({x:String})', {'x': "'"} | |
268 | ) | |
269 | self.assertEqual(rv, [("'", 1)]) |
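The new ServerSideParametersSubstitutionTestCase above targets {name:Type} placeholders (server 22.8+ in these tests): roughly, instead of the driver interpolating %(x)s markers into the SQL text client-side, the values are handed to the server with the query and substituted there according to the declared type, which also takes care of quoting and escaping. A brief sketch (host is illustrative):

    from clickhouse_driver import Client

    client = Client('localhost')

    client.execute('SELECT {x:Int32}', {'x': 123})      # -> [(123,)]
    client.execute('SELECT {x:Int32}', {'x': '123'})    # strings are cast by type
    client.execute('SELECT {x:String}', {'x': "it's"})  # quoting handled for you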
28 | 28 | client = None |
29 | 29 | client_kwargs = None |
30 | 30 | cli_client_kwargs = None |
31 | ||
32 | create_table_template = 'CREATE TABLE test ({}) ENGINE = Memory' | |
31 | 33 | |
32 | 34 | @classmethod |
33 | 35 | def emit_cli(cls, statement, database=None, encoding='utf-8', **kwargs): |
108 | 110 | super(BaseTestCase, cls).tearDownClass() |
109 | 111 | |
110 | 112 | @contextmanager |
111 | def create_table(self, columns, **kwargs): | |
113 | def create_table(self, columns, template=None, **kwargs): | |
112 | 114 | if self.cli_client_kwargs: |
113 | 115 | if callable(self.cli_client_kwargs): |
114 | 116 | cli_client_kwargs = self.cli_client_kwargs() |
117 | 119 | else: |
118 | 120 | kwargs.update(self.cli_client_kwargs) |
119 | 121 | |
120 | self.emit_cli( | |
121 | 'CREATE TABLE test ({}) ''ENGINE = Memory'.format(columns), | |
122 | **kwargs | |
123 | ) | |
122 | template = template or self.create_table_template | |
123 | self.emit_cli(template.format(columns), **kwargs) | |
124 | 124 | try: |
125 | 125 | yield |
126 | 126 | except Exception: |
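A note on the base-class change above: create_table now builds the DDL from create_table_template (Memory engine by default), so suites that need another engine, such as the sparse-serialization tests earlier, only override the class attribute. A minimal sketch with a hypothetical test case:

    from tests.testcase import BaseTestCase

    class MergeTreeTestCase(BaseTestCase):
        # Overrides the default 'CREATE TABLE test ({}) ENGINE = Memory'.
        create_table_template = (
            'CREATE TABLE test ({}) ENGINE = MergeTree ORDER BY tuple()'
        )

        def test_round_trip(self):
            with self.create_table('a Int32'):
                self.client.execute('INSERT INTO test (a) VALUES', [(1,)])
                self.assertEqual(
                    self.client.execute('SELECT * FROM test'), [(1,)]
                )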