commit
82594c8fd6
53
clickhouse/Dockerfile
Normal file
53
clickhouse/Dockerfile
Normal file
@ -0,0 +1,53 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
ARG CLICKHOUSE_VERSION=20.9.4.76
|
||||
ARG CLICKHOUSE_GOSU_VERSION=1.10
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
apt-transport-https \
|
||||
dirmngr \
|
||||
gnupg \
|
||||
&& mkdir -p /etc/apt/sources.list.d \
|
||||
&& apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \
|
||||
&& echo "deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" > /etc/apt/sources.list.d/clickhouse.list \
|
||||
&& apt-get update \
|
||||
&& env DEBIAN_FRONTEND=noninteractive \
|
||||
apt-get install --allow-unauthenticated --yes --no-install-recommends \
|
||||
clickhouse-common-static=$CLICKHOUSE_VERSION \
|
||||
clickhouse-client=$CLICKHOUSE_VERSION \
|
||||
clickhouse-server=$CLICKHOUSE_VERSION \
|
||||
locales \
|
||||
tzdata \
|
||||
wget \
|
||||
&& rm -rf \
|
||||
/var/lib/apt/lists/* \
|
||||
/var/cache/debconf \
|
||||
/tmp/* \
|
||||
&& apt-get clean
|
||||
|
||||
ADD https://github.com/tianon/gosu/releases/download/$CLICKHOUSE_GOSU_VERSION/gosu-amd64 /bin/gosu
|
||||
|
||||
RUN locale-gen en_US.UTF-8
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US:en
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
|
||||
RUN mkdir /docker-entrypoint-initdb.d
|
||||
|
||||
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
|
||||
COPY config.xml /etc/clickhouse-server/config.xml
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
|
||||
RUN chmod +x \
|
||||
/entrypoint.sh \
|
||||
/bin/gosu
|
||||
|
||||
EXPOSE 9000 8123 9009
|
||||
VOLUME /var/lib/clickhouse
|
||||
|
||||
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
|
||||
ENV CLICKHOUSE_USER ${CLICKHOUSE_USER}
|
||||
ENV CLICKHOUSE_PASSWORD ${CLICKHOUSE_PASSWORD}
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
510
clickhouse/config.xml
Normal file
510
clickhouse/config.xml
Normal file
@ -0,0 +1,510 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
NOTE: User and query level settings are set up in "users.xml" file.
|
||||
-->
|
||||
<yandex>
|
||||
<!-- The list of hosts allowed to use in URL-related storage engines and table functions.
|
||||
If this section is not present in configuration, all hosts are allowed.
|
||||
-->
|
||||
<remote_url_allow_hosts>
|
||||
<!-- Host should be specified exactly as in URL. The name is checked before DNS resolution.
|
||||
Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
|
||||
If port is explicitly specified in URL, the host:port is checked as a whole.
|
||||
If host specified here without port, any port with this host allowed.
|
||||
"yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
|
||||
If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
|
||||
If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
|
||||
-->
|
||||
|
||||
<!-- Regular expression can be specified. RE2 engine is used for regexps.
|
||||
Regexps are not aligned: don't forget to add ^ and $. Also don't forget to escape dot (.) metacharacter
|
||||
(forgetting to do so is a common source of error).
|
||||
-->
|
||||
</remote_url_allow_hosts>
|
||||
|
||||
<logger>
|
||||
<!-- Possible levels: https://github.com/pocoproject/poco/blob/develop/Foundation/include/Poco/Logger.h#L105 -->
|
||||
<level>trace</level>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
|
||||
</logger>
|
||||
<!--display_name>production</display_name--> <!-- It is the name that will be shown in the client -->
|
||||
<http_port>8123</http_port>
|
||||
<tcp_port>9000</tcp_port>
|
||||
<!-- For HTTPS and SSL over native protocol. -->
|
||||
<!--
|
||||
<https_port>8443</https_port>
|
||||
<tcp_port_secure>9440</tcp_port_secure>
|
||||
-->
|
||||
|
||||
<!-- Used with https_port and tcp_port_secure. Full ssl options list: https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h#L71 -->
|
||||
<openSSL>
|
||||
<server> <!-- Used for https server AND secure tcp port -->
|
||||
<!-- openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt -->
|
||||
<certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
|
||||
<privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
|
||||
<!-- openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096 -->
|
||||
<dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
|
||||
<verificationMode>none</verificationMode>
|
||||
<loadDefaultCAFile>true</loadDefaultCAFile>
|
||||
<cacheSessions>true</cacheSessions>
|
||||
<disableProtocols>sslv2,sslv3</disableProtocols>
|
||||
<preferServerCiphers>true</preferServerCiphers>
|
||||
</server>
|
||||
|
||||
<client> <!-- Used for connecting to https dictionary source -->
|
||||
<loadDefaultCAFile>true</loadDefaultCAFile>
|
||||
<cacheSessions>true</cacheSessions>
|
||||
<disableProtocols>sslv2,sslv3</disableProtocols>
|
||||
<preferServerCiphers>true</preferServerCiphers>
|
||||
<!-- Use for self-signed: <verificationMode>none</verificationMode> -->
|
||||
<invalidCertificateHandler>
|
||||
<!-- Use for self-signed: <name>AcceptCertificateHandler</name> -->
|
||||
<name>RejectCertificateHandler</name>
|
||||
</invalidCertificateHandler>
|
||||
</client>
|
||||
</openSSL>
|
||||
|
||||
<!-- Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123 -->
|
||||
<!--
|
||||
<http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]></http_server_default_response>
|
||||
-->
|
||||
|
||||
<!-- Port for communication between replicas. Used for data exchange. -->
|
||||
<interserver_http_port>9009</interserver_http_port>
|
||||
|
||||
<!-- Hostname that is used by other replicas to request this server.
|
||||
If not specified, than it is determined analoguous to 'hostname -f' command.
|
||||
This setting could be used to switch replication to another network interface.
|
||||
-->
|
||||
<!--
|
||||
<interserver_http_host>example.yandex.ru</interserver_http_host>
|
||||
-->
|
||||
|
||||
<!-- Listen specified host. use :: (wildcard IPv6 address), if you want to accept connections both with IPv4 and IPv6 from everywhere. -->
|
||||
<!-- <listen_host>::</listen_host> -->
|
||||
<!-- Same for hosts with disabled ipv6: -->
|
||||
<!-- <listen_host>0.0.0.0</listen_host> -->
|
||||
|
||||
<!-- Default values - try listen localhost on ipv4 and ipv6: -->
|
||||
<!--
|
||||
<listen_host>::1</listen_host>
|
||||
<listen_host>127.0.0.1</listen_host>
|
||||
-->
|
||||
<!-- Don't exit if ipv6 or ipv4 unavailable, but listen_host with this protocol specified -->
|
||||
<!-- <listen_try>0</listen_try> -->
|
||||
|
||||
<!-- Allow listen on same address:port -->
|
||||
<!-- <listen_reuse_port>0</listen_reuse_port> -->
|
||||
|
||||
<!-- <listen_backlog>64</listen_backlog> -->
|
||||
|
||||
<max_connections>4096</max_connections>
|
||||
<keep_alive_timeout>3</keep_alive_timeout>
|
||||
|
||||
<!-- Maximum number of concurrent queries. -->
|
||||
<max_concurrent_queries>100</max_concurrent_queries>
|
||||
|
||||
<!-- Set limit on number of open files (default: maximum). This setting makes sense on Mac OS X because getrlimit() fails to retrieve
|
||||
correct maximum value. -->
|
||||
<!-- <max_open_files>262144</max_open_files> -->
|
||||
|
||||
<!-- Size of cache of uncompressed blocks of data, used in tables of MergeTree family.
|
||||
In bytes. Cache is single for server. Memory is allocated only on demand.
|
||||
Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
|
||||
Uncompressed cache is advantageous only for very short queries and in rare cases.
|
||||
-->
|
||||
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
|
||||
|
||||
<!-- Approximate size of mark cache, used in tables of MergeTree family.
|
||||
In bytes. Cache is single for server. Memory is allocated only on demand.
|
||||
You should not lower this value.
|
||||
-->
|
||||
<mark_cache_size>5368709120</mark_cache_size>
|
||||
|
||||
|
||||
<!-- Path to data directory, with trailing slash. -->
|
||||
<path>/var/lib/clickhouse/</path>
|
||||
|
||||
<!-- Path to temporary data for processing hard queries. -->
|
||||
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
|
||||
|
||||
<!-- Directory with user provided files that are accessible by 'file' table function. -->
|
||||
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
|
||||
|
||||
<!-- Path to configuration file with users, access rights, profiles of settings, quotas. -->
|
||||
<users_config>users.xml</users_config>
|
||||
|
||||
<!-- Default profile of settings. -->
|
||||
<default_profile>default</default_profile>
|
||||
|
||||
<!-- System profile of settings. This settings are used by internal processes (Buffer storage, Distibuted DDL worker and so on). -->
|
||||
<!-- <system_profile>default</system_profile> -->
|
||||
|
||||
<!-- Default database. -->
|
||||
<default_database>default</default_database>
|
||||
|
||||
<!-- Server time zone could be set here.
|
||||
|
||||
Time zone is used when converting between String and DateTime types,
|
||||
when printing DateTime in text formats and parsing DateTime from text,
|
||||
it is used in date and time related functions, if specific time zone was not passed as an argument.
|
||||
|
||||
Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan.
|
||||
If not specified, system time zone at server startup is used.
|
||||
|
||||
Please note, that server could display time zone alias instead of specified name.
|
||||
Example: W-SU is an alias for Europe/Moscow and Zulu is an alias for UTC.
|
||||
-->
|
||||
<!-- <timezone>Europe/Moscow</timezone> -->
|
||||
|
||||
<!-- You can specify umask here (see "man umask"). Server will apply it on startup.
|
||||
Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read).
|
||||
-->
|
||||
<!-- <umask>022</umask> -->
|
||||
|
||||
<!-- Perform mlockall after startup to lower first queries latency
|
||||
and to prevent clickhouse executable from being paged out under high IO load.
|
||||
Enabling this option is recommended but will lead to increased startup time for up to a few seconds.
|
||||
-->
|
||||
<mlock_executable>false</mlock_executable>
|
||||
|
||||
<!-- Configuration of clusters that could be used in Distributed tables.
|
||||
https://clickhouse.yandex/docs/en/table_engines/distributed/
|
||||
-->
|
||||
<remote_servers incl="clickhouse_remote_servers" >
|
||||
<!-- Test only shard config for testing distributed storage -->
|
||||
<test_shard_localhost>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>localhost</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</test_shard_localhost>
|
||||
<test_cluster_two_shards_localhost>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>localhost</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>localhost</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</test_cluster_two_shards_localhost>
|
||||
<test_cluster_two_shards>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>127.0.0.1</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>127.0.0.2</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</test_cluster_two_shards>
|
||||
<test_shard_localhost_secure>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>localhost</host>
|
||||
<port>9440</port>
|
||||
<secure>1</secure>
|
||||
</replica>
|
||||
</shard>
|
||||
</test_shard_localhost_secure>
|
||||
<test_unavailable_shard>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>localhost</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>localhost</host>
|
||||
<port>1</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</test_unavailable_shard>
|
||||
</remote_servers>
|
||||
|
||||
|
||||
<!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file.
|
||||
By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
|
||||
Values for substitutions are specified in /yandex/name_of_substitution elements in that file.
|
||||
-->
|
||||
|
||||
<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
|
||||
Optional. If you don't use replicated tables, you could omit that.
|
||||
|
||||
See https://clickhouse.yandex/docs/en/table_engines/replication/
|
||||
-->
|
||||
|
||||
<zookeeper incl="zookeeper-servers" optional="true" />
|
||||
|
||||
<!-- Substitutions for parameters of replicated tables.
|
||||
Optional. If you don't use replicated tables, you could omit that.
|
||||
|
||||
See https://clickhouse.yandex/docs/en/table_engines/replication/#creating-replicated-tables
|
||||
-->
|
||||
<macros incl="macros" optional="true" />
|
||||
|
||||
|
||||
<!-- Reloading interval for embedded dictionaries, in seconds. Default: 3600. -->
|
||||
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
|
||||
|
||||
|
||||
<!-- Maximum session timeout, in seconds. Default: 3600. -->
|
||||
<max_session_timeout>3600</max_session_timeout>
|
||||
|
||||
<!-- Default session timeout, in seconds. Default: 60. -->
|
||||
<default_session_timeout>60</default_session_timeout>
|
||||
|
||||
<!-- Sending data to Graphite for monitoring. Several sections can be defined. -->
|
||||
<!--
|
||||
interval - send every X second
|
||||
root_path - prefix for keys
|
||||
hostname_in_path - append hostname to root_path (default = true)
|
||||
metrics - send data from table system.metrics
|
||||
events - send data from table system.events
|
||||
asynchronous_metrics - send data from table system.asynchronous_metrics
|
||||
-->
|
||||
<!--
|
||||
<graphite>
|
||||
<host>localhost</host>
|
||||
<port>42000</port>
|
||||
<timeout>0.1</timeout>
|
||||
<interval>60</interval>
|
||||
<root_path>one_min</root_path>
|
||||
<hostname_in_path>true</hostname_in_path>
|
||||
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<events_cumulative>false</events_cumulative>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
</graphite>
|
||||
<graphite>
|
||||
<host>localhost</host>
|
||||
<port>42000</port>
|
||||
<timeout>0.1</timeout>
|
||||
<interval>1</interval>
|
||||
<root_path>one_sec</root_path>
|
||||
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<events_cumulative>false</events_cumulative>
|
||||
<asynchronous_metrics>false</asynchronous_metrics>
|
||||
</graphite>
|
||||
-->
|
||||
|
||||
<!-- Serve endpoint fot Prometheus monitoring. -->
|
||||
<!--
|
||||
endpoint - mertics path (relative to root, statring with "/")
|
||||
port - port to setup server. If not defined or 0 than http_port used
|
||||
metrics - send data from table system.metrics
|
||||
events - send data from table system.events
|
||||
asynchronous_metrics - send data from table system.asynchronous_metrics
|
||||
-->
|
||||
<!--
|
||||
<prometheus>
|
||||
<endpoint>/metrics</endpoint>
|
||||
<port>9363</port>
|
||||
|
||||
<metrics>true</metrics>
|
||||
<events>true</events>
|
||||
<asynchronous_metrics>true</asynchronous_metrics>
|
||||
</prometheus>
|
||||
-->
|
||||
|
||||
<!-- Query log. Used only for queries with setting log_queries = 1. -->
|
||||
<query_log>
|
||||
<!-- What table to insert data. If table is not exist, it will be created.
|
||||
When query log structure is changed after system update,
|
||||
then old table will be renamed and new table will be created automatically.
|
||||
-->
|
||||
<database>system</database>
|
||||
<table>query_log</table>
|
||||
<!--
|
||||
PARTITION BY expr https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
|
||||
Example:
|
||||
event_date
|
||||
toMonday(event_date)
|
||||
toYYYYMM(event_date)
|
||||
toStartOfHour(event_time)
|
||||
-->
|
||||
<partition_by>toYYYYMM(event_date)</partition_by>
|
||||
<!-- Interval of flushing data. -->
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</query_log>
|
||||
|
||||
<!-- Trace log. Stores stack traces collected by query profilers.
|
||||
See query_profiler_real_time_period_ns and query_profiler_cpu_time_period_ns settings. -->
|
||||
<trace_log>
|
||||
<database>system</database>
|
||||
<table>trace_log</table>
|
||||
|
||||
<partition_by>toYYYYMM(event_date)</partition_by>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</trace_log>
|
||||
|
||||
<!-- Query thread log. Has information about all threads participated in query execution.
|
||||
Used only for queries with setting log_query_threads = 1. -->
|
||||
<query_thread_log>
|
||||
<database>system</database>
|
||||
<table>query_thread_log</table>
|
||||
<partition_by>toYYYYMM(event_date)</partition_by>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</query_thread_log>
|
||||
|
||||
<!-- Uncomment if use part log.
|
||||
Part log contains information about all actions with parts in MergeTree tables (creation, deletion, merges, downloads).
|
||||
<part_log>
|
||||
<database>system</database>
|
||||
<table>part_log</table>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</part_log>
|
||||
-->
|
||||
|
||||
<!-- Uncomment to write text log into table.
|
||||
Text log contains all information from usual server log but stores it in structured and efficient way.
|
||||
<text_log>
|
||||
<database>system</database>
|
||||
<table>text_log</table>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</text_log>
|
||||
-->
|
||||
|
||||
<!-- Uncomment to write metric log into table.
|
||||
Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval.
|
||||
<metric_log>
|
||||
<database>system</database>
|
||||
<table>metric_log</table>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
|
||||
</metric_log>
|
||||
-->
|
||||
|
||||
<!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
|
||||
See https://clickhouse.yandex/docs/en/dicts/internal_dicts/
|
||||
-->
|
||||
|
||||
<!-- Path to file with region hierarchy. -->
|
||||
<!-- <path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file> -->
|
||||
|
||||
<!-- Path to directory with files containing names of regions -->
|
||||
<!-- <path_to_regions_names_files>/opt/geo/</path_to_regions_names_files> -->
|
||||
|
||||
|
||||
<!-- Configuration of external dictionaries. See:
|
||||
https://clickhouse.yandex/docs/en/dicts/external_dicts/
|
||||
-->
|
||||
<dictionaries_config>*_dictionary.xml</dictionaries_config>
|
||||
|
||||
<!-- Uncomment if you want data to be compressed 30-100% better.
|
||||
Don't do that if you just started using ClickHouse.
|
||||
-->
|
||||
<compression incl="clickhouse_compression">
|
||||
<!--
|
||||
<!- - Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used. - ->
|
||||
<case>
|
||||
|
||||
<!- - Conditions. All must be satisfied. Some conditions may be omitted. - ->
|
||||
<min_part_size>10000000000</min_part_size> <!- - Min part size in bytes. - ->
|
||||
<min_part_size_ratio>0.01</min_part_size_ratio> <!- - Min size of part relative to whole table size. - ->
|
||||
|
||||
<!- - What compression method to use. - ->
|
||||
<method>zstd</method>
|
||||
</case>
|
||||
-->
|
||||
</compression>
|
||||
|
||||
<!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
|
||||
Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. -->
|
||||
<distributed_ddl>
|
||||
<!-- Path in ZooKeeper to queue with DDL queries -->
|
||||
<path>/clickhouse/task_queue/ddl</path>
|
||||
|
||||
<!-- Settings from this profile will be used to execute DDL queries -->
|
||||
<!-- <profile>default</profile> -->
|
||||
</distributed_ddl>
|
||||
|
||||
<!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
|
||||
<!--
|
||||
<merge_tree>
|
||||
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
|
||||
</merge_tree>
|
||||
-->
|
||||
|
||||
<!-- Protection from accidental DROP.
|
||||
If size of a MergeTree table is greater than max_table_size_to_drop (in bytes) than table could not be dropped with any DROP query.
|
||||
If you want do delete one table and don't want to change clickhouse-server config, you could create special file <clickhouse-path>/flags/force_drop_table and make DROP once.
|
||||
By default max_table_size_to_drop is 50GB; max_table_size_to_drop=0 allows to DROP any tables.
|
||||
The same for max_partition_size_to_drop.
|
||||
Uncomment to disable protection.
|
||||
-->
|
||||
<!-- <max_table_size_to_drop>0</max_table_size_to_drop> -->
|
||||
<!-- <max_partition_size_to_drop>0</max_partition_size_to_drop> -->
|
||||
|
||||
<!-- Example of parameters for GraphiteMergeTree table engine -->
|
||||
<graphite_rollup_example>
|
||||
<pattern>
|
||||
<regexp>click_cost</regexp>
|
||||
<function>any</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>3600</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
</pattern>
|
||||
<default>
|
||||
<function>max</function>
|
||||
<retention>
|
||||
<age>0</age>
|
||||
<precision>60</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>3600</age>
|
||||
<precision>300</precision>
|
||||
</retention>
|
||||
<retention>
|
||||
<age>86400</age>
|
||||
<precision>3600</precision>
|
||||
</retention>
|
||||
</default>
|
||||
</graphite_rollup_example>
|
||||
|
||||
<!-- Directory in <clickhouse-path> containing schema files for various input formats.
|
||||
The directory will be created if it doesn't exist.
|
||||
-->
|
||||
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
|
||||
|
||||
|
||||
<!-- Uncomment to use query masking rules.
|
||||
name - name for the rule (optional)
|
||||
regexp - RE2 compatible regular expression (mandatory)
|
||||
replace - substitution string for sensitive data (optional, by default - six asterisks)
|
||||
<query_masking_rules>
|
||||
<rule>
|
||||
<name>hide SSN</name>
|
||||
<regexp>\b\d{3}-\d{2}-\d{4}\b</regexp>
|
||||
<replace>000-00-0000</replace>
|
||||
</rule>
|
||||
</query_masking_rules>
|
||||
-->
|
||||
|
||||
<!-- Uncomment to disable ClickHouse internal DNS caching. -->
|
||||
<!-- <disable_internal_dns_cache>1</disable_internal_dns_cache> -->
|
||||
</yandex>
|
0
clickhouse/docker-entrypoint-initdb.d/init-db.sh
Normal file
0
clickhouse/docker-entrypoint-initdb.d/init-db.sh
Normal file
12
clickhouse/docker_related_config.xml
Normal file
12
clickhouse/docker_related_config.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<yandex>
|
||||
<!-- Listen wildcard address to allow accepting connections from other containers and host network. -->
|
||||
<listen_host>::</listen_host>
|
||||
<listen_host>0.0.0.0</listen_host>
|
||||
<listen_try>1</listen_try>
|
||||
|
||||
<!--
|
||||
<logger>
|
||||
<console>1</console>
|
||||
</logger>
|
||||
-->
|
||||
</yandex>
|
103
clickhouse/entrypoint.sh
Normal file
103
clickhouse/entrypoint.sh
Normal file
@ -0,0 +1,103 @@
|
||||
#!/bin/bash
|
||||
|
||||
# set some vars
|
||||
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
|
||||
if [ x"$UID" == x0 ]; then
|
||||
USER="$(id -u clickhouse)"
|
||||
GROUP="$(id -g clickhouse)"
|
||||
gosu="gosu $USER:$GROUP"
|
||||
else
|
||||
USER="$(id -u)"
|
||||
GROUP="$(id -g)"
|
||||
gosu=""
|
||||
fi
|
||||
|
||||
# port is needed to check if clickhouse-server is ready for connections
|
||||
HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)"
|
||||
|
||||
# get CH directories locations
|
||||
DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)"
|
||||
TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)"
|
||||
USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)"
|
||||
LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)"
|
||||
LOG_DIR="$(dirname $LOG_PATH || true)"
|
||||
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)"
|
||||
ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)"
|
||||
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)"
|
||||
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
|
||||
|
||||
for dir in "$DATA_DIR" \
|
||||
"$ERROR_LOG_DIR" \
|
||||
"$LOG_DIR" \
|
||||
"$TMP_DIR" \
|
||||
"$USER_PATH" \
|
||||
"$FORMAT_SCHEMA_PATH"
|
||||
do
|
||||
# check if variable not empty
|
||||
[ -z "$dir" ] && continue
|
||||
# ensure directories exist
|
||||
if ! mkdir -p "$dir"; then
|
||||
echo "Couldn't create necessary directory: $dir"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ x"$UID" == x0 ] && [ "$CLICKHOUSE_DO_NOT_CHOWN" != "1" ]; then
|
||||
# ensure proper directories permissions
|
||||
chown -R "$USER:$GROUP" "$dir"
|
||||
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
|
||||
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
|
||||
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ]; then
|
||||
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG &
|
||||
pid="$!"
|
||||
|
||||
# check if clickhouse is ready to accept connections
|
||||
# will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay)
|
||||
if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
|
||||
echo >&2 'ClickHouse init process failed.'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then
|
||||
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
|
||||
fi
|
||||
|
||||
clickhouseclient=( clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD )
|
||||
|
||||
echo
|
||||
for f in /docker-entrypoint-initdb.d/*; do
|
||||
case "$f" in
|
||||
*.sh)
|
||||
if [ -x "$f" ]; then
|
||||
echo "$0: running $f"
|
||||
"$f"
|
||||
else
|
||||
echo "$0: sourcing $f"
|
||||
. "$f"
|
||||
fi
|
||||
;;
|
||||
*.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;;
|
||||
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
|
||||
*) echo "$0: ignoring $f" ;;
|
||||
esac
|
||||
echo
|
||||
done
|
||||
|
||||
if ! kill -s TERM "$pid" || ! wait "$pid"; then
|
||||
echo >&2 'Finishing of ClickHouse init process failed.'
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
|
||||
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
|
||||
exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@"
|
||||
fi
|
||||
|
||||
# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
|
||||
exec "$@"
|
138
clickhouse/users.xml
Normal file
138
clickhouse/users.xml
Normal file
@ -0,0 +1,138 @@
|
||||
<?xml version="1.0"?>
|
||||
<yandex>
|
||||
<!-- Profiles of settings. -->
|
||||
<profiles>
|
||||
<!-- Default settings. -->
|
||||
<default>
|
||||
<!-- Maximum memory usage for processing single query, in bytes. -->
|
||||
<max_memory_usage>10000000000</max_memory_usage>
|
||||
|
||||
<!-- Use cache of uncompressed blocks of data. Meaningful only for processing many of very short queries. -->
|
||||
<use_uncompressed_cache>0</use_uncompressed_cache>
|
||||
|
||||
<!-- How to choose between replicas during distributed query processing.
|
||||
random - choose random replica from set of replicas with minimum number of errors
|
||||
nearest_hostname - from set of replicas with minimum number of errors, choose replica
|
||||
with minimum number of different symbols between replica's hostname and local hostname
|
||||
(Hamming distance).
|
||||
in_order - first live replica is chosen in specified order.
|
||||
first_or_random - if first replica one has higher number of errors, pick a random one from replicas with minimum number of errors.
|
||||
-->
|
||||
<load_balancing>random</load_balancing>
|
||||
</default>
|
||||
|
||||
<!-- Profile that allows only read queries. -->
|
||||
<readonly>
|
||||
<readonly>1</readonly>
|
||||
</readonly>
|
||||
</profiles>
|
||||
|
||||
<!-- Users and ACL. -->
|
||||
<users>
|
||||
<!-- If user name was not specified, 'default' user is used. -->
|
||||
<default>
|
||||
<!-- Password could be specified in plaintext or in SHA256 (in hex format).
|
||||
|
||||
If you want to specify password in plaintext (not recommended), place it in 'password' element.
|
||||
Example: <password>qwerty</password>.
|
||||
Password could be empty.
|
||||
|
||||
If you want to specify SHA256, place it in 'password_sha256_hex' element.
|
||||
Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
|
||||
Restrictions of SHA256: impossibility to connect to ClickHouse using MySQL JS client (as of July 2019).
|
||||
|
||||
If you want to specify double SHA1, place it in 'password_double_sha1_hex' element.
|
||||
Example: <password_double_sha1_hex>e395796d6546b1b65db9d665cd43f0e858dd4303</password_double_sha1_hex>
|
||||
|
||||
How to generate decent password:
|
||||
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
|
||||
In first line will be password and in second - corresponding SHA256.
|
||||
|
||||
How to generate double SHA1:
|
||||
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | openssl dgst -sha1 -binary | openssl dgst -sha1
|
||||
In first line will be password and in second - corresponding double SHA1.
|
||||
-->
|
||||
<password>secret</password>
|
||||
|
||||
<!-- List of networks with open access.
|
||||
|
||||
To open access from everywhere, specify:
|
||||
<ip>::/0</ip>
|
||||
|
||||
To open access only from localhost, specify:
|
||||
<ip>::1</ip>
|
||||
<ip>127.0.0.1</ip>
|
||||
|
||||
Each element of list has one of the following forms:
|
||||
<ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
|
||||
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
|
||||
<host> Hostname. Example: server01.yandex.ru.
|
||||
To check access, DNS query is performed, and all received addresses compared to peer address.
|
||||
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
|
||||
To check access, DNS PTR query is performed for peer address and then regexp is applied.
|
||||
Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
|
||||
Strongly recommended that regexp is ends with $
|
||||
All results of DNS requests are cached till server restart.
|
||||
-->
|
||||
<networks incl="networks" replace="replace">
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
|
||||
<!-- Settings profile for user. -->
|
||||
<profile>default</profile>
|
||||
|
||||
<!-- Quota for user. -->
|
||||
<quota>default</quota>
|
||||
|
||||
<!-- For testing the table filters -->
|
||||
<databases>
|
||||
<test>
|
||||
<!-- Simple expression filter -->
|
||||
<filtered_table1>
|
||||
<filter>a = 1</filter>
|
||||
</filtered_table1>
|
||||
|
||||
<!-- Complex expression filter -->
|
||||
<filtered_table2>
|
||||
<filter>a + b < 1 or c - d > 5</filter>
|
||||
</filtered_table2>
|
||||
|
||||
<!-- Filter with ALIAS column -->
|
||||
<filtered_table3>
|
||||
<filter>c = 1</filter>
|
||||
</filtered_table3>
|
||||
</test>
|
||||
</databases>
|
||||
</default>
|
||||
|
||||
<!-- Example of user with readonly access. -->
|
||||
<!-- <readonly>
|
||||
<password></password>
|
||||
<networks incl="networks" replace="replace">
|
||||
<ip>::1</ip>
|
||||
<ip>127.0.0.1</ip>
|
||||
</networks>
|
||||
<profile>readonly</profile>
|
||||
<quota>default</quota>
|
||||
</readonly> -->
|
||||
</users>
|
||||
|
||||
<!-- Quotas. -->
|
||||
<quotas>
|
||||
<!-- Name of quota. -->
|
||||
<default>
|
||||
<!-- Limits for time interval. You could specify many intervals with different limits. -->
|
||||
<interval>
|
||||
<!-- Length of interval. -->
|
||||
<duration>3600</duration>
|
||||
|
||||
<!-- No limits. Just calculate resource usage for time interval. -->
|
||||
<queries>0</queries>
|
||||
<errors>0</errors>
|
||||
<result_rows>0</result_rows>
|
||||
<read_rows>0</read_rows>
|
||||
<execution_time>0</execution_time>
|
||||
</interval>
|
||||
</default>
|
||||
</quotas>
|
||||
</yandex>
|
@ -604,7 +604,38 @@ services:
|
||||
networks:
|
||||
- backend
|
||||
|
||||
### Redis ################################################
|
||||
### ClickHouse #############################################
|
||||
clickhouse:
|
||||
build:
|
||||
context: ./clickhouse
|
||||
args:
|
||||
- CLICKHOUSE_VERSION=${CLICKHOUSE_VERSION}
|
||||
- CLICKHOUSE_GOSU_VERSION=${CLICKHOUSE_GOSU_VERSION}
|
||||
environment:
|
||||
- CLICKHOUSE_USER=${CLICKHOUSE_USER}
|
||||
- CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD}
|
||||
volumes:
|
||||
- ${DATA_PATH_HOST}/clickhouse:/var/lib/clickhouse
|
||||
- ${CLICKHOUSE_CUSTOM_CONFIG}:/etc/clickhouse-server/config.xml
|
||||
- ${CLICKHOUSE_USERS_CUSTOM_CONFIG}:/etc/clickhouse-server/users.xml
|
||||
- ${CLICKHOUSE_HOST_LOG_PATH}:/var/log/clickhouse
|
||||
- ${CLICKHOUSE_ENTRYPOINT_INITDB}:/docker-entrypoint-initdb.d
|
||||
links:
|
||||
- workspace
|
||||
ports:
|
||||
- "${CLICKHOUSE_HTTP_PORT}:8123"
|
||||
- "${CLICKHOUSE_CLIENT_PORT}:9000"
|
||||
- "${CLICKHOUSE_NATIVE_PORT}:9009"
|
||||
ulimits:
|
||||
nproc: 65535
|
||||
nofile:
|
||||
soft: 262144
|
||||
hard: 262144
|
||||
networks:
|
||||
# - frontend
|
||||
- backend
|
||||
|
||||
### Redis ################################################
|
||||
redis:
|
||||
build: ./redis
|
||||
volumes:
|
||||
|
14
env-example
14
env-example
@ -333,6 +333,20 @@ MYSQL_PORT=3306
|
||||
MYSQL_ROOT_PASSWORD=root
|
||||
MYSQL_ENTRYPOINT_INITDB=./mysql/docker-entrypoint-initdb.d
|
||||
|
||||
### CLICKHOUSE #################################################
|
||||
|
||||
CLICKHOUSE_VERSION=20.9.4.76
|
||||
CLICKHOUSE_GOSU_VERSION=1.10
|
||||
CLICKHOUSE_CUSTOM_CONFIG=./clickhouse/config.xml
|
||||
CLICKHOUSE_USERS_CUSTOM_CONFIG=./clickhouse/users.xml
|
||||
CLICKHOUSE_USER=default
|
||||
CLICKHOUSE_PASSWORD=HAHA
|
||||
CLICKHOUSE_HTTP_PORT=8123
|
||||
CLICKHOUSE_CLIENT_PORT=9000
|
||||
CLICKHOUSE_NATIVE_PORT=9009
|
||||
CLICKHOUSE_ENTRYPOINT_INITDB=./clickhouse/docker-entrypoint-initdb.d
|
||||
CLICKHOUSE_HOST_LOG_PATH=./logs/clickhouse
|
||||
|
||||
### REDIS #################################################
|
||||
|
||||
REDIS_PORT=6379
|
||||
|
Loading…
x
Reference in New Issue
Block a user