ClickHouse S3 Engine 数量级调优
本文主要讲解 ClickHouse S3 Engine 的读取写入性能代码 及 数量级调优
ClickHouse 如何性能调优
一 前文
二 perf 调优
1 堆栈来源 trace_log
SELECT
count(),
arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\\n ', addressToLine(x)), trace), '\\n') AS sym
FROM system.trace_log
WHERE query_id = '157d5b6d-fa06-4bed-94f3-01f1d5f04e24'
GROUP BY trace
ORDER BY count() DESC
LIMIT 10
2 ClickHouse 线程
[root@9 ~]# ps H -o 'tid comm' $(pidof -s clickhouse-server) | awk '{print $2}' | sort | uniq -c | sort -rn
230 ChunkParser
199 QueryPipelineEx
71 Segmentator
66 BgSchPool
9 clickhouse-serv
3 TCPHandler
3 SystemLogFlush
3 Formatter
2 ExterLdrReload
2 ConfigReloader
2 Collector
3 异常堆栈
被调用多次
/root/ClickHouse/src/IO/BufferBase.h:82
void DB::readIntTextImpl<unsigned int, void, (DB::ReadIntTextCheckOverflow)0>(unsigned int&, DB::ReadBuffer&)
/root/ClickHouse/src/IO/BufferBase.h:95
void DB::readIntText<(DB::ReadIntTextCheckOverflow)0, unsigned int>(unsigned int&, DB::ReadBuffer&)
/root/ClickHouse/src/IO/ReadHelpers.h:393
std::__1::enable_if<is_integer_v<unsigned int>, void>::type DB::readText<unsigned int>(unsigned int&, DB::ReadBuffer&)
/root/ClickHouse/src/IO/ReadHelpers.h:902
void DB::readCSVSimple<unsigned int>(unsigned int&, DB::ReadBuffer&)
/root/ClickHouse/src/IO/ReadHelpers.h:988
std::__1::enable_if<is_arithmetic_v<unsigned int>, void>::type DB::readCSV<unsigned int>(unsigned int&, DB::ReadBuffer&)
/root/ClickHouse/src/IO/ReadHelpers.h:994
DB::SerializationNumber<unsigned int>::deserializeTextCSV(DB::IColumn&, DB::ReadBuffer&, DB::FormatSettings const&) const
/root/ClickHouse/src/DataTypes/Serializations/SerializationNumber.cpp:99
bool DB::SerializationNullable::deserializeTextCSVImpl<bool>(DB::IColumn&, DB::ReadBuffer&, DB::FormatSettings const&, std::__1::shared_ptr<DB::ISerialization const> const&)::'lambda'(DB::IColumn&)::operator()(DB::IColumn&) const
/root/ClickHouse/src/DataTypes/Serializations/SerializationNullable.cpp:377
bool DB::safeDeserialize<bool, bool DB::SerializationNullable::deserializeTextCSVImpl<bool>(DB::IColumn&, DB::ReadBuffer&, DB::FormatSettings const&, std::__1::shared_ptr<DB::ISerialization const> const&)::'lambda'()&, bool DB::SerializationNullable::deserializeTextCSVImpl<bool>(DB::IColumn&, DB::ReadBuffer&, DB::FormatSettings const&, std::__1::shared_ptr<DB::ISerialization const> const&)::'lambda'(DB::IColumn&)&, (bool*)0>(DB::IColumn&, DB::ISerialization const&, bool DB::SerializationNullable::deserializeTextCSVImpl<bool>(DB::IColumn&, DB::ReadBuffer&, DB::FormatSettings const&, std::__1::shared_ptr<DB::ISerialization const> const&)::'lambda'(DB::IColumn&)&, (bool*)0&&)
/root/ClickHouse/src/DataTypes/Serializations/SerializationNullable.cpp:193
bool DB::SerializationNullable::deserializeTextCSVImpl<bool>(DB::IColumn&, DB::ReadBuffer&, DB::FormatSettings const&, std::__1::shared_ptr<DB::ISerialization const> const&)
/root/ClickHouse/src/DataTypes/Serializations/SerializationNullable.cpp:409
DB::CSVRowInputFormat::readField(DB::IColumn&, std::__1::shared_ptr<DB::IDataType const> const&, std::__1::shared_ptr<DB::ISerialization const> const&, bool)
/root/ClickHouse/src/Processors/Formats/Impl/CSVRowInputFormat.cpp:404
DB::CSVRowInputFormat::readRow(std::__1::vector<COW<DB::IColumn>::mutable_ptr<DB::IColumn>, std::__1::allocator<COW<DB::IColumn>::mutable_ptr<DB::IColumn> > >&, DB::RowReadExtension&)
/root/ClickHouse/src/Processors/Formats/Impl/CSVRowInputFormat.cpp:236
DB::IRowInputFormat::generate()
4 性能分析
2 UML 类关系及调用顺序
- 优化方案 调整 ReadFromS3 单线程读性能
- ReadBuffer 解析
4 优化后写入
七 测试数据
1 Schema
CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
2 单表测试模型
CREATE TABLE default.s3_1
(
`LO_ORDERKEY` UInt32,
`LO_LINENUMBER` UInt8,
`LO_CUSTKEY` UInt32,
`LO_PARTKEY` UInt32,
`LO_SUPPKEY` UInt32,
`LO_ORDERDATE` Date,
`LO_ORDERPRIORITY` LowCardinality(String),
`LO_SHIPPRIORITY` UInt8,
`LO_QUANTITY` UInt8,
`LO_EXTENDEDPRICE` UInt32,
`LO_ORDTOTALPRICE` UInt32,
`LO_DISCOUNT` UInt8,
`LO_REVENUE` UInt32,
`LO_SUPPLYCOST` UInt32,
`LO_TAX` UInt8,
`LO_COMMITDATE` Date,
`LO_SHIPMODE` LowCardinality(String)
)
ENGINE = S3('http://xxx/insert01/s3_engine_1.csv', 'xxx', 'xxx', 'CSV')
3 多表测试模型
CREATE TABLE default.s3_5
(
`LO_ORDERKEY` UInt32,
`LO_LINENUMBER` UInt8,
`LO_CUSTKEY` UInt32,
`LO_PARTKEY` UInt32,
`LO_SUPPKEY` UInt32,
`LO_ORDERDATE` Date,
`LO_ORDERPRIORITY` LowCardinality(String),
`LO_SHIPPRIORITY` UInt8,
`LO_QUANTITY` UInt8,
`LO_EXTENDEDPRICE` UInt32,
`LO_ORDTOTALPRICE` UInt32,
`LO_DISCOUNT` UInt8,
`LO_REVENUE` UInt32,
`LO_SUPPLYCOST` UInt32,
`LO_TAX` UInt8,
`LO_COMMITDATE` Date,
`LO_SHIPMODE` LowCardinality(String)
)
ENGINE = S3('http://xxx/insert01/s3_engine_{1..5}.csv', 'xxx', 'xxx', 'CSV')
4 腾讯云COS 规格与限制
5 网络代码测试结果
- 左侧为优化后 网络性能 基本可以打满 COS 带宽
- 右侧为优化前 网络性能
6 不同数据量下 网络带宽测试
希望能给学习ClickHouse 的同学带来帮助!
代码不太适合公布!欢迎大家使用 腾讯云 ClickHouse ,感谢!