CodeCommitsIssuesPull requestsActionsInsightsSecurity
50a312534c946f07547d0b932afb678cf8d615e0

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

src/Interpreters/InterpreterInsertQuery.cpp

327lines · modecode

1#include <Interpreters/InterpreterInsertQuery.h>
2
3#include <DataStreams/AddingDefaultBlockOutputStream.h>
4#include <DataStreams/AddingDefaultsBlockInputStream.h>
5#include <DataStreams/CheckConstraintsBlockOutputStream.h>
6#include <DataStreams/CountingBlockOutputStream.h>
7#include <DataStreams/InputStreamFromASTInsertQuery.h>
8#include <DataStreams/NullAndDoCopyBlockInputStream.h>
9#include <DataStreams/NullBlockOutputStream.h>
10#include <DataStreams/PushingToViewsBlockOutputStream.h>
11#include <DataStreams/RemoteBlockInputStream.h>
12#include <DataStreams/SquashingBlockOutputStream.h>
13#include <DataStreams/copyData.h>
14#include <IO/ConcatReadBuffer.h>
15#include <Interpreters/InterpreterSelectWithUnionQuery.h>
16#include <Interpreters/InterpreterWatchQuery.h>
17#include <Access/AccessFlags.h>
18#include <Interpreters/JoinedTables.h>
19#include <Parsers/ASTFunction.h>
20#include <Parsers/ASTInsertQuery.h>
21#include <Parsers/ASTSelectQuery.h>
22#include <Parsers/ASTSelectWithUnionQuery.h>
23#include <Parsers/queryToString.h>
24#include <Storages/StorageDistributed.h>
25#include <TableFunctions/TableFunctionFactory.h>
26#include <Common/checkStackSize.h>
27#include <Processors/Sources/SourceFromInputStream.h>
28#include <Processors/NullSink.h>
29#include <Processors/Transforms/ConvertingTransform.h>
30#include <Processors/Sources/SinkToOutputStream.h>
31
32namespace
33{
34const UInt64 PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL = 2;
35}
36
37
38namespace DB
39{
40
namespace ErrorCodes
{
    /// A column is listed more than once in the INSERT column list (getSampleBlock).
    extern const int DUPLICATE_COLUMN;
    /// An attempt to insert into a MATERIALIZED column while it is not allowed (getSampleBlock, execute).
    extern const int ILLEGAL_COLUMN;
    /// Internal invariant violation, e.g. no connection obtained for a remote shard (execute).
    extern const int LOGICAL_ERROR;
    /// A column from the INSERT column list does not exist in the table (getSampleBlock).
    extern const int NO_SUCH_COLUMN_IN_TABLE;
}
48
49
50InterpreterInsertQuery::InterpreterInsertQuery(
51 const ASTPtr & query_ptr_, const Context & context_, bool allow_materialized_, bool no_squash_, bool no_destination_)
52 : query_ptr(query_ptr_)
53 , context(context_)
54 , allow_materialized(allow_materialized_)
55 , no_squash(no_squash_)
56 , no_destination(no_destination_)
57{
58 checkStackSize();
59}
60
61
62StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query)
63{
64 if (query.table_function)
65 {
66 const auto * table_function = query.table_function->as<ASTFunction>();
67 const auto & factory = TableFunctionFactory::instance();
68 TableFunctionPtr table_function_ptr = factory.get(table_function->name, context);
69 return table_function_ptr->execute(query.table_function, context, table_function_ptr->getName());
70 }
71
72 query.table_id = context.resolveStorageID(query.table_id);
73 return DatabaseCatalog::instance().getTable(query.table_id, context);
74}
75
76Block InterpreterInsertQuery::getSampleBlock(
77 const ASTInsertQuery & query,
78 const StoragePtr & table,
79 const StorageMetadataPtr & metadata_snapshot) const
80{
81 Block table_sample_non_materialized = metadata_snapshot->getSampleBlockNonMaterialized();
82 /// If the query does not include information about columns
83 if (!query.columns)
84 {
85 if (no_destination)
86 return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals());
87 else
88 return table_sample_non_materialized;
89 }
90
91 Block table_sample = metadata_snapshot->getSampleBlock();
92 /// Form the block based on the column names from the query
93 Block res;
94 for (const auto & identifier : query.columns->children)
95 {
96 std::string current_name = identifier->getColumnName();
97
98 /// The table does not have a column with that name
99 if (!table_sample.has(current_name))
100 throw Exception("No such column " + current_name + " in table " + query.table_id.getNameForLogs(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
101
102 if (!allow_materialized && !table_sample_non_materialized.has(current_name))
103 throw Exception("Cannot insert column " + current_name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
104 if (res.has(current_name))
105 throw Exception("Column " + current_name + " specified more than once", ErrorCodes::DUPLICATE_COLUMN);
106
107 res.insert(ColumnWithTypeAndName(table_sample.getByName(current_name).type, current_name));
108 }
109 return res;
110}
111
112
113BlockIO InterpreterInsertQuery::execute()
114{
115 const Settings & settings = context.getSettingsRef();
116 auto & query = query_ptr->as<ASTInsertQuery &>();
117
118 BlockIO res;
119
120 StoragePtr table = getTable(query);
121 auto table_lock = table->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
122 auto metadata_snapshot = table->getInMemoryMetadataPtr();
123
124 auto query_sample_block = getSampleBlock(query, table, metadata_snapshot);
125 if (!query.table_function)
126 context.checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
127
128 bool is_distributed_insert_select = false;
129
130 if (query.select && table->isRemote() && settings.parallel_distributed_insert_select)
131 {
132 // Distributed INSERT SELECT
133 std::shared_ptr<StorageDistributed> storage_src;
134 auto & select = query.select->as<ASTSelectWithUnionQuery &>();
135 auto new_query = std::dynamic_pointer_cast<ASTInsertQuery>(query.clone());
136 if (select.list_of_selects->children.size() == 1)
137 {
138 auto & select_query = select.list_of_selects->children.at(0)->as<ASTSelectQuery &>();
139 JoinedTables joined_tables(Context(context), select_query);
140
141 if (joined_tables.tablesCount() == 1)
142 {
143 storage_src = std::dynamic_pointer_cast<StorageDistributed>(joined_tables.getLeftTableStorage());
144 if (storage_src)
145 {
146 const auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
147 select_with_union_query->list_of_selects = std::make_shared<ASTExpressionList>();
148
149 auto new_select_query = std::dynamic_pointer_cast<ASTSelectQuery>(select_query.clone());
150 select_with_union_query->list_of_selects->children.push_back(new_select_query);
151
152 new_select_query->replaceDatabaseAndTable(storage_src->getRemoteDatabaseName(), storage_src->getRemoteTableName());
153
154 new_query->select = select_with_union_query;
155 }
156 }
157 }
158
159 auto storage_dst = std::dynamic_pointer_cast<StorageDistributed>(table);
160
161 if (storage_src && storage_dst && storage_src->cluster_name == storage_dst->cluster_name)
162 {
163 is_distributed_insert_select = true;
164
165 if (settings.parallel_distributed_insert_select == PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL)
166 {
167 new_query->table_id = StorageID(storage_dst->getRemoteDatabaseName(), storage_dst->getRemoteTableName());
168 }
169
170 const auto & cluster = storage_src->getCluster();
171 const auto & shards_info = cluster->getShardsInfo();
172
173 std::vector<std::unique_ptr<QueryPipeline>> pipelines;
174
175 String new_query_str = queryToString(new_query);
176 for (size_t shard_index : ext::range(0, shards_info.size()))
177 {
178 const auto & shard_info = shards_info[shard_index];
179 if (shard_info.isLocal())
180 {
181 InterpreterInsertQuery interpreter(new_query, context);
182 pipelines.emplace_back(std::make_unique<QueryPipeline>(interpreter.execute().pipeline));
183 }
184 else
185 {
186 auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
187 auto connections = shard_info.pool->getMany(timeouts, &settings, PoolMode::GET_ONE);
188 if (connections.empty() || connections.front().isNull())
189 throw Exception(
190 "Expected exactly one connection for shard " + toString(shard_info.shard_num), ErrorCodes::LOGICAL_ERROR);
191
192 /// INSERT SELECT query returns empty block
193 auto in_stream = std::make_shared<RemoteBlockInputStream>(std::move(connections), new_query_str, Block{}, context);
194 pipelines.emplace_back(std::make_unique<QueryPipeline>());
195 pipelines.back()->init(Pipe(std::make_shared<SourceFromInputStream>(std::move(in_stream))));
196 pipelines.back()->setSinks([](const Block & header, QueryPipeline::StreamType) -> ProcessorPtr
197 {
198 return std::make_shared<EmptySink>(header);
199 });
200 }
201 }
202
203 res.pipeline = QueryPipeline::unitePipelines(std::move(pipelines), {});
204 }
205 }
206
207 BlockOutputStreams out_streams;
208 if (!is_distributed_insert_select || query.watch)
209 {
210 size_t out_streams_size = 1;
211 if (query.select)
212 {
213 /// Passing 1 as subquery_depth will disable limiting size of intermediate result.
214 InterpreterSelectWithUnionQuery interpreter_select{ query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
215 res = interpreter_select.execute();
216
217 if (table->supportsParallelInsert() && settings.max_insert_threads > 1)
218 out_streams_size = std::min(size_t(settings.max_insert_threads), res.pipeline.getNumStreams());
219
220 res.pipeline.resize(out_streams_size);
221 }
222 else if (query.watch)
223 {
224 InterpreterWatchQuery interpreter_watch{ query.watch, context };
225 res = interpreter_watch.execute();
226 res.pipeline.init(Pipe(std::make_shared<SourceFromInputStream>(std::move(res.in))));
227 }
228
229 for (size_t i = 0; i < out_streams_size; i++)
230 {
231 /// We create a pipeline of several streams, into which we will write data.
232 BlockOutputStreamPtr out;
233
234 /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
235 /// Otherwise we'll get duplicates when MV reads same rows again from Kafka.
236 if (table->noPushingToViews() && !no_destination)
237 out = table->write(query_ptr, metadata_snapshot, context);
238 else
239 out = std::make_shared<PushingToViewsBlockOutputStream>(table, metadata_snapshot, context, query_ptr, no_destination);
240
241 /// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order.
242
243 /// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns.
244 if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty())
245 out = std::make_shared<CheckConstraintsBlockOutputStream>(
246 query.table_id, out, out->getHeader(), metadata_snapshot->getConstraints(), context);
247
248 /// Actually we don't know structure of input blocks from query/table,
249 /// because some clients break insertion protocol (columns != header)
250 out = std::make_shared<AddingDefaultBlockOutputStream>(
251 out, query_sample_block, out->getHeader(), metadata_snapshot->getColumns().getDefaults(), context);
252
253 /// It's important to squash blocks as early as possible (before other transforms),
254 /// because other transforms may work inefficient if block size is small.
255
256 /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
257 /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
258 if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash && !query.watch)
259 {
260 out = std::make_shared<SquashingBlockOutputStream>(
261 out,
262 out->getHeader(),
263 context.getSettingsRef().min_insert_block_size_rows,
264 context.getSettingsRef().min_insert_block_size_bytes);
265 }
266
267 auto out_wrapper = std::make_shared<CountingBlockOutputStream>(out);
268 out_wrapper->setProcessListElement(context.getProcessListElement());
269 out = std::move(out_wrapper);
270 out_streams.emplace_back(std::move(out));
271 }
272 }
273
274 /// What type of query: INSERT or INSERT SELECT or INSERT WATCH?
275 if (is_distributed_insert_select)
276 {
277 /// Pipeline was already built.
278 }
279 else if (query.select || query.watch)
280 {
281 const auto & header = out_streams.at(0)->getHeader();
282
283 res.pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
284 {
285 return std::make_shared<ConvertingTransform>(in_header, header,
286 ConvertingTransform::MatchColumnsMode::Position);
287 });
288
289 res.pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr
290 {
291 if (type != QueryPipeline::StreamType::Main)
292 return nullptr;
293
294 auto stream = std::move(out_streams.back());
295 out_streams.pop_back();
296
297 return std::make_shared<SinkToOutputStream>(std::move(stream));
298 });
299
300 if (!allow_materialized)
301 {
302 for (const auto & column : metadata_snapshot->getColumns())
303 if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
304 throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
305 }
306 }
307 else if (query.data && !query.has_tail) /// can execute without additional data
308 {
309 // res.out = std::move(out_streams.at(0));
310 res.in = std::make_shared<InputStreamFromASTInsertQuery>(query_ptr, nullptr, query_sample_block, context, nullptr);
311 res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, out_streams.at(0));
312 }
313 else
314 res.out = std::move(out_streams.at(0));
315
316 res.pipeline.addStorageHolder(table);
317
318 return res;
319}
320
321
322StorageID InterpreterInsertQuery::getDatabaseTable() const
323{
324 return query_ptr->as<ASTInsertQuery &>().table_id;
325}
326
327}