Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .changeset/three-gifts-battle.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
'hive': patch
---

Adds an index to coordinates\_(daily,hourly,minutely) tables to speedup the
get_top_operations_for_types ClickHoue query.

Reading of type and fields usage statisticts should be noticeably faster now on big datasets.
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import { z } from 'zod';
import type { Action } from '../clickhouse';

const SystemTablesModel = z.array(
z.object({
name: z.string(),
uuid: z.string(),
}),
);

const StateTableModel = z.array(
z.object({
table: z.string(),
idx_created: z.boolean(),
idx_materialized: z.boolean(),
}),
);

// This migration adds an index for the `coordinate` field.
// Improve the performance of the queries that filter the rows by the type's name.
//
// For example, when looking for `Member.*` coordinates we elimiate the need to scan the whole table,
// by laveraging the idx_typename index.
// We filter rows by the first part of the `coordinate` field (substringIndex(coordinate, '.', 1)).
export const action: Action = async (exec, query) => {
// Create a table to store the state of the migration
await exec(`
CREATE TABLE IF NOT EXISTS default.migration_coordinates_typename_index (
table String,
idx_created Bool DEFAULT false,
idx_materialized Bool DEFAULT false
) ENGINE = MergeTree() ORDER BY tuple()
`);

const tables = await query(`
SELECT uuid, name FROM system.tables WHERE name IN (
'coordinates_daily',
'coordinates_hourly',
'coordinates_minutely'
);
`).then(async r => SystemTablesModel.parse(r.data));

if (tables.length !== 3) {
throw new Error('Expected 3 tables');
}

const tableStates = await query(`
SELECT table, idx_created, idx_materialized FROM default.migration_coordinates_typename_index
`).then(async r => StateTableModel.parse(r.data));

for (const { uuid, name } of tables) {
let state = tableStates.find(s => s.table === name);

if (!state) {
console.log(`Creating state for table ${name}`);
await exec(`
INSERT INTO default.migration_coordinates_typename_index (table) VALUES ('${name}')
`);

state = { table: name, idx_created: false, idx_materialized: false };
}

const innerTable = `.inner_id.${uuid}`;

if (state.idx_created) {
console.log(`Skipping idx_typename for table ${name}`);
} else {
console.log(`Creating idx_typename for table ${name}`);
await exec(
`ALTER TABLE "${innerTable}" ADD INDEX IF NOT EXISTS idx_typename (substringIndex(coordinate, '.', 1)) TYPE ngrambf_v1(4, 1024, 2, 0) GRANULARITY 1`,
);
await exec(
`ALTER TABLE default.migration_coordinates_typename_index UPDATE idx_created = true WHERE table = '${name}'`,
{
mutations_sync: '2',
},
);
}

if (state.idx_materialized) {
console.log(`Skipping materializing idx_typename for table ${name}`);
} else {
console.log(`Materializing idx_typename for table ${name}`);
await exec(`ALTER TABLE "${innerTable}" MATERIALIZE INDEX idx_typename`);
await exec(
`ALTER TABLE default.migration_coordinates_typename_index UPDATE idx_materialized = true WHERE table = '${name}'`,
{
mutations_sync: '2',
},
);
}
}

console.log('Dropping migration state table');
await exec(`
DROP TABLE default.migration_coordinates_typename_index
`);
};
1 change: 1 addition & 0 deletions packages/migrations/src/clickhouse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ export async function migrateClickHouse(
import('./clickhouse-actions/009-ttl-1-year'),
import('./clickhouse-actions/010-app-deployment-operations'),
import('./clickhouse-actions/011-audit-logs'),
import('./clickhouse-actions/012-coordinates-typename-index'),
]);

async function actionRunner(action: Action, index: number) {
Expand Down
Loading