From fbe20821072ead216cec403a195f4165f94cd454 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 9 Mar 2026 17:32:23 +0100 Subject: [PATCH 1/9] feat: add index for dedup copy pipe --- services/libs/tinybird/datasources/activities.datasource | 3 +++ .../tinybird/datasources/activities_deduplicated_ds.datasource | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/services/libs/tinybird/datasources/activities.datasource b/services/libs/tinybird/datasources/activities.datasource index 68dc8dc893..25f570c3fd 100644 --- a/services/libs/tinybird/datasources/activities.datasource +++ b/services/libs/tinybird/datasources/activities.datasource @@ -48,6 +48,9 @@ SCHEMA > `updatedById` UUID `json:$.updatedById` DEFAULT toUUID('00000000-0000-0000-0000-000000000000'), `segmentId` LowCardinality(String) `json:$.segmentId` DEFAULT '' +INDEXES > + idx_updatedAt updatedAt TYPE minmax GRANULARITY 1 + ENGINE ReplacingMergeTree ENGINE_PARTITION_KEY toYear(createdAt) ENGINE_SORTING_KEY id diff --git a/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource b/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource index 50342803c7..0d8e04fbd9 100644 --- a/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource +++ b/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource @@ -38,4 +38,4 @@ INDEXES > ENGINE MergeTree ENGINE_PARTITION_KEY toYear(timestamp) -ENGINE_SORTING_KEY id, platform, channel +ENGINE_SORTING_KEY id, platform, channel, updatedAt From d079d08df3c4f3a231b84f048c7eb90582cb1175 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Wed, 11 Mar 2026 10:44:41 +0100 Subject: [PATCH 2/9] feat: add index on activities_deduplicated_ds --- .../datasources/activities_deduplicated_ds.datasource | 1 + .../activities_deduplicated_copy_pipe_append_mode.pipe | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource b/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource index 0d8e04fbd9..b56c7936bd 100644 --- a/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource +++ b/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource @@ -35,6 +35,7 @@ SCHEMA > INDEXES > idx_body_ngram3 body TYPE ngrambf_v1(3, 2048, 6, 0) GRANULARITY 64 idx_title_ngram3 title TYPE ngrambf_v1(3, 512, 6, 0) GRANULARITY 64 + idx_updatedAt updatedAt TYPE minmax GRANULARITY 1 ENGINE MergeTree ENGINE_PARTITION_KEY toYear(timestamp) diff --git a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe index 277c4ddbc5..40cad0bcb4 100644 --- a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe +++ b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe @@ -15,7 +15,11 @@ SQL > a.url, a.updatedAt FROM activities a - WHERE a.updatedAt > (SELECT max("updatedAt") FROM activities_deduplicated_ds) + WHERE a.updatedAt > ( + SELECT max(updatedAt) + FROM activities_deduplicated_ds + WHERE updatedAt > now() - INTERVAL 3 HOUR + ) TYPE COPY TARGET_DATASOURCE activities_deduplicated_ds From 26e5cde72ea17b52074bb3d26fe1cf46bb0bcbb4 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Wed, 11 Mar 2026 10:52:23 +0100 Subject: [PATCH 3/9] fix: lint --- ...activities_deduplicated_copy_pipe_append_mode.pipe | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe index 40cad0bcb4..a997415a99 100644 --- a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe +++ b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe @@ -15,11 +15,12 @@ SQL > a.url, a.updatedAt FROM activities a - WHERE a.updatedAt > ( - SELECT max(updatedAt) - FROM activities_deduplicated_ds - WHERE updatedAt > now() - INTERVAL 3 HOUR - ) + WHERE + a.updatedAt > ( + SELECT max(updatedAt) + FROM activities_deduplicated_ds + WHERE updatedAt > now() - INTERVAL 3 HOUR + ) TYPE COPY TARGET_DATASOURCE activities_deduplicated_ds From 5a404eb576eabe330a685220a4315a7db74c174a Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Wed, 11 Mar 2026 11:03:29 +0100 Subject: [PATCH 4/9] fix: lint --- .../datasources/activities_deduplicated_ds.datasource | 2 +- .../pipes/activities_deduplicated_copy_pipe_append_mode.pipe | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource b/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource index b56c7936bd..a86732be18 100644 --- a/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource +++ b/services/libs/tinybird/datasources/activities_deduplicated_ds.datasource @@ -39,4 +39,4 @@ INDEXES > ENGINE MergeTree ENGINE_PARTITION_KEY toYear(timestamp) -ENGINE_SORTING_KEY id, platform, channel, updatedAt +ENGINE_SORTING_KEY id, platform, channel diff --git a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe index a997415a99..f4a3cbc9db 100644 --- a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe +++ b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe @@ -17,7 +17,10 @@ SQL > FROM activities a WHERE a.updatedAt > ( - SELECT max(updatedAt) + SELECT greatest( + max(updatedAt), + now() - INTERVAL 3 HOUR + ) FROM activities_deduplicated_ds WHERE updatedAt > now() - INTERVAL 3 HOUR ) From eb2626733eff95599703cc9b7e8af72a62971db6 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Wed, 11 Mar 2026 11:03:34 +0100 Subject: [PATCH 5/9] fix: lint --- .../pipes/activities_deduplicated_copy_pipe_append_mode.pipe | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe index f4a3cbc9db..34f152704c 100644 --- a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe +++ b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe @@ -17,10 +17,7 @@ SQL > FROM activities a WHERE a.updatedAt > ( - SELECT greatest( - max(updatedAt), - now() - INTERVAL 3 HOUR - ) + SELECT greatest(max(updatedAt), now() - INTERVAL 3 HOUR) FROM activities_deduplicated_ds WHERE updatedAt > now() - INTERVAL 3 HOUR ) From 35da0a580d3a73eaf6062e8b4322886dc260eb40 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Wed, 11 Mar 2026 11:51:04 +0100 Subject: [PATCH 6/9] feat: test signed commits Signed-off-by: Umberto Sgueglia --- .../pipes/activities_deduplicated_copy_pipe_append_mode.pipe | 1 + 1 file changed, 1 insertion(+) diff --git a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe index 34f152704c..55ee85c3b2 100644 --- a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe +++ b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe @@ -26,3 +26,4 @@ TYPE COPY TARGET_DATASOURCE activities_deduplicated_ds COPY_MODE append COPY_SCHEDULE 45 */2 * * * + From 72aa271cfd5c65a9395f52765423ca4e8eb06af0 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Wed, 11 Mar 2026 11:57:43 +0100 Subject: [PATCH 7/9] feat: test signed commits Signed-off-by: Umberto Sgueglia --- .../pipes/activities_deduplicated_copy_pipe_append_mode.pipe | 1 - 1 file changed, 1 deletion(-) diff --git a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe index 55ee85c3b2..34f152704c 100644 --- a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe +++ b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe @@ -26,4 +26,3 @@ TYPE COPY TARGET_DATASOURCE activities_deduplicated_ds COPY_MODE append COPY_SCHEDULE 45 */2 * * * - From a82e25b714defdf169a35ac48f13bc2c1409d7c0 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Wed, 11 Mar 2026 12:09:28 +0100 Subject: [PATCH 8/9] feat: test signed commits Signed-off-by: Umberto Sgueglia --- .../pipes/activities_deduplicated_copy_pipe_append_mode.pipe | 1 + 1 file changed, 1 insertion(+) diff --git a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe index 34f152704c..55ee85c3b2 100644 --- a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe +++ b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe @@ -26,3 +26,4 @@ TYPE COPY TARGET_DATASOURCE activities_deduplicated_ds COPY_MODE append COPY_SCHEDULE 45 */2 * * * + From c00322091bb5f27ce7edafd4c293700d6ddb50d3 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Wed, 11 Mar 2026 12:10:44 +0100 Subject: [PATCH 9/9] feat: test signed commits Signed-off-by: Umberto Sgueglia --- .../pipes/activities_deduplicated_copy_pipe_append_mode.pipe | 1 - 1 file changed, 1 deletion(-) diff --git a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe index 55ee85c3b2..34f152704c 100644 --- a/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe +++ b/services/libs/tinybird/pipes/activities_deduplicated_copy_pipe_append_mode.pipe @@ -26,4 +26,3 @@ TYPE COPY TARGET_DATASOURCE activities_deduplicated_ds COPY_MODE append COPY_SCHEDULE 45 */2 * * * -