From cf893f97a9bb20d13cdd949f284f302b67715754 Mon Sep 17 00:00:00 2001 From: Max Ostapenko Date: Wed, 20 Aug 2025 23:53:13 +0000 Subject: [PATCH 1/3] fix month --- .../output/reports/reports_html_elements_popularity.js | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/definitions/output/reports/reports_html_elements_popularity.js b/definitions/output/reports/reports_html_elements_popularity.js index fe5f4f94..fec88a01 100644 --- a/definitions/output/reports/reports_html_elements_popularity.js +++ b/definitions/output/reports/reports_html_elements_popularity.js @@ -1,5 +1,3 @@ -const pastMonth = constants.fnPastMonth(constants.currentMonth) - publish('html_elements_popularity', { schema: 'reports', type: 'incremental', @@ -18,7 +16,7 @@ try { '''; DELETE FROM ${ctx.self()} -WHERE date = '${pastMonth}'; +WHERE date = '${constants.currentMonth}'; `).query(ctx => ` WITH pages_data AS ( SELECT @@ -29,7 +27,7 @@ WITH pages_data AS ( custom_metrics.element_count FROM ${ctx.ref('crawl', 'pages')} WHERE - date = '${pastMonth}' ${constants.devRankFilter} + date = '${constants.currentMonth}' ${constants.devRankFilter} ), totals AS ( @@ -69,9 +67,9 @@ SELECT "destination": "cloud_storage", "config": { "bucket": "${constants.bucket}", - "name": "${constants.storagePath}${pastMonth.replaceAll('-', '_')}/htmlElementPopularity.json" + "name": "${constants.storagePath}${constants.currentMonth.replaceAll('-', '_')}/htmlElementPopularity.json" }, - "query": "SELECT * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" + "query": "SELECT * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${constants.currentMonth}'" }''' ); `) From cdad0da7592ea70316bfe17e6038b6a266e67795 Mon Sep 17 00:00:00 2001 From: Max Ostapenko Date: Wed, 20 Aug 2025 23:53:33 +0000 Subject: [PATCH 2/3] fix column order --- definitions/output/crawl/parsed_css.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/definitions/output/crawl/parsed_css.js b/definitions/output/crawl/parsed_css.js index 648c2fb8..ec3abd1e 100644 --- a/definitions/output/crawl/parsed_css.js +++ b/definitions/output/crawl/parsed_css.js @@ -17,8 +17,8 @@ WHERE date = '${constants.currentMonth}' `).query(ctx => ` SELECT * EXCEPT(css), - SAFE.PARSE_JSON(css, wide_number_mode=>'round') AS css, - NULL AS css_backup + NULL AS css_backup, + SAFE.PARSE_JSON(css, wide_number_mode=>'round') AS css FROM ${ctx.ref('crawl_staging', 'parsed_css')} WHERE date = '${constants.currentMonth}' AND client = 'desktop' @@ -31,8 +31,8 @@ WHERE date = '${constants.currentMonth}' INSERT INTO ${ctx.self()} SELECT * EXCEPT(css), - SAFE.PARSE_JSON(css, wide_number_mode=>'round') AS css, - NULL AS css_backup + NULL AS css_backup, + SAFE.PARSE_JSON(css, wide_number_mode=>'round') AS css FROM ${ctx.ref('crawl_staging', 'parsed_css')} WHERE date = '${constants.currentMonth}' AND client = 'mobile' From 36607a589f55baf9fbcc2fa12092ea35a578a3fd Mon Sep 17 00:00:00 2001 From: Max Ostapenko Date: Wed, 20 Aug 2025 23:53:53 +0000 Subject: [PATCH 3/3] pages_per_client assert --- definitions/output/crawl/pages.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/definitions/output/crawl/pages.js b/definitions/output/crawl/pages.js index 91fd3388..197789d2 100644 --- a/definitions/output/crawl/pages.js +++ b/definitions/output/crawl/pages.js @@ -30,6 +30,20 @@ ORDER BY cnt_pages DESC HAVING cnt_pages > 200 `) +assert('pages_per_client') + .tags(['crawl_complete']) + .query(ctx => ` +SELECT + client, + COUNT(DISTINCT page) AS cnt_pages +FROM ${ctx.ref('crawl_staging', 'pages')} +WHERE + date = '${constants.currentMonth}' +GROUP BY + client +HAVING cnt_pages < 20000000 + `) + publish('pages', { type: 'incremental', protected: true,