From d4a40fba65bf3f4f6df6a0ab63ec0a11e073c747 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 15 Apr 2025 19:42:51 +0200 Subject: [PATCH 1/5] versions --- .../output/reports/tech_report_versions.js | 59 +++---------------- 1 file changed, 7 insertions(+), 52 deletions(-) diff --git a/definitions/output/reports/tech_report_versions.js b/definitions/output/reports/tech_report_versions.js index 2ddd8266..a1d1a2ae 100644 --- a/definitions/output/reports/tech_report_versions.js +++ b/definitions/output/reports/tech_report_versions.js @@ -5,61 +5,16 @@ publish('tech_report_versions', { type: 'table', tags: ['tech_report'] }).query(ctx => ` -WITH pages AS ( - SELECT DISTINCT - client, - root_page, - tech.technology, - REGEXP_EXTRACT(version, r'\\d+(?:\\.\\d+)?') AS version - FROM ${ctx.ref('crawl', 'pages')} AS pages - INNER JOIN pages.technologies AS tech - LEFT JOIN tech.info AS version - WHERE - date = '${pastMonth}' - ${constants.devRankFilter} AND - tech.technology IS NOT NULL -), - -version_origins AS ( - SELECT - client, - technology, - version, - COUNT(DISTINCT root_page) AS origins - FROM pages - WHERE version IS NOT NULL - GROUP BY - client, - technology, - version -), - -total_origins AS ( - SELECT - client, - technology, - COUNT(DISTINCT root_page) AS origins - FROM pages - GROUP BY - client, - technology -) - SELECT - client, technology, version, - origins -FROM version_origins - -UNION ALL - -SELECT - client, - technology, - 'ALL' AS version, - origins -FROM total_origins + adoption AS origins +FROM ${ctx.ref('reports', 'tech_report_adoption')} +WHERE + date = '${pastMonth}' + AND rank = 'ALL' + AND geo = 'ALL' + ${constants.devRankFilter} `).postOps(ctx => ` SELECT reports.run_export_job( From 0f03d64423377a646400782e7b7427b115017511 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 15 Apr 2025 19:43:01 +0200 Subject: [PATCH 2/5] technologies --- .../reports/tech_report_technologies.js | 57 +++++-------------- 1 file changed, 14 insertions(+), 43 deletions(-) diff --git a/definitions/output/reports/tech_report_technologies.js b/definitions/output/reports/tech_report_technologies.js index 9a1b7ae2..2d3a0b6c 100644 --- a/definitions/output/reports/tech_report_technologies.js +++ b/definitions/output/reports/tech_report_technologies.js @@ -5,36 +5,17 @@ publish('tech_report_technologies', { type: 'table', tags: ['tech_report'] }).query(ctx => ` -WITH pages AS ( - SELECT DISTINCT - client, - root_page, - tech.technology - FROM ${ctx.ref('crawl', 'pages')} AS pages - INNER JOIN pages.technologies AS tech - WHERE - date = '${pastMonth}' - ${constants.devRankFilter} -), - -tech_origins AS ( +WITH tech_origins AS ( SELECT - technology, - STRUCT( - MAX(IF(client = 'desktop', origins, 0)) AS desktop, - MAX(IF(client = 'mobile', origins, 0)) AS mobile - ) AS origins - FROM ( - SELECT - client, - technology, - COUNT(DISTINCT root_page) AS origins - FROM pages - GROUP BY - client, - technology - ) - GROUP BY technology + technology, + adoption AS origins +FROM ${ctx.ref('reports', 'tech_report_adoption')} +WHERE + date = '${pastMonth}' + AND rank = 'ALL' + AND geo = 'ALL' + AND version = 'ALL' + ${constants.devRankFilter} ), technologies AS ( @@ -51,14 +32,6 @@ technologies AS ( description, categories, icon -), - -total_pages AS ( - SELECT - client, - COUNT(DISTINCT root_page) AS origins - FROM pages - GROUP BY client ) SELECT @@ -75,16 +48,14 @@ USING(technology) UNION ALL SELECT - 'ALL' AS technology, + technology, NULL AS description, NULL AS icon, NULL AS category, NULL AS category_obj, - STRUCT( - MAX(IF(client = 'desktop', origins, 0)) AS desktop, - MAX(IF(client = 'mobile', origins, 0)) AS mobile - ) AS origins -FROM total_pages + origins +FROM tech_origins +WHERE technology = 'ALL' `).postOps(ctx => ` SELECT reports.run_export_job( From 45160790a8fa3acb640375bd3d99cbb04c44e30c Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 21 Apr 2025 14:18:39 +0200 Subject: [PATCH 3/5] categories --- .../output/reports/tech_report_categories.js | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/definitions/output/reports/tech_report_categories.js b/definitions/output/reports/tech_report_categories.js index 62ffe257..42ee925d 100644 --- a/definitions/output/reports/tech_report_categories.js +++ b/definitions/output/reports/tech_report_categories.js @@ -8,11 +8,12 @@ publish('tech_report_categories', { WITH pages AS ( SELECT DISTINCT client, - root_page, - technologies - FROM ${ctx.ref('crawl', 'pages')} + technologies, + root_page + FROM ${ctx.ref('crawl', 'pages')} AS pages WHERE date = '${pastMonth}' + AND technologies IS NOT NULL ${constants.devRankFilter} ), @@ -23,6 +24,26 @@ category_descriptions AS ( FROM ${ctx.ref('wappalyzer', 'categories')} ), +crux AS ( + SELECT + IF(device = 'desktop', 'desktop', 'mobile') AS client, + CONCAT(origin, '/') AS root_page + FROM ${ctx.ref('chrome-ux-report', 'materialized', 'device_summary')} + WHERE + date = '${pastMonth}' + AND device IN ('desktop', 'phone') +), + +merged_pages AS ( + SELECT DISTINCT + client, + technologies, + root_page + FROM pages + INNER JOIN crux + USING (client, root_page) +), + category_stats AS ( SELECT category, @@ -35,8 +56,8 @@ category_stats AS ( client, category, COUNT(DISTINCT root_page) AS origins - FROM pages - INNER JOIN pages.technologies AS tech + FROM merged_pages + INNER JOIN merged_pages.technologies AS tech INNER JOIN tech.categories AS category WHERE category IS NOT NULL @@ -87,7 +108,7 @@ FROM ( SELECT client, COUNT(DISTINCT root_page) AS origins - FROM pages + FROM merged_pages GROUP BY client ) `).postOps(ctx => ` From 52513b4bfadc8a372cfa2b20dac6edeae5c9b702 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 22 Apr 2025 19:05:20 +0200 Subject: [PATCH 4/5] Update definitions/output/reports/tech_report_categories.js Co-authored-by: Barry Pollard --- definitions/output/reports/tech_report_categories.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/definitions/output/reports/tech_report_categories.js b/definitions/output/reports/tech_report_categories.js index 42ee925d..67316a9c 100644 --- a/definitions/output/reports/tech_report_categories.js +++ b/definitions/output/reports/tech_report_categories.js @@ -8,9 +8,9 @@ publish('tech_report_categories', { WITH pages AS ( SELECT DISTINCT client, - technologies, - root_page - FROM ${ctx.ref('crawl', 'pages')} AS pages + root_page, + technologies + FROM ${ctx.ref('crawl', 'pages')} WHERE date = '${pastMonth}' AND technologies IS NOT NULL From 80422e02a6ca96007f47113a84f2d4f0001875b8 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 22 Apr 2025 20:17:26 +0200 Subject: [PATCH 5/5] all pages in categories = 'ALL' --- definitions/output/reports/tech_report_categories.js | 1 - 1 file changed, 1 deletion(-) diff --git a/definitions/output/reports/tech_report_categories.js b/definitions/output/reports/tech_report_categories.js index 67316a9c..9f5e865e 100644 --- a/definitions/output/reports/tech_report_categories.js +++ b/definitions/output/reports/tech_report_categories.js @@ -13,7 +13,6 @@ WITH pages AS ( FROM ${ctx.ref('crawl', 'pages')} WHERE date = '${pastMonth}' - AND technologies IS NOT NULL ${constants.devRankFilter} ),