Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions definitions/output/reports/tech_report_categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,26 @@ category_descriptions AS (
FROM ${ctx.ref('wappalyzer', 'categories')}
),

-- crux: one row per (client, root_page) candidate taken from the CrUX
-- device_summary table for the reporting month. Its two output columns are
-- the keys that merged_pages joins on.
crux AS (
SELECT
-- Only 'desktop' and 'phone' rows pass the WHERE filter below, so this maps
-- 'phone' to 'mobile' and leaves 'desktop' unchanged.
IF(device = 'desktop', 'desktop', 'mobile') AS client,
-- Append a trailing slash to the origin to build root_page.
-- NOTE(review): presumably this matches the root_page format used by pages; confirm.
CONCAT(origin, '/') AS root_page
FROM ${ctx.ref('chrome-ux-report', 'materialized', 'device_summary')}
WHERE
date = '${pastMonth}'
AND device IN ('desktop', 'phone')
),

-- merged_pages: the distinct (client, technologies, root_page) rows from pages
-- that also have a matching row in crux for the same client and root_page.
merged_pages AS (
SELECT DISTINCT
client,
technologies,
root_page
FROM pages
-- INNER JOIN: a page whose (client, root_page) has no row in crux is dropped.
INNER JOIN crux
USING (client, root_page)
),

category_stats AS (
SELECT
category,
Expand All @@ -35,8 +55,8 @@ category_stats AS (
client,
category,
COUNT(DISTINCT root_page) AS origins
FROM pages
INNER JOIN pages.technologies AS tech
FROM merged_pages
INNER JOIN merged_pages.technologies AS tech
INNER JOIN tech.categories AS category
Comment on lines +59 to 60
Copy link
Copy Markdown
Member

@tunetheweb tunetheweb Apr 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these be outer joins for pages with no technologies?

Copy link
Copy Markdown
Contributor Author

@max-ostapenko max-ostapenko Apr 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We aggregate by known categories here, so pages without any technologies are excluded at this step anyway.

Even if they were not excluded here, they would be dropped in the next steps:

  • INNER JOIN technology_stats ON category_stats.category IN UNNEST(technology_stats.categories)
  • or INNER JOIN category_descriptions USING (category)

Pages without any technologies (and thus no categories) are covered by the part after UNION ALL, which is based on merged_pages.

WHERE
category IS NOT NULL
Expand Down Expand Up @@ -87,7 +107,7 @@ FROM (
SELECT
client,
COUNT(DISTINCT root_page) AS origins
FROM pages
FROM merged_pages
GROUP BY client
)
`).postOps(ctx => `
Expand Down
57 changes: 14 additions & 43 deletions definitions/output/reports/tech_report_technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,17 @@ publish('tech_report_technologies', {
type: 'table',
tags: ['tech_report']
}).query(ctx => `
WITH pages AS (
SELECT DISTINCT
client,
root_page,
tech.technology
FROM ${ctx.ref('crawl', 'pages')} AS pages
INNER JOIN pages.technologies AS tech
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
),

tech_origins AS (
WITH tech_origins AS (
SELECT
technology,
STRUCT(
MAX(IF(client = 'desktop', origins, 0)) AS desktop,
MAX(IF(client = 'mobile', origins, 0)) AS mobile
) AS origins
FROM (
SELECT
client,
technology,
COUNT(DISTINCT root_page) AS origins
FROM pages
GROUP BY
client,
technology
)
GROUP BY technology
technology,
adoption AS origins
FROM ${ctx.ref('reports', 'tech_report_adoption')}
WHERE
date = '${pastMonth}'
AND rank = 'ALL'
AND geo = 'ALL'
AND version = 'ALL'
${constants.devRankFilter}
),

technologies AS (
Expand All @@ -51,14 +32,6 @@ technologies AS (
description,
categories,
icon
),

total_pages AS (
SELECT
client,
COUNT(DISTINCT root_page) AS origins
FROM pages
GROUP BY client
)

SELECT
Expand All @@ -75,16 +48,14 @@ USING(technology)
UNION ALL

SELECT
'ALL' AS technology,
technology,
NULL AS description,
NULL AS icon,
NULL AS category,
NULL AS category_obj,
STRUCT(
MAX(IF(client = 'desktop', origins, 0)) AS desktop,
MAX(IF(client = 'mobile', origins, 0)) AS mobile
) AS origins
FROM total_pages
origins
FROM tech_origins
WHERE technology = 'ALL'
Comment thread
tunetheweb marked this conversation as resolved.
`).postOps(ctx => `
SELECT
reports.run_export_job(
Expand Down
59 changes: 7 additions & 52 deletions definitions/output/reports/tech_report_versions.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,16 @@ publish('tech_report_versions', {
type: 'table',
tags: ['tech_report']
}).query(ctx => `
WITH pages AS (
SELECT DISTINCT
client,
root_page,
tech.technology,
REGEXP_EXTRACT(version, r'\\d+(?:\\.\\d+)?') AS version
FROM ${ctx.ref('crawl', 'pages')} AS pages
INNER JOIN pages.technologies AS tech
LEFT JOIN tech.info AS version
WHERE
date = '${pastMonth}'
${constants.devRankFilter} AND
tech.technology IS NOT NULL
),

version_origins AS (
SELECT
client,
technology,
version,
COUNT(DISTINCT root_page) AS origins
FROM pages
WHERE version IS NOT NULL
GROUP BY
client,
technology,
version
),

total_origins AS (
SELECT
client,
technology,
COUNT(DISTINCT root_page) AS origins
FROM pages
GROUP BY
client,
technology
)

SELECT
client,
technology,
version,
origins
FROM version_origins

UNION ALL

SELECT
client,
technology,
'ALL' AS version,
origins
FROM total_origins
adoption AS origins
FROM ${ctx.ref('reports', 'tech_report_adoption')}
WHERE
date = '${pastMonth}'
AND rank = 'ALL'
AND geo = 'ALL'
${constants.devRankFilter}
`).postOps(ctx => `
SELECT
reports.run_export_job(
Expand Down