From 19c40c04ec1546de1a9c6a6c0bc7de26a4d1e1ee Mon Sep 17 00:00:00 2001 From: Eason WaveKat Date: Sun, 17 May 2026 20:40:39 +1200 Subject: [PATCH] feat: add lite audience analytics to star-tracker Logs each chart-svg and HTML page request into a daily-aggregated views_daily table (referer host, country from cf.country, UA class, 200/304). Rendered as an "Audience" panel on the owner's tenant page: 30-day sparkline, top referers, top countries, client mix. No IPs or raw user-agents stored. Owner self-views and our own domain (in any env) are excluded. Co-Authored-By: Claude Opus 4.7 (1M context) --- tools/star-tracker/migrations/0005_views.sql | 20 +++ tools/star-tracker/src/db.ts | 133 +++++++++++++++++++ tools/star-tracker/src/index.ts | 66 ++++++++- tools/star-tracker/src/pages.ts | 79 ++++++++++- 4 files changed, 295 insertions(+), 3 deletions(-) create mode 100644 tools/star-tracker/migrations/0005_views.sql diff --git a/tools/star-tracker/migrations/0005_views.sql b/tools/star-tracker/migrations/0005_views.sql new file mode 100644 index 0000000..4967583 --- /dev/null +++ b/tools/star-tracker/migrations/0005_views.sql @@ -0,0 +1,20 @@ +-- Lite analytics for chart-svg and HTML page requests so the owner can +-- see where their embedded charts (and tenant/repo pages) are being +-- viewed from. Aggregated daily rollups instead of per-event rows keep +-- D1 footprint bounded — one row per (tenant, repo, kind, day, referer, +-- country, ua_class, cached) tuple, incremented in place. No IPs, no +-- raw user-agents. 
+CREATE TABLE IF NOT EXISTS views_daily ( + tenant_slug TEXT NOT NULL, + repo TEXT NOT NULL DEFAULT '', -- '' = tenant-scoped (org chart / org page) + kind TEXT NOT NULL, -- 'chart' | 'page' + day TEXT NOT NULL, -- 'YYYY-MM-DD' UTC + referer_host TEXT NOT NULL DEFAULT '', -- '' = direct / no referer + country TEXT NOT NULL DEFAULT '', -- ISO 3166-1 alpha-2 from cf.country, '' if unknown + ua_class TEXT NOT NULL DEFAULT 'other',-- 'camo' | 'bot' | 'browser' | 'other' + cached INTEGER NOT NULL DEFAULT 0, -- 1 = 304 served, 0 = body served + count INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (tenant_slug, repo, kind, day, referer_host, country, ua_class, cached) +); + +CREATE INDEX IF NOT EXISTS idx_views_tenant_day ON views_daily(tenant_slug, day); diff --git a/tools/star-tracker/src/db.ts b/tools/star-tracker/src/db.ts index 0fa820c..8a78ac5 100644 --- a/tools/star-tracker/src/db.ts +++ b/tools/star-tracker/src/db.ts @@ -529,3 +529,136 @@ export async function listAllTenants(db: D1Database): Promise { const { results } = await db.prepare('SELECT * FROM tenants').all(); return results ?? []; } + +// -- Lite analytics --------------------------------------------------------- + +export type ViewKind = 'chart' | 'page'; +export type UAClass = 'camo' | 'bot' | 'browser' | 'other'; + +// Day bucket for the views_daily table — UTC YYYY-MM-DD. +export function utcDay(ms: number): string { + return new Date(ms).toISOString().slice(0, 10); +} + +// Increment-or-insert one view. Called via ctx.waitUntil so the response +// goes out before the write completes — keeps chart-svg latency identical +// to pre-analytics. Repo is '' for tenant-scoped requests. 
/**
 * Increment-or-insert one aggregated view row.
 *
 * One row exists per (tenant, repo, kind, day, referer_host, country,
 * ua_class, cached) tuple; a conflicting insert bumps `count` in place.
 * Callers schedule this via `ctx.waitUntil`, so the response goes out before
 * the write completes.
 *
 * @param db D1 binding that holds the `views_daily` table.
 * @param v  The view to record. `repo` is '' for tenant-scoped requests and
 *           `day` is a UTC `YYYY-MM-DD` bucket.
 */
export async function logView(
  db: D1Database,
  v: {
    tenant: string;
    repo: string;
    kind: ViewKind;
    day: string;
    referer_host: string;
    country: string;
    ua_class: UAClass;
    cached: 0 | 1;
  },
): Promise<void> {
  await db
    .prepare(
      `INSERT INTO views_daily (tenant_slug, repo, kind, day, referer_host, country, ua_class, cached, count)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, 1)
       ON CONFLICT(tenant_slug, repo, kind, day, referer_host, country, ua_class, cached)
       DO UPDATE SET count = count + 1`,
    )
    .bind(v.tenant, v.repo, v.kind, v.day, v.referer_host, v.country, v.ua_class, v.cached)
    .run();
}

/** Aggregated audience numbers rendered on the owner's tenant page. */
export type ViewsSummary = {
  windowDays: number;
  totalChart: number;
  totalPage: number;
  cachedChart: number;
  freshChart: number;
  uaBreakdown: { ua_class: UAClass; count: number }[];
  topReferers: { host: string; count: number }[]; // chart + page combined
  topCountries: { country: string; count: number }[];
  daily: { day: string; chart: number; page: number }[]; // ascending day
};

// Length of one UTC day bucket in milliseconds.
const VIEWS_MS_PER_DAY = 86_400_000;

// How many referers / countries the summary keeps.
const VIEWS_TOP_N = 8;

// Orders a tally by count (descending). Ties are broken by key (ascending) so
// the output — and therefore the top-N cut-off — does not depend on the order
// in which the database happened to return the rows.
function rankViewCounts<K extends string>(tally: Map<K, number>): { k: K; count: number }[] {
  return Array.from(tally, ([k, count]) => ({ k, count })).sort(
    (a, b) => b.count - a.count || (a.k < b.k ? -1 : a.k > b.k ? 1 : 0),
  );
}

/**
 * Build the compact analytics summary for one tenant.
 *
 * Runs a single query over the day range and folds the rows in memory into
 * totals, per-class / per-referer / per-country tallies and a zero-filled
 * daily series.
 *
 * @param db         D1 binding that holds the `views_daily` table.
 * @param tenant     Tenant slug to summarise.
 * @param windowDays Number of UTC days to cover, ending with the day that
 *                   contains `now`. Normalised to a whole number >= 1.
 * @param now        Current time in epoch milliseconds.
 * @returns The summary; `daily` always has exactly `windowDays` entries in
 *          ascending day order.
 */
export async function viewsSummary(
  db: D1Database,
  tenant: string,
  windowDays: number,
  now: number,
): Promise<ViewsSummary> {
  // A zero, negative, fractional or NaN window would put the cutoff in the
  // future and leave the daily series empty or misaligned.
  const span = Number.isFinite(windowDays) ? Math.max(1, Math.floor(windowDays)) : 1;
  const cutoffDay = utcDay(now - (span - 1) * VIEWS_MS_PER_DAY);

  const { results } = await db
    .prepare(
      `SELECT repo, kind, day, referer_host, country, ua_class, cached, count
       FROM views_daily
       WHERE tenant_slug = ? AND day >= ?`,
    )
    .bind(tenant, cutoffDay)
    .all<{
      repo: string;
      kind: ViewKind;
      day: string;
      referer_host: string;
      country: string;
      ua_class: UAClass;
      cached: number;
      count: number;
    }>();

  let totalChart = 0;
  let totalPage = 0;
  let cachedChart = 0;
  let freshChart = 0;
  const byUa = new Map<UAClass, number>();
  const byReferer = new Map<string, number>();
  const byCountry = new Map<string, number>();
  const byDay = new Map<string, { chart: number; page: number }>();

  const bump = <K>(tally: Map<K, number>, key: K, by: number): void => {
    tally.set(key, (tally.get(key) ?? 0) + by);
  };

  for (const row of results ?? []) {
    const bucket = byDay.get(row.day) ?? { chart: 0, page: 0 };
    if (row.kind === 'chart') {
      totalChart += row.count;
      if (row.cached) cachedChart += row.count;
      else freshChart += row.count;
      bucket.chart += row.count;
    } else {
      totalPage += row.count;
      bucket.page += row.count;
    }
    byDay.set(row.day, bucket);

    bump(byUa, row.ua_class, row.count);
    // '' means "direct / unknown" — those rows count toward totals only.
    if (row.referer_host) bump(byReferer, row.referer_host, row.count);
    if (row.country) bump(byCountry, row.country, row.count);
  }

  // Fill in zero-days so the sparkline has a continuous baseline.
  const daily: ViewsSummary['daily'] = [];
  for (let back = span - 1; back >= 0; back--) {
    const day = utcDay(now - back * VIEWS_MS_PER_DAY);
    const counts = byDay.get(day) ?? { chart: 0, page: 0 };
    daily.push({ day, ...counts });
  }

  return {
    windowDays: span,
    totalChart,
    totalPage,
    cachedChart,
    freshChart,
    uaBreakdown: rankViewCounts(byUa).map((x) => ({ ua_class: x.k, count: x.count })),
    topReferers: rankViewCounts(byReferer)
      .slice(0, VIEWS_TOP_N)
      .map((x) => ({ host: x.k, count: x.count })),
    topCountries: rankViewCounts(byCountry)
      .slice(0, VIEWS_TOP_N)
      .map((x) => ({ country: x.k, count: x.count })),
    daily,
  };
}

// ---------------------------------------------------------------------------
// Patch metadata carried over verbatim from the original diff (next file):
//   diff --git a/tools/star-tracker/src/index.ts b/tools/star-tracker/src/index.ts
//   index c42f613..57ac984 100644
//   --- a/tools/star-tracker/src/index.ts
//   +++ b/tools/star-tracker/src/index.ts
//   @@ -108,6 +108,63 @@ function requireUser(c: any): db.User | Response {
//      return u;
//    }
// ---------------------------------------------------------------------------

// Fire-and-forget view logger for chart-svg and HTML page requests.
// Skips owner self-views so the owner's tenant page (which embeds the
// chart preview) doesn't pollute their own analytics.
// We never store IPs or raw user-agents — referer is reduced to hostname,
// UA to a coarse class (camo/bot/browser/other), and country comes from
// Cloudflare's edge metadata.
/**
 * Records one chart or page view without delaying or endangering the response.
 *
 * Best-effort by design: any failure while classifying the request or writing
 * the row is logged and swallowed, because analytics must never turn a
 * successful chart/page response into an error.
 *
 * @param c           Hono context of the request being served.
 * @param tenant      Tenant slug the view belongs to.
 * @param repo        Repo full name, or '' for tenant-scoped requests.
 * @param kind        'chart' for SVG requests, 'page' for HTML pages.
 * @param cached      1 when a 304 was served, 0 when a body was served.
 * @param ownerUserId Tenant owner's user id; the owner's own views are skipped.
 */
function recordView(
  c: any,
  tenant: string,
  repo: string,
  kind: db.ViewKind,
  cached: 0 | 1,
  ownerUserId: string,
): void {
  try {
    const viewer = c.get('user') as db.User | null;
    if (viewer && viewer.id === ownerUserId) return;

    let refererHost = viewHostnameOf(c.req.header('referer') ?? c.req.header('referrer') ?? '');
    // Don't count our own pages as referers — only off-site embeds matter.
    // Check both PUBLIC_URL (prod) and the current request host (covers
    // localhost:8787 during wrangler dev, preview URLs, etc.).
    if (
      refererHost &&
      (refererHost === viewHostnameOf(c.env.PUBLIC_URL ?? '') ||
        refererHost === viewHostnameOf(c.req.url ?? ''))
    ) {
      refererHost = '';
    }

    const write = db
      .logView(c.env.DB, {
        tenant,
        repo,
        kind,
        day: db.utcDay(Date.now()),
        referer_host: refererHost,
        country: normalizeViewCountry(c.req.raw?.cf?.country),
        ua_class: classifyViewUserAgent(c.req.header('user-agent') ?? ''),
        cached,
      })
      // Without a handler a failed D1 write surfaces as an unhandled rejection.
      .catch((err: unknown) => console.warn('recordView: failed to log view', err));

    try {
      c.executionCtx.waitUntil(write);
    } catch {
      // Hono's executionCtx getter throws when the request has no
      // ExecutionContext (e.g. app.request() in tests). The write above is
      // already in flight; it just isn't tied to the request lifetime.
    }
  } catch (err) {
    console.warn('recordView: skipped', err);
  }
}

// Longest legal DNS name. The referer is client-controlled and part of the
// views_daily primary key, so oversized values are dropped rather than stored.
const VIEW_MAX_HOST_LENGTH = 253;

// Substrings that mark automated fetchers: crawlers, link unfurlers, monitors.
const VIEW_BOT_UA_PATTERN =
  /bot|crawler|spider|preview|fetcher|monitor|slurp|facebookexternalhit|discordbot|telegrambot|whatsapp|twitterbot|linkedinbot/;

// Reduces a URL string to its lowercase hostname without a trailing root dot.
// Returns '' for missing, unparseable or oversized input.
function viewHostnameOf(rawUrl: string): string {
  if (!rawUrl) return '';
  let host: string;
  try {
    host = new URL(rawUrl).hostname.toLowerCase();
  } catch {
    return '';
  }
  if (host.endsWith('.')) host = host.slice(0, -1);
  return host.length > VIEW_MAX_HOST_LENGTH ? '' : host;
}

// Maps cf.country to an upper-case two-letter code, or '' when unknown.
// Cloudflare's placeholders ('XX' = no country data, 'T1' = Tor) are not
// countries and are treated as unknown.
function normalizeViewCountry(raw: unknown): string {
  if (typeof raw !== 'string') return '';
  const code = raw.toUpperCase();
  return /^[A-Z]{2}$/.test(code) && code !== 'XX' ? code : '';
}

// Collapses a raw User-Agent header into the coarse class that gets stored.
function classifyViewUserAgent(userAgent: string): db.UAClass {
  const ua = userAgent.toLowerCase();
  if (ua.includes('github-camo')) return 'camo';
  if (VIEW_BOT_UA_PATTERN.test(ua)) return 'bot';
  if (ua.includes('mozilla')) return 'browser';
  return 'other';
}

// Syncs one repo, choosing exact vs sampled based on stargazers_count.
// Returns the chosen mode + counts so callers can build flash messages.
// Private repos return mode 'private' — we ensure the row exists and tag @@ -382,15 +439,17 @@ app.get('/:slug', async (c) => { const timeline = await db.tenantTimeline(c.env.DB, slug); const counts = await db.eventCountsByType(c.env.DB, slug); const recent = await db.tenantRecentByRepo(c.env.DB, slug, now); + const views = await db.viewsSummary(c.env.DB, slug, 30, now); const flash = takeFlash(c); return c.html( - pages.tenantDetail(user, tenant, c.env.PUBLIC_URL, repos, timeline.length, counts, recent, flash?.justCreated, flash?.msg), + pages.tenantDetail(user, tenant, c.env.PUBLIC_URL, repos, timeline.length, counts, recent, views, flash?.justCreated, flash?.msg), ); } const repos = await db.listTenantRepos(c.env.DB, slug); const timeline = await db.tenantTimeline(c.env.DB, slug); const recent = await db.tenantRecentByRepo(c.env.DB, slug, now); + recordView(c, slug, '', 'page', 0, tenant.owner_user_id); return c.html(pages.publicOrg(user, tenant, c.env.PUBLIC_URL, repos, timeline.length, recent)); }); @@ -488,6 +547,7 @@ app.get('/:slug/chart.svg', async (c) => { const reactiveTs = Math.max(latestTs, privTs); const etag = `"${theme}.${splitN}.${style}.${rangeRaw || 'all'}.${reactiveTs}.${now}.${djb2(title)}"`; if (c.req.header('if-none-match') === etag) { + recordView(c, slug, '', 'chart', 1, tenant.owner_user_id); return new Response(null, { status: 304, headers: { etag, 'cache-control': 'public, max-age=300' } }); } @@ -500,6 +560,7 @@ app.get('/:slug/chart.svg', async (c) => { tMinOverride: rangeStart ?? 
undefined, }); + recordView(c, slug, '', 'chart', 0, tenant.owner_user_id); return new Response(svg, { headers: { 'content-type': 'image/svg+xml; charset=utf-8', @@ -545,6 +606,7 @@ app.get('/:slug/:repo/chart.svg', async (c) => { const reactiveTs = Math.max(latestTs, privTs); const etag = `"r1.${theme}.${style}.${rangeRaw || 'all'}.${reactiveTs}.${now}.${djb2(title)}"`; if (c.req.header('if-none-match') === etag) { + recordView(c, slug, fullName, 'chart', 1, tenant.owner_user_id); return new Response(null, { status: 304, headers: { etag, 'cache-control': 'public, max-age=300' } }); } @@ -557,6 +619,7 @@ app.get('/:slug/:repo/chart.svg', async (c) => { tMinOverride: rangeStart ?? undefined, }); + recordView(c, slug, fullName, 'chart', 0, tenant.owner_user_id); return new Response(svg, { headers: { 'content-type': 'image/svg+xml; charset=utf-8', @@ -582,6 +645,7 @@ app.get('/:slug/:repo', async (c) => { const series = all.find((r) => r.repo === fullName); const total = series?.total ?? 0; const gains = db.recentForSeries(series?.points ?? [], total, Date.now()); + recordView(c, slug, fullName, 'page', 0, tenant.owner_user_id); return c.html(pages.repoDetail(c.get('user'), tenant, repoRow, total, gains, c.env.PUBLIC_URL)); }); diff --git a/tools/star-tracker/src/pages.ts b/tools/star-tracker/src/pages.ts index e09c253..70c30d4 100644 --- a/tools/star-tracker/src/pages.ts +++ b/tools/star-tracker/src/pages.ts @@ -1,7 +1,7 @@ // Server-rendered HTML pages. Inline CSS, no JS — keeps the bundle small and // the UX dependable inside a Worker. -import type { EventCounts, RepoRecent, RepoRow, Tenant, User } from './db'; +import type { EventCounts, RepoRecent, RepoRow, Tenant, User, ViewsSummary } from './db'; // Human-friendly relative timestamp ("3 minutes ago"). 
Used for webhook // status — absolute UTC strings are precise but require mental math; "5 @@ -544,7 +544,80 @@ function webhookStatusBlock(tenant: Tenant, counts: EventCounts): string { `; } -export function tenantDetail(user: User, tenant: Tenant, publicUrl: string, repos: RepoRow[], totalStars: number, counts: EventCounts, recent: RepoRecent[], justCreated?: boolean, flash?: string): string { +// Owner-only audience panel. Aggregated daily counts of chart-svg and +// HTML page requests, broken down by referer host, country, UA class, +// and cache hit/miss. Pure HTML/SVG — no JS. Caveat at the top is +// honest about Camo and our own 5-minute cache undercounting. +function viewsPanel(views: ViewsSummary): string { + const total = views.totalChart + views.totalPage; + if (total === 0) { + return `

Audience

+
+

No chart or page requests recorded in the last ${views.windowDays} days yet. Once your chart is embedded somewhere — a README, a blog post — accesses start appearing here.

+
`; + } + + // Sparkline: stacked bars, page below, chart above. SVG width fits 30 days. + const days = views.daily; + const maxDay = Math.max(1, ...days.map((d) => d.chart + d.page)); + const barW = 8; + const barGap = 2; + const w = days.length * (barW + barGap); + const h = 40; + const bars = days.map((d, i) => { + const x = i * (barW + barGap); + const totalDay = d.chart + d.page; + const totalH = Math.round((totalDay / maxDay) * h); + const pageH = Math.round((d.page / maxDay) * h); + const chartH = totalH - pageH; + return ` + ${esc(d.day)}: ${d.chart.toLocaleString('en-US')} chart · ${d.page.toLocaleString('en-US')} page + ${pageH > 0 ? `` : ''} + ${chartH > 0 ? `` : ''} + `; + }).join(''); + const sparkline = `${bars}`; + + const refRows = views.topReferers.length === 0 + ? `

No off-site referers yet — most embeds are fetched without a Referer header (GitHub Camo strips it).

` + : `
    ${views.topReferers.map((r) => `
  • ${r.count.toLocaleString('en-US')} ${esc(r.host)}
  • `).join('')}
`; + + const countryRows = views.topCountries.length === 0 + ? `

No country data yet.

` + : `
    ${views.topCountries.map((cn) => `
  • ${cn.count.toLocaleString('en-US')} ${esc(cn.country)}
  • `).join('')}
`; + + const uaLabels: Record = { + camo: 'GitHub Camo (README embeds)', + browser: 'Browser', + bot: 'Bot / crawler', + other: 'Other', + }; + const uaRows = views.uaBreakdown.length === 0 + ? '' + : `
    ${views.uaBreakdown.map((u) => `
  • ${u.count.toLocaleString('en-US')} ${esc(uaLabels[u.ua_class] ?? u.ua_class)}
  • `).join('')}
`; + + const cacheLine = views.totalChart > 0 + ? `

${views.freshChart.toLocaleString('en-US')} fresh · ${views.cachedChart.toLocaleString('en-US')} cached (304). High 304 ratio means GitHub Camo or browsers re-validated the same image.

` + : ''; + + return `

Audience · last ${views.windowDays} days

+
+

Approximate — chart SVGs are cached for 5 minutes at the edge, and GitHub's Camo proxy fronts most README embeds. Referer mix and country mix stay meaningful even when totals undercount.

+
+
Chart views
${views.totalChart.toLocaleString('en-US')}${cacheLine}
+
Page views
${views.totalPage.toLocaleString('en-US')}
+
+
${sparkline}
+

chart · page · ${views.daily[0]?.day ?? ''} → ${views.daily[views.daily.length - 1]?.day ?? ''}

+

Top referers

+ ${refRows} +

Top countries

+ ${countryRows} + ${uaRows ? `

Client mix

${uaRows}` : ''} +
`; +} + +export function tenantDetail(user: User, tenant: Tenant, publicUrl: string, repos: RepoRow[], totalStars: number, counts: EventCounts, recent: RepoRecent[], views: ViewsSummary, justCreated?: boolean, flash?: string): string { const webhookUrl = `${publicUrl}/webhook`; const chartSvg = `${publicUrl}/${tenant.slug}/chart.svg`; // Embed snippets wrap the chart in a link back to the org's stars page @@ -584,6 +657,8 @@ ${chartBlock(tenant.slug, tenant.display_name, chartSvg, orgPage, totalStars, te ${recentActivityBlock(recent)} +${viewsPanel(views)} +

1. GitHub webhook

${secretBlock}

Add at https://github.com/organizations/${esc(tenant.slug)}/settings/hooks (or per-repo settings for a personal account):