#!/usr/bin/env bun
/**
 * Get DE Wastewater Surveillance Data (AMELAG / RKI + Umweltbundesamt)
 *
 * Fetches the population-weighted nationwide aggregated viral load time series
 * from the AMELAG GitHub repository and produces:
 *   - Data/DE-Wastewater-Surveillance/wastewater-latest.csv   (every parsed data point)
 *   - Data/DE-Wastewater-Surveillance/wastewater-summary.csv  (one row per primary pathogen)
 *
 * Source: https://github.com/robert-koch-institut/Abwassersurveillance_AMELAG
 * Data: amelag_aggregierte_kurve.tsv (weekly, population-weighted national aggregate)
 * License: CC-BY 4.0 (Robert Koch-Institut / Umweltbundesamt)
 * No authentication required.
 *
 * Strategy:
 *   1. Fetch the aggregated TSV from GitHub raw.
 *   2. Parse all pathogen types (SARS-CoV-2, Influenza A/B, RSV A/B).
 *   3. For each pathogen, compute latest value, 4-week trend, and peak.
 *   4. Write a tidy CSV with all data points and a per-pathogen summary CSV.
 *   5. Print a summary with current status and trends.
 */
import { writeFileSync, mkdirSync } from "fs";
import { join } from "path";

const AMELAG_URL =
  "https://raw.githubusercontent.com/robert-koch-institut/Abwassersurveillance_AMELAG/main/amelag_aggregierte_kurve.tsv";
const OUT_DIR = join(__dirname, "Data/DE-Wastewater-Surveillance");

// Pathogen types we report individually (others are combined variants)
const PRIMARY_PATHOGENS = ["SARS-CoV-2", "Influenza A", "Influenza B", "RSV A", "RSV B"];

// Length of the trend window used for the "4-week trend" figure.
const TREND_WINDOW_DAYS = 28;
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/** One data row of the AMELAG aggregated curve; field names mirror the TSV column names. */
interface AmelagRow {
  /** Date string as delivered in the `datum` column (sorted lexicographically below). */
  datum: string;
  /** Value of the `n` column; reported as the number of sites in the outputs. */
  n: number;
  /** Value of the `anteil_bev` column; multiplied by 100 to obtain the coverage percentage. */
  anteil_bev: number;
  /** Viral load; rows without a numeric value here are dropped during parsing. */
  viruslast: number;
  viruslast_normalisiert: number;
  /** Model prediction (exported as `vorhersage_gam`); basis of the trend calculation. */
  vorhersage: number;
  /** Upper bound ("obere Schranke") delivered with the prediction. */
  obere_schranke: number;
  /** Lower bound ("untere Schranke") delivered with the prediction. */
  untere_schranke: number;
  /** Pathogen type label. */
  typ: string;
}

/**
 * Parses the AMELAG aggregated-curve TSV into typed rows.
 *
 * Accepts both LF and CRLF line endings. Column positions are resolved once
 * from the header line, so a missing column (or an empty payload) fails
 * immediately instead of silently producing an empty result.
 *
 * Rows with fewer cells than the header, or whose `viruslast` is not numeric,
 * are skipped. Other numeric columns are kept even when they parse to NaN.
 *
 * @param raw Full TSV text including the header line.
 * @returns Parsed rows in file order.
 * @throws Error if a required column is absent from the header.
 */
function parseTSV(raw: string): AmelagRow[] {
  // Split on \r?\n so a CRLF file does not leave "\r" glued to the last header/cell.
  const lines = raw.trim().split(/\r?\n/);
  const headers = lines[0].split("\t");

  const idx = (name: string): number => {
    const i = headers.indexOf(name);
    if (i === -1) {
      throw new Error(`Column '${name}' not found in TSV headers: ${headers.join(", ")}`);
    }
    return i;
  };

  // Resolve every column index once instead of re-scanning the header per cell.
  const col = {
    datum: idx("datum"),
    n: idx("n"),
    anteil_bev: idx("anteil_bev"),
    viruslast: idx("viruslast"),
    viruslast_normalisiert: idx("viruslast_normalisiert"),
    vorhersage: idx("vorhersage"),
    obere_schranke: idx("obere_schranke"),
    untere_schranke: idx("untere_schranke"),
    typ: idx("typ"),
  };

  const rows: AmelagRow[] = [];
  for (let i = 1; i < lines.length; i++) {
    const cols = lines[i].split("\t");
    if (cols.length < headers.length) continue;

    const viruslast = parseFloat(cols[col.viruslast]);
    if (isNaN(viruslast)) continue;

    rows.push({
      datum: cols[col.datum],
      n: parseInt(cols[col.n], 10),
      anteil_bev: parseFloat(cols[col.anteil_bev]),
      viruslast,
      viruslast_normalisiert: parseFloat(cols[col.viruslast_normalisiert]),
      vorhersage: parseFloat(cols[col.vorhersage]),
      obere_schranke: parseFloat(cols[col.obere_schranke]),
      untere_schranke: parseFloat(cols[col.untere_schranke]),
      typ: cols[col.typ],
    });
  }
  return rows;
}

/** Per-pathogen digest written to the summary CSV and printed to the console. */
interface PathogenSummary {
  typ: string;
  latest_date: string;
  latest_viruslast: number;
  latest_vorhersage: number;
  sites_reporting: number;
  pop_coverage_pct: number;
  /** Percent change of `vorhersage` over the trend window; 0 when it cannot be computed. */
  trend_4w_pct: number;
  peak_viruslast: number;
  peak_date: string;
  data_points: number;
}

/**
 * Picks the row to compare the latest row against for the trend.
 *
 * The baseline is the most recent row dated at least `windowDays` before the
 * latest row. Selecting by date keeps the window correct when weeks are
 * missing from the series (rows without a numeric viral load are dropped
 * while parsing). If the latest date cannot be parsed, the function falls
 * back to "four rows earlier".
 *
 * @param series Rows of a single pathogen, sorted ascending by date, non-empty.
 * @param windowDays Minimum age of the baseline row in days.
 * @returns The baseline row, or undefined if the series does not reach back far enough.
 */
function findTrendBaseline(series: AmelagRow[], windowDays: number): AmelagRow | undefined {
  const latestMs = Date.parse(series[series.length - 1].datum);
  if (Number.isNaN(latestMs)) {
    return series.length >= 5 ? series[series.length - 5] : undefined;
  }

  const cutoffMs = latestMs - windowDays * MS_PER_DAY;
  for (let i = series.length - 2; i >= 0; i--) {
    const rowMs = Date.parse(series[i].datum);
    if (!Number.isNaN(rowMs) && rowMs <= cutoffMs) return series[i];
  }
  return undefined;
}

/**
 * Summarises one pathogen: latest values, 4-week trend of the prediction, and
 * the row with the highest viral load.
 *
 * @param rows All parsed rows (any pathogen, any order); not modified.
 * @param typ Pathogen label to select.
 * @returns The summary, or null when no row has this `typ`.
 */
function analyzePathogen(rows: AmelagRow[], typ: string): PathogenSummary | null {
  // filter() returns a fresh array, so sorting it does not reorder the caller's rows.
  const filtered = rows
    .filter((r) => r.typ === typ)
    .sort((a, b) => a.datum.localeCompare(b.datum));
  if (filtered.length === 0) return null;

  const latest = filtered[filtered.length - 1];

  // 4-week trend: compare latest vorhersage to the value four weeks prior.
  // Left at 0 when there is no usable baseline or either value is missing,
  // so a NaN can never reach the outputs or the trend arrow.
  let trend_4w_pct = 0;
  const baseline = findTrendBaseline(filtered, TREND_WINDOW_DAYS);
  if (
    baseline &&
    Number.isFinite(latest.vorhersage) &&
    Number.isFinite(baseline.vorhersage) &&
    baseline.vorhersage > 0
  ) {
    trend_4w_pct = ((latest.vorhersage - baseline.vorhersage) / baseline.vorhersage) * 100;
  }

  // Peak: first row holding the maximum viral load.
  const peak = filtered.reduce((best, r) => (r.viruslast > best.viruslast ? r : best));

  return {
    typ,
    latest_date: latest.datum,
    latest_viruslast: latest.viruslast,
    latest_vorhersage: latest.vorhersage,
    sites_reporting: latest.n,
    pop_coverage_pct: latest.anteil_bev * 100,
    trend_4w_pct,
    peak_viruslast: peak.viruslast,
    peak_date: peak.datum,
    data_points: filtered.length,
  };
}

/**
 * Maps a percentage change to a display arrow.
 *
 * @param pct Percentage change.
 * @returns "↑↑" above 20, "↑" above 5, "→" above -5, "↓" above -20, otherwise "↓↓";
 *          "?" for NaN, which would otherwise fail every comparison and read as "↓↓".
 */
function trendArrow(pct: number): string {
  if (Number.isNaN(pct)) return "?";
  if (pct > 20) return "↑↑";
  if (pct > 5) return "↑";
  if (pct > -5) return "→";
  if (pct > -20) return "↓";
  return "↓↓";
}

/** Quotes a text field for CSV output when it contains a comma, quote, or line break. */
function csvField(value: string): string {
  return /[",\r\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;
}

/**
 * Entry point: downloads the TSV, writes both CSV files into OUT_DIR, and
 * prints the per-pathogen status lines.
 *
 * @throws Error on a non-2xx HTTP response or a malformed TSV header.
 */
async function main(): Promise<void> {
  console.log("Fetching DE wastewater surveillance data (AMELAG)…\n");
  mkdirSync(OUT_DIR, { recursive: true });

  // Fetch TSV
  const res = await fetch(AMELAG_URL);
  if (!res.ok) throw new Error(`HTTP ${res.status} fetching AMELAG data`);
  const raw = await res.text();

  const rows = parseTSV(raw);
  console.log(`Parsed ${rows.length} data points from AMELAG aggregated curve.\n`);

  // Get all unique pathogen types
  const allTypes = [...new Set(rows.map((r) => r.typ))].sort();
  console.log(`Pathogen types found: ${allTypes.join(", ")}\n`);

  // Write full CSV (all pathogens, all data points)
  const csvHeader =
    "datum,typ,n_sites,pop_coverage_pct,viruslast_gc_per_l,viruslast_normalisiert,vorhersage_gam,obere_schranke,untere_schranke";
  // Sort a copy so the parsed rows keep their file order.
  const csvRows = [...rows]
    .sort((a, b) => a.datum.localeCompare(b.datum) || a.typ.localeCompare(b.typ))
    .map(
      (r) =>
        `${csvField(r.datum)},${csvField(r.typ)},${r.n},${(r.anteil_bev * 100).toFixed(2)},${r.viruslast.toFixed(2)},${r.viruslast_normalisiert.toFixed(2)},${r.vorhersage.toFixed(2)},${r.obere_schranke.toFixed(2)},${r.untere_schranke.toFixed(2)}`
    );

  const csvPath = join(OUT_DIR, "wastewater-latest.csv");
  writeFileSync(csvPath, [csvHeader, ...csvRows].join("\n") + "\n");
  console.log(`Wrote ${csvPath} (${csvRows.length} rows)\n`);

  // Analyze primary pathogens
  const summaries: PathogenSummary[] = [];
  for (const typ of PRIMARY_PATHOGENS) {
    const s = analyzePathogen(rows, typ);
    if (s) summaries.push(s);
  }

  // Write summary CSV
  const summaryHeader =
    "typ,latest_date,latest_viruslast_gc_per_l,latest_vorhersage_gam,sites_reporting,pop_coverage_pct,trend_4w_pct,peak_viruslast,peak_date,data_points";
  const summaryRows = summaries.map(
    (s) =>
      `${csvField(s.typ)},${csvField(s.latest_date)},${s.latest_viruslast.toFixed(2)},${s.latest_vorhersage.toFixed(2)},${s.sites_reporting},${s.pop_coverage_pct.toFixed(2)},${s.trend_4w_pct.toFixed(1)},${s.peak_viruslast.toFixed(2)},${csvField(s.peak_date)},${s.data_points}`
  );

  const summaryPath = join(OUT_DIR, "wastewater-summary.csv");
  writeFileSync(summaryPath, [summaryHeader, ...summaryRows].join("\n") + "\n");
  console.log(`Wrote ${summaryPath}\n`);

  // Print console summary
  console.log("── Current Status ──────────────────────────────────────\n");
  for (const s of summaries) {
    const arrow = trendArrow(s.trend_4w_pct);
    console.log(
      `${s.typ.padEnd(16)} ${arrow} ${s.trend_4w_pct > 0 ? "+" : ""}${s.trend_4w_pct.toFixed(1)}% ` +
        `Latest: ${s.latest_viruslast.toFixed(0)} gc/L ` +
        `GAM: ${s.latest_vorhersage.toFixed(0)} gc/L ` +
        `(${s.sites_reporting} sites, ${s.pop_coverage_pct.toFixed(1)}% pop) ` +
        `Peak: ${s.peak_viruslast.toFixed(0)} gc/L (${s.peak_date})`
    );
  }

  const covid = summaries.find((s) => s.typ === "SARS-CoV-2");
  // Skip the ratio when the peak is not positive; dividing would print NaN or Infinity.
  if (covid && covid.peak_viruslast > 0) {
    const pctOfPeak = (covid.latest_viruslast / covid.peak_viruslast) * 100;
    console.log(
      `\nSARS-CoV-2 is at ${pctOfPeak.toFixed(1)}% of its all-time peak (${covid.peak_date}).`
    );
  }

  console.log("\nData: AMELAG (RKI + Umweltbundesamt), CC-BY 4.0");
  console.log("Units: gene copies per liter (gc/L), population-weighted national aggregate.");
}

main().catch((err: unknown) => {
  // A rejection value is not guaranteed to be an Error; narrow before reading .message.
  console.error("Error:", err instanceof Error ? err.message : String(err));
  process.exit(1);
});