Files
Substrate/get-de-wastewater
svemagie 17d9ca0e37 feat: add DE-Wastewater-Surveillance dataset, DS-00018 source catalog, update script
RKI AMELAG wastewater surveillance data with CSV exports and bun fetch script.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-23 12:07:59 +02:00

220 lines
7.4 KiB
Plaintext
Executable File

#!/usr/bin/env bun
/**
* Get DE Wastewater Surveillance Data (AMELAG / RKI + Umweltbundesamt)
*
* Fetches the population-weighted nationwide aggregated viral load time series
* from the AMELAG GitHub repository and produces:
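* - Data/DE-Wastewater-Surveillance/wastewater-latest.csv (all pathogens, all data points)
* - Data/DE-Wastewater-Surveillance/wastewater-summary.csv (one row per primary pathogen)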
*
* Source: https://github.com/robert-koch-institut/Abwassersurveillance_AMELAG
* Data: amelag_aggregierte_kurve.tsv (weekly, population-weighted national aggregate)
* License: CC-BY 4.0 (Robert Koch-Institut / Umweltbundesamt)
* No authentication required.
*
* Strategy:
* 1. Fetch the aggregated TSV from GitHub raw.
* 2. Parse all pathogen types (SARS-CoV-2, Influenza A/B, RSV A/B).
* 3. For each pathogen, compute latest value, 4-week trend, and peak.
* 4. Write a tidy CSV with all data points.
* 5. Print a summary with current status and trends.
*/
import { writeFileSync, mkdirSync } from "fs";
import { join } from "path";
// Raw GitHub URL of the AMELAG aggregated-curve TSV that main() downloads.
const AMELAG_URL =
"https://raw.githubusercontent.com/robert-koch-institut/Abwassersurveillance_AMELAG/main/amelag_aggregierte_kurve.tsv";
// Directory the generated CSV files are written to, resolved relative to this script's own directory.
const OUT_DIR = join(__dirname, "Data/DE-Wastewater-Surveillance");
// Pathogen types we report individually (others are combined variants)
const PRIMARY_PATHOGENS = ["SARS-CoV-2", "Influenza A", "Influenza B", "RSV A", "RSV B"];
/**
 * One data row of the AMELAG aggregated-curve TSV after parsing.
 * Property names match the TSV column names that parseTSV looks up.
 */
interface AmelagRow {
// Date of the data point, kept as the raw TSV string. Rows are ordered with
// localeCompare on this field, so it is presumably ISO-8601 (YYYY-MM-DD) — confirm.
datum: string;
// Parsed as an integer; exported as "n_sites" and printed as the number of sites.
n: number;
// Multiplied by 100 to obtain the population-coverage percentage, so presumably a 0–1 fraction — confirm.
anteil_bev: number;
// Viral load; exported as "viruslast_gc_per_l" (gene copies per liter). parseTSV drops rows where this is NaN.
viruslast: number;
// Normalized viral load as supplied by the source; the normalization method is not visible in this file.
viruslast_normalisiert: number;
// Model prediction; exported as "vorhersage_gam" and labelled "GAM" in the console summary.
vorhersage: number;
// "Upper bound" column — presumably the upper limit of the interval around vorhersage; confirm against the AMELAG docs.
obere_schranke: number;
// "Lower bound" column — presumably the lower limit of the interval around vorhersage; confirm against the AMELAG docs.
untere_schranke: number;
// Pathogen type label (e.g. "SARS-CoV-2"); used to group rows per pathogen.
typ: string;
}
/**
 * Parse the AMELAG aggregated-curve TSV into typed rows.
 *
 * The first line is treated as the header; columns are located by name, so
 * their order in the file does not matter. Both LF and CRLF line endings are
 * accepted. Data lines with fewer cells than the header, and lines whose
 * `viruslast` cell is not a number, are skipped. Other numeric cells are
 * stored as parsed, which means they may be NaN.
 *
 * @param raw Full text of the TSV file.
 * @returns The parsed rows in file order.
 * @throws Error if any required column is missing from the header line. The
 *   header is validated up front, so this also applies to empty input or a
 *   file that contains no data lines.
 */
function parseTSV(raw: string): AmelagRow[] {
  // Split on LF or CRLF: a stray "\r" would otherwise stick to the last header
  // name and to the last cell of every row.
  const lines = raw.trim().split(/\r?\n/);
  const headers = (lines[0] ?? "").split("\t");
  const idx = (name: string): number => {
    const i = headers.indexOf(name);
    if (i === -1) throw new Error(`Column '${name}' not found in TSV headers: ${headers.join(", ")}`);
    return i;
  };
  // Resolve every column position once instead of scanning the header for each
  // cell of each row; this also reports a missing column before any row is read.
  const col = {
    viruslast: idx("viruslast"),
    datum: idx("datum"),
    n: idx("n"),
    anteil_bev: idx("anteil_bev"),
    viruslast_normalisiert: idx("viruslast_normalisiert"),
    vorhersage: idx("vorhersage"),
    obere_schranke: idx("obere_schranke"),
    untere_schranke: idx("untere_schranke"),
    typ: idx("typ"),
  };
  const cell = (cols: string[], i: number): string => cols[i] ?? "";

  const rows: AmelagRow[] = [];
  for (const line of lines.slice(1)) {
    const cols = line.split("\t");
    if (cols.length < headers.length) continue; // truncated or blank line
    const viruslast = parseFloat(cell(cols, col.viruslast));
    if (Number.isNaN(viruslast)) continue; // no measurement for this data point
    rows.push({
      datum: cell(cols, col.datum),
      n: parseInt(cell(cols, col.n), 10),
      anteil_bev: parseFloat(cell(cols, col.anteil_bev)),
      viruslast,
      viruslast_normalisiert: parseFloat(cell(cols, col.viruslast_normalisiert)),
      vorhersage: parseFloat(cell(cols, col.vorhersage)),
      obere_schranke: parseFloat(cell(cols, col.obere_schranke)),
      untere_schranke: parseFloat(cell(cols, col.untere_schranke)),
      typ: cell(cols, col.typ),
    });
  }
  return rows;
}
/** Per-pathogen summary produced by analyzePathogen from that pathogen's rows. */
interface PathogenSummary {
// Pathogen type label the summary was computed for.
typ: string;
// datum of the most recent row (rows are ordered by datum).
latest_date: string;
// viruslast of the most recent row.
latest_viruslast: number;
// vorhersage of the most recent row.
latest_vorhersage: number;
// n of the most recent row.
sites_reporting: number;
// anteil_bev of the most recent row multiplied by 100.
pop_coverage_pct: number;
// Percent change of vorhersage between the most recent row and the row four positions earlier;
// 0 when no trend can be computed (e.g. fewer than five rows or a non-positive baseline).
trend_4w_pct: number;
// Highest viruslast among the pathogen's rows.
peak_viruslast: number;
// datum of the row holding that peak.
peak_date: string;
// Number of rows found for this pathogen.
data_points: number;
}
/**
 * Summarize the time series of a single pathogen type.
 *
 * Rows whose `typ` equals the requested type are ordered by `datum`
 * (string comparison); the input array itself is not modified.
 *
 * The trend compares `vorhersage` of the latest row with the row four
 * positions earlier. That equals "four weeks" only if the series has one row
 * per week without gaps — NOTE(review): confirm against the data source.
 * The trend is reported as 0 whenever it cannot be computed: fewer than five
 * rows, a non-positive baseline, or a non-finite prediction on either side
 * (parseTSV keeps rows whose `vorhersage` failed to parse as NaN).
 *
 * @param rows All parsed rows, any order, any mix of pathogen types.
 * @param typ Pathogen type label to summarize.
 * @returns The summary, or null when no row has the requested type.
 */
function analyzePathogen(rows: AmelagRow[], typ: string): PathogenSummary | null {
  const filtered = rows
    .filter((r) => r.typ === typ)
    .sort((a, b) => a.datum.localeCompare(b.datum));
  const latest = filtered[filtered.length - 1];
  if (latest === undefined) return null;

  // With fewer than five rows the index is negative and the lookup yields undefined.
  const fourWeeksAgo = filtered[filtered.length - 5];
  let trend_4w_pct = 0;
  if (
    fourWeeksAgo !== undefined &&
    Number.isFinite(latest.vorhersage) &&
    Number.isFinite(fourWeeksAgo.vorhersage) &&
    fourWeeksAgo.vorhersage > 0
  ) {
    trend_4w_pct =
      ((latest.vorhersage - fourWeeksAgo.vorhersage) / fourWeeksAgo.vorhersage) * 100;
  }

  // Peak: strict ">" keeps the earliest row when several share the maximum.
  const peak = filtered.reduce((best, r) => (r.viruslast > best.viruslast ? r : best));

  return {
    typ,
    latest_date: latest.datum,
    latest_viruslast: latest.viruslast,
    latest_vorhersage: latest.vorhersage,
    sites_reporting: latest.n,
    pop_coverage_pct: latest.anteil_bev * 100,
    trend_4w_pct,
    peak_viruslast: peak.viruslast,
    peak_date: peak.datum,
    data_points: filtered.length,
  };
}
/**
 * Map a percentage change to an arrow glyph.
 *
 * Bands are checked from the highest threshold down using strict
 * "greater than", so a value exactly on a threshold falls into the band
 * below it. Anything that clears no threshold (including NaN) yields "↓↓".
 *
 * @param pct Percentage change.
 * @returns One of "↑↑", "↑", "→", "↓", "↓↓".
 */
function trendArrow(pct: number): string {
  const bands: ReadonlyArray<readonly [number, string]> = [
    [20, "↑↑"],
    [5, "↑"],
    [-5, "→"],
    [-20, "↓"],
  ];
  for (const [threshold, glyph] of bands) {
    if (pct > threshold) return glyph;
  }
  return "↓↓";
}
/**
 * Entry point: download the AMELAG aggregated curve, write the CSV exports
 * and print a console summary.
 *
 * Steps:
 *  1. Create OUT_DIR if needed and fetch the TSV from AMELAG_URL.
 *  2. Parse it and write every row to wastewater-latest.csv, ordered by date
 *     and then pathogen type.
 *  3. Summarize each entry of PRIMARY_PATHOGENS that has data and write the
 *     result to wastewater-summary.csv.
 *  4. Print one status line per summarized pathogen plus, for SARS-CoV-2, the
 *     latest value as a share of its peak.
 *
 * Values are joined with plain commas without quoting, so the output is only
 * valid CSV as long as no field contains a comma.
 *
 * @throws Error when the HTTP response is not OK; parse and file-system
 *   errors propagate unchanged.
 */
async function main(): Promise<void> {
  console.log("Fetching DE wastewater surveillance data (AMELAG)…\n");
  mkdirSync(OUT_DIR, { recursive: true });

  // Fetch TSV
  const res = await fetch(AMELAG_URL);
  if (!res.ok) throw new Error(`HTTP ${res.status} fetching AMELAG data`);
  const raw = await res.text();
  const rows = parseTSV(raw);
  console.log(`Parsed ${rows.length} data points from AMELAG aggregated curve.\n`);

  // Get all unique pathogen types
  const allTypes = [...new Set(rows.map((r) => r.typ))].sort();
  console.log(`Pathogen types found: ${allTypes.join(", ")}\n`);

  // Write full CSV (all pathogens, all data points)
  const csvHeader =
    "datum,typ,n_sites,pop_coverage_pct,viruslast_gc_per_l,viruslast_normalisiert,vorhersage_gam,obere_schranke,untere_schranke";
  // Sort a copy so the parsed rows handed to the analysis below keep their original order.
  const csvRows = [...rows]
    .sort((a, b) => a.datum.localeCompare(b.datum) || a.typ.localeCompare(b.typ))
    .map((r) =>
      [
        r.datum,
        r.typ,
        r.n,
        (r.anteil_bev * 100).toFixed(2),
        r.viruslast.toFixed(2),
        r.viruslast_normalisiert.toFixed(2),
        r.vorhersage.toFixed(2),
        r.obere_schranke.toFixed(2),
        r.untere_schranke.toFixed(2),
      ].join(",")
    );
  const csvPath = join(OUT_DIR, "wastewater-latest.csv");
  writeFileSync(csvPath, [csvHeader, ...csvRows].join("\n") + "\n");
  console.log(`Wrote ${csvPath} (${csvRows.length} rows)\n`);

  // Analyze primary pathogens; types without any data are left out.
  const summaries: PathogenSummary[] = [];
  for (const typ of PRIMARY_PATHOGENS) {
    const s = analyzePathogen(rows, typ);
    if (s) summaries.push(s);
  }

  // Write summary CSV
  const summaryHeader =
    "typ,latest_date,latest_viruslast_gc_per_l,latest_vorhersage_gam,sites_reporting,pop_coverage_pct,trend_4w_pct,peak_viruslast,peak_date,data_points";
  const summaryRows = summaries.map((s) =>
    [
      s.typ,
      s.latest_date,
      s.latest_viruslast.toFixed(2),
      s.latest_vorhersage.toFixed(2),
      s.sites_reporting,
      s.pop_coverage_pct.toFixed(2),
      s.trend_4w_pct.toFixed(1),
      s.peak_viruslast.toFixed(2),
      s.peak_date,
      s.data_points,
    ].join(",")
  );
  const summaryPath = join(OUT_DIR, "wastewater-summary.csv");
  writeFileSync(summaryPath, [summaryHeader, ...summaryRows].join("\n") + "\n");
  console.log(`Wrote ${summaryPath}\n`);

  // Print console summary
  console.log("── Current Status ──────────────────────────────────────\n");
  for (const s of summaries) {
    const arrow = trendArrow(s.trend_4w_pct);
    console.log(
      `${s.typ.padEnd(16)} ${arrow} ${s.trend_4w_pct > 0 ? "+" : ""}${s.trend_4w_pct.toFixed(1)}% ` +
        `Latest: ${s.latest_viruslast.toFixed(0)} gc/L ` +
        `GAM: ${s.latest_vorhersage.toFixed(0)} gc/L ` +
        `(${s.sites_reporting} sites, ${s.pop_coverage_pct.toFixed(1)}% pop) ` +
        `Peak: ${s.peak_viruslast.toFixed(0)} gc/L (${s.peak_date})`
    );
  }

  const covid = summaries.find((s) => s.typ === "SARS-CoV-2");
  // Skip the share-of-peak line when the peak is not positive: the division
  // would otherwise print NaN% or Infinity%.
  if (covid && covid.peak_viruslast > 0) {
    const pctOfPeak = (covid.latest_viruslast / covid.peak_viruslast) * 100;
    console.log(
      `\nSARS-CoV-2 is at ${pctOfPeak.toFixed(1)}% of its all-time peak (${covid.peak_date}).`
    );
  }

  console.log(
    "\nData: AMELAG (RKI + Umweltbundesamt), CC-BY 4.0"
  );
  console.log("Units: gene copies per liter (gc/L), population-weighted national aggregate.");
}
// Run the script. Any failure is reported on stderr and turned into exit code 1.
// The rejection value is narrowed first because it is not guaranteed to be an
// Error; reading `.message` off anything else would print "undefined".
main().catch((err: unknown) => {
  const message = err instanceof Error ? err.message : String(err);
  console.error("Error:", message);
  process.exit(1);
});