RKI AMELAG wastewater surveillance data with CSV exports and bun fetch script. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
220 lines
7.4 KiB
Plaintext
Executable File
220 lines
7.4 KiB
Plaintext
Executable File
#!/usr/bin/env bun
|
|
|
|
/**
|
|
* Get DE Wastewater Surveillance Data (AMELAG / RKI + Umweltbundesamt)
|
|
*
|
|
* Fetches the population-weighted nationwide aggregated viral load time series
|
|
* from the AMELAG GitHub repository and produces:
|
|
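* - Data/DE-Wastewater-Surveillance/wastewater-latest.csv (all data points, all pathogen types)
* - Data/DE-Wastewater-Surveillance/wastewater-summary.csv (one summary row per primary pathogen)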
|
|
*
|
|
* Source: https://github.com/robert-koch-institut/Abwassersurveillance_AMELAG
|
|
* Data: amelag_aggregierte_kurve.tsv (weekly, population-weighted national aggregate)
|
|
* License: CC-BY 4.0 (Robert Koch-Institut / Umweltbundesamt)
|
|
* No authentication required.
|
|
*
|
|
* Strategy:
|
|
* 1. Fetch the aggregated TSV from GitHub raw.
|
|
* 2. Parse all pathogen types (SARS-CoV-2, Influenza A/B, RSV A/B).
|
|
* 3. For each pathogen, compute latest value, 4-week trend, and peak.
|
|
* 4. Write a tidy CSV with all data points.
|
|
* 5. Print a summary with current status and trends.
|
|
*/
|
|
|
|
import { writeFileSync, mkdirSync } from "fs";
|
|
import { join } from "path";
|
|
|
|
/** Raw GitHub location of the aggregated AMELAG curve (TSV) that this script downloads. */
const AMELAG_URL = "https://raw.githubusercontent.com/robert-koch-institut/Abwassersurveillance_AMELAG/main/amelag_aggregierte_kurve.tsv";

/** Directory, resolved relative to this script, that receives the generated CSV files. */
const OUT_DIR = join(__dirname, "Data/DE-Wastewater-Surveillance");

/**
 * Pathogen types that get an individual summary row; any other type present in
 * the feed is still exported to the full CSV but is not summarised.
 */
const PRIMARY_PATHOGENS = [
  "SARS-CoV-2",
  "Influenza A",
  "Influenza B",
  "RSV A",
  "RSV B",
];
|
|
|
|
/**
 * One parsed data row of the AMELAG aggregated-curve TSV.
 *
 * Property names mirror the TSV column headers. Numeric columns are parsed with
 * `parseFloat` / `parseInt`, so a non-numeric cell becomes `NaN` (except for
 * `viruslast`, see below).
 */
interface AmelagRow {
  /** Raw `datum` cell, kept as a string. */
  datum: string;
  /** `n` column parsed as a base-10 integer. */
  n: number;
  /** `anteil_bev` column as a float. */
  anteil_bev: number;
  /** `viruslast` column as a float; never NaN because such rows are skipped. */
  viruslast: number;
  /** `viruslast_normalisiert` column as a float. */
  viruslast_normalisiert: number;
  /** `vorhersage` ("prediction") column as a float. */
  vorhersage: number;
  /** `obere_schranke` ("upper bound") column as a float. */
  obere_schranke: number;
  /** `untere_schranke` ("lower bound") column as a float. */
  untere_schranke: number;
  /** Raw `typ` cell (pathogen type label). */
  typ: string;
}

/**
 * Parses the tab-separated AMELAG export into typed rows.
 *
 * The first line is treated as the header; columns are located by name, so
 * their order in the file does not matter. Both LF and CRLF line endings are
 * accepted. Lines with fewer cells than the header and lines whose `viruslast`
 * cell is not numeric are skipped.
 *
 * @param raw Full text of the TSV file.
 * @returns Parsed rows in file order; empty when there are no data lines.
 * @throws Error when data lines exist but a required column is missing from the header.
 */
function parseTSV(raw: string): AmelagRow[] {
  // Split on LF or CRLF: a leftover "\r" would otherwise stick to the last
  // header name and the last cell of every row and break the column lookup.
  const lines = raw.trim().split(/\r?\n/);
  if (lines.length < 2) return [];

  const headers = (lines[0] ?? "").split("\t");

  const idx = (name: string): number => {
    const i = headers.indexOf(name);
    if (i === -1) throw new Error(`Column '${name}' not found in TSV headers: ${headers.join(", ")}`);
    return i;
  };

  // Resolve every column once up front instead of once per cell per row.
  const col = {
    viruslast: idx("viruslast"),
    datum: idx("datum"),
    n: idx("n"),
    anteil_bev: idx("anteil_bev"),
    viruslast_normalisiert: idx("viruslast_normalisiert"),
    vorhersage: idx("vorhersage"),
    obere_schranke: idx("obere_schranke"),
    untere_schranke: idx("untere_schranke"),
    typ: idx("typ"),
  };

  const text = (cells: string[], i: number): string => cells[i] ?? "";
  const float = (cells: string[], i: number): number => Number.parseFloat(text(cells, i));

  const rows: AmelagRow[] = [];
  for (const line of lines.slice(1)) {
    const cells = line.split("\t");
    if (cells.length < headers.length) continue;

    // Rows without a measured viral load carry nothing to report.
    const viruslast = float(cells, col.viruslast);
    if (Number.isNaN(viruslast)) continue;

    rows.push({
      datum: text(cells, col.datum),
      n: Number.parseInt(text(cells, col.n), 10),
      anteil_bev: float(cells, col.anteil_bev),
      viruslast,
      viruslast_normalisiert: float(cells, col.viruslast_normalisiert),
      vorhersage: float(cells, col.vorhersage),
      obere_schranke: float(cells, col.obere_schranke),
      untere_schranke: float(cells, col.untere_schranke),
      typ: text(cells, col.typ),
    });
  }

  return rows;
}
|
|
|
|
/** Headline figures for a single pathogen type, derived from its time series. */
interface PathogenSummary {
  /** Pathogen type label the summary was computed for. */
  typ: string;
  /** `datum` of the most recent row. */
  latest_date: string;
  /** `viruslast` of the most recent row. */
  latest_viruslast: number;
  /** `vorhersage` of the most recent row. */
  latest_vorhersage: number;
  /** `n` of the most recent row. */
  sites_reporting: number;
  /** `anteil_bev` of the most recent row, multiplied by 100. */
  pop_coverage_pct: number;
  /** Percent change of `vorhersage` versus four weeks earlier; 0 when it cannot be computed. */
  trend_4w_pct: number;
  /** Highest `viruslast` in the series. */
  peak_viruslast: number;
  /** `datum` of the row holding the peak. */
  peak_date: string;
  /** Number of rows available for this pathogen. */
  data_points: number;
}

/**
 * Picks the row to compare the latest value against for the 4-week trend.
 *
 * Looks for the most recent row dated at least 28 days before the latest row,
 * so missing weeks do not silently stretch the comparison window. If the
 * latest `datum` cannot be parsed as a date, falls back to the positional rule
 * (five rows from the end).
 */
function pickFourWeekBaseline(sorted: AmelagRow[]): AmelagRow | undefined {
  const FOUR_WEEKS_MS = 28 * 24 * 60 * 60 * 1000;

  const newest = sorted[sorted.length - 1];
  if (newest === undefined) return undefined;

  const newestMs = Date.parse(newest.datum);
  if (Number.isNaN(newestMs)) {
    return sorted.length >= 5 ? sorted[sorted.length - 5] : undefined;
  }

  const cutoff = newestMs - FOUR_WEEKS_MS;
  for (let i = sorted.length - 2; i >= 0; i--) {
    const candidate = sorted[i];
    if (candidate === undefined) continue;
    const candidateMs = Date.parse(candidate.datum);
    if (!Number.isNaN(candidateMs) && candidateMs <= cutoff) return candidate;
  }
  return undefined;
}

/**
 * Summarises the time series of one pathogen type.
 *
 * @param rows All parsed rows (any order, any pathogen types); not modified.
 * @param typ  Pathogen type label to select.
 * @returns The summary, or `null` when no row has the requested type.
 */
function analyzePathogen(rows: AmelagRow[], typ: string): PathogenSummary | null {
  // filter() returns a fresh array, so sorting it leaves the caller's array untouched.
  const filtered = rows
    .filter((r) => r.typ === typ)
    .sort((a, b) => a.datum.localeCompare(b.datum));

  if (filtered.length === 0) return null;

  const latest = filtered[filtered.length - 1];

  // 4-week trend on the `vorhersage` value. Stays 0 when there is no
  // usable baseline (too little history, non-positive or non-numeric values).
  let trend_4w_pct = 0;
  const baseline = pickFourWeekBaseline(filtered);
  if (baseline !== undefined && baseline.vorhersage > 0 && Number.isFinite(latest.vorhersage)) {
    trend_4w_pct = ((latest.vorhersage - baseline.vorhersage) / baseline.vorhersage) * 100;
  }

  // Peak measured viral load; the earliest row wins on ties.
  const peak = filtered.reduce((best, r) => (r.viruslast > best.viruslast ? r : best));

  return {
    typ,
    latest_date: latest.datum,
    latest_viruslast: latest.viruslast,
    latest_vorhersage: latest.vorhersage,
    sites_reporting: latest.n,
    pop_coverage_pct: latest.anteil_bev * 100,
    trend_4w_pct,
    peak_viruslast: peak.viruslast,
    peak_date: peak.datum,
    data_points: filtered.length,
  };
}
|
|
|
|
/**
 * Maps a percent change to a compact trend symbol.
 *
 * Thresholds: above 20 → "↑↑", above 5 → "↑", above -5 → "→",
 * above -20 → "↓", otherwise "↓↓".
 *
 * @param pct Percent change.
 * @returns The trend symbol, or "?" when `pct` is NaN.
 */
function trendArrow(pct: number): string {
  // NaN fails every comparison below and would otherwise be shown as a strong decline.
  if (Number.isNaN(pct)) return "?";

  if (pct > 20) return "↑↑";
  if (pct > 5) return "↑";
  if (pct > -5) return "→";
  if (pct > -20) return "↓";
  return "↓↓";
}
|
|
|
|
/** Formats a number with a fixed number of decimals; non-finite values become an empty CSV field. */
function csvNumber(value: number, digits: number): string {
  return Number.isFinite(value) ? value.toFixed(digits) : "";
}

/** Quotes a text field when it contains a comma, double quote or line break (RFC 4180 style). */
function csvText(value: string): string {
  return /[",\r\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;
}

/**
 * Entry point: downloads the AMELAG aggregated curve, writes
 * `wastewater-latest.csv` (every parsed row) and `wastewater-summary.csv`
 * (one row per primary pathogen found) into `OUT_DIR`, and prints a status
 * report to stdout.
 *
 * @throws Error when the download does not return an OK status; errors from
 *         parsing or file writing propagate unchanged.
 */
async function main(): Promise<void> {
  console.log("Fetching DE wastewater surveillance data (AMELAG)…\n");

  // Fetch TSV
  const res = await fetch(AMELAG_URL);
  if (!res.ok) throw new Error(`HTTP ${res.status} fetching AMELAG data`);
  const raw = await res.text();
  const rows = parseTSV(raw);
  console.log(`Parsed ${rows.length} data points from AMELAG aggregated curve.\n`);

  // Get all unique pathogen types
  const allTypes = [...new Set(rows.map((r) => r.typ))].sort();
  console.log(`Pathogen types found: ${allTypes.join(", ")}\n`);

  // Create the output directory only once there is something to write, so a
  // failed download does not leave an empty directory behind.
  mkdirSync(OUT_DIR, { recursive: true });

  // Write full CSV (all pathogens, all data points)
  const csvHeader =
    "datum,typ,n_sites,pop_coverage_pct,viruslast_gc_per_l,viruslast_normalisiert,vorhersage_gam,obere_schranke,untere_schranke";
  // Sort a copy: the parsed rows are reused below and should not be reordered as a side effect.
  const csvRows = [...rows]
    .sort((a, b) => a.datum.localeCompare(b.datum) || a.typ.localeCompare(b.typ))
    .map((r) =>
      [
        csvText(r.datum),
        csvText(r.typ),
        csvNumber(r.n, 0),
        csvNumber(r.anteil_bev * 100, 2),
        csvNumber(r.viruslast, 2),
        csvNumber(r.viruslast_normalisiert, 2),
        csvNumber(r.vorhersage, 2),
        csvNumber(r.obere_schranke, 2),
        csvNumber(r.untere_schranke, 2),
      ].join(",")
    );

  const csvPath = join(OUT_DIR, "wastewater-latest.csv");
  writeFileSync(csvPath, [csvHeader, ...csvRows].join("\n") + "\n");
  console.log(`Wrote ${csvPath} (${csvRows.length} rows)\n`);

  // Analyze primary pathogens; types absent from the feed are skipped.
  const summaries: PathogenSummary[] = [];
  for (const typ of PRIMARY_PATHOGENS) {
    const s = analyzePathogen(rows, typ);
    if (s) summaries.push(s);
  }

  // Write summary CSV
  const summaryHeader =
    "typ,latest_date,latest_viruslast_gc_per_l,latest_vorhersage_gam,sites_reporting,pop_coverage_pct,trend_4w_pct,peak_viruslast,peak_date,data_points";
  const summaryRows = summaries.map((s) =>
    [
      csvText(s.typ),
      csvText(s.latest_date),
      csvNumber(s.latest_viruslast, 2),
      csvNumber(s.latest_vorhersage, 2),
      csvNumber(s.sites_reporting, 0),
      csvNumber(s.pop_coverage_pct, 2),
      csvNumber(s.trend_4w_pct, 1),
      csvNumber(s.peak_viruslast, 2),
      csvText(s.peak_date),
      String(s.data_points),
    ].join(",")
  );

  const summaryPath = join(OUT_DIR, "wastewater-summary.csv");
  writeFileSync(summaryPath, [summaryHeader, ...summaryRows].join("\n") + "\n");
  console.log(`Wrote ${summaryPath}\n`);

  // Print console summary
  console.log("── Current Status ──────────────────────────────────────\n");

  for (const s of summaries) {
    const arrow = trendArrow(s.trend_4w_pct);
    console.log(
      `${s.typ.padEnd(16)} ${arrow} ${s.trend_4w_pct > 0 ? "+" : ""}${s.trend_4w_pct.toFixed(1)}% ` +
        `Latest: ${s.latest_viruslast.toFixed(0)} gc/L ` +
        `GAM: ${s.latest_vorhersage.toFixed(0)} gc/L ` +
        `(${s.sites_reporting} sites, ${s.pop_coverage_pct.toFixed(1)}% pop) ` +
        `Peak: ${s.peak_viruslast.toFixed(0)} gc/L (${s.peak_date})`
    );
  }

  const covid = summaries.find((s) => s.typ === "SARS-CoV-2");
  // A non-positive peak would turn the ratio into NaN or Infinity; skip the line then.
  if (covid && covid.peak_viruslast > 0) {
    const pctOfPeak = (covid.latest_viruslast / covid.peak_viruslast) * 100;
    console.log(
      `\nSARS-CoV-2 is at ${pctOfPeak.toFixed(1)}% of its all-time peak (${covid.peak_date}).`
    );
  }

  console.log("\nData: AMELAG (RKI + Umweltbundesamt), CC-BY 4.0");
  console.log("Units: gene copies per liter (gc/L), population-weighted national aggregate.");
}
|
|
|
|
// Run the script; on any failure print the reason and exit with a non-zero status.
main().catch((err: unknown) => {
  // A rejection value is not guaranteed to be an Error, so narrow before reading `.message`.
  const message = err instanceof Error ? err.message : String(err);
  console.error("Error:", message);
  process.exit(1);
});
|