Files
Substrate/get-california-wastewater-data
Daniel Miessler 9066ad477b Add Bay Area COVID wastewater and Pulitzer Prize datasets
Added two comprehensive datasets with full documentation:

1. Bay Area COVID-19 Wastewater Surveillance (2022-2025)
   - California statewide COVID-19 wastewater data
   - 161 weekly data points from CDPH
   - Leading health indicator for viral trends
   - Includes automated update scripts

2. Pulitzer Prize Winners - Arts & Letters (1918-2024)
   - 249 winners across 107 years
   - Poetry, Drama, and General/Special categories
   - High-quality curated data from Wikidata
   - CSV files for each category

Added master Data directory documentation (Data/README.md) describing:
- Data philosophy and quality standards
- All four current datasets
- Contribution guidelines
- File naming conventions

Includes utility commands:
- get-bay-area-covid-status: Analyze current COVID wastewater levels
- get-california-wastewater-data: Fetch latest surveillance data

Updated .gitignore to exclude large raw data files (278MB+).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-16 22:09:43 -07:00

383 lines
14 KiB
Plaintext
Executable File
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bun
/**
* Get California Wastewater Data
*
* Analyzes trends and provides risk assessment for going out in public
*/
import { readFileSync } from 'fs';
import { join } from 'path';
// Location of the wastewater surveillance CSV, resolved relative to the directory containing this script.
// If the file is missing, the ENOENT handler at the bottom of the script tells the user to run the update command.
const CSV_PATH = join(__dirname, 'Data/Bay-Area-COVID-Wastewater/California-Wastewater-Surveillance-Latest.csv');
/**
 * One row of the surveillance CSV, restricted to the columns this script reads.
 * Every field is kept as the raw string taken from the file; numeric and date
 * conversion happens later in the analysis functions.
 */
interface WastewaterRecord {
// Sample date as text; parsed with `new Date(...)` and also compared as a string to find the latest date.
sample_collect_date: string;
// Pathogen identifier; matched case-insensitively against names such as 'sars-cov-2', 'rsv' and 'fluav'.
pcr_target: string;
// Measured concentration as text; converted with `parseFloat` before averaging.
pcr_target_avg_conc: string;
// Jurisdiction code; parseCSV keeps only rows where this equals 'CA'.
reporting_jurisdiction: string;
// Copied from the CSV but not read anywhere else in this script.
county_names: string;
// Copied from the CSV but not read anywhere else in this script (the report prints a fixed unit label).
pcr_target_units: string;
}
/**
 * Splits a single CSV line into its fields.
 *
 * Double-quoted fields may contain commas, and a doubled quote (`""`) inside a
 * quoted field is read as one literal quote. Lines without any quotes produce
 * exactly the same result as `line.split(',')`.
 */
function splitCsvLine(line: string): string[] {
  const fields: string[] = [];
  let field = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (line[i + 1] === '"') {
        // Escaped quote inside a quoted field.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"') {
      inQuotes = true;
    } else if (ch === ',') {
      fields.push(field);
      field = '';
    } else {
      field += ch;
    }
  }

  fields.push(field);
  return fields;
}

/**
 * Parses the surveillance CSV into records, keeping only usable California rows.
 *
 * A row is kept when its `reporting_jurisdiction` is exactly 'CA', it has a
 * non-empty `pcr_target`, and `pcr_target_avg_conc` parses as a number.
 * Columns are located by header name, so column order does not matter; a
 * column missing from the header yields '' for that field.
 *
 * Handles both LF and CRLF line endings and quoted fields containing commas.
 * Quoted fields that span multiple lines are not supported.
 *
 * @param csvContent Full text of the CSV file, header row first.
 * @returns The filtered records in file order (empty for empty input).
 */
function parseCSV(csvContent: string): WastewaterRecord[] {
  // Split on \r?\n so Windows line endings do not leave a stray '\r' on the
  // last header name / last value of every row.
  const lines = csvContent.trim().split(/\r?\n/);
  const headers = splitCsvLine(lines[0] ?? '').map(h => h.trim());

  const dateIdx = headers.indexOf('sample_collect_date');
  const targetIdx = headers.indexOf('pcr_target');
  const concIdx = headers.indexOf('pcr_target_avg_conc');
  const jurisdIdx = headers.indexOf('reporting_jurisdiction');
  const countyIdx = headers.indexOf('county_names');
  const unitsIdx = headers.indexOf('pcr_target_units');

  // Missing column (index -1) or short row both fall back to ''.
  const pick = (values: string[], idx: number): string =>
    idx >= 0 ? values[idx] ?? '' : '';

  const records: WastewaterRecord[] = [];
  for (let i = 1; i < lines.length; i++) {
    const line = lines[i] ?? '';
    if (!line.trim()) continue;

    const values = splitCsvLine(line);
    const record: WastewaterRecord = {
      sample_collect_date: pick(values, dateIdx),
      pcr_target: pick(values, targetIdx),
      pcr_target_avg_conc: pick(values, concIdx),
      reporting_jurisdiction: pick(values, jurisdIdx),
      county_names: pick(values, countyIdx),
      pcr_target_units: pick(values, unitsIdx)
    };

    const hasNumericConcentration =
      record.pcr_target_avg_conc !== '' &&
      !Number.isNaN(parseFloat(record.pcr_target_avg_conc));

    if (record.reporting_jurisdiction === 'CA' && record.pcr_target && hasNumericConcentration) {
      records.push(record);
    }
  }
  return records;
}
/**
 * Formats a date string for display, e.g. '2025-10-01' -> 'Oct 1, 2025'.
 *
 * Date-only ISO strings (YYYY-MM-DD) are parsed by `Date` as UTC midnight, so
 * they are also formatted in UTC; otherwise time zones west of UTC would show
 * the previous calendar day. Strings that carry a time component keep the
 * default local-time formatting.
 *
 * @param dateStr Date text as found in the data file.
 * @returns The formatted date, or 'Invalid Date' when the text cannot be parsed.
 */
function formatDate(dateStr: string): string {
  const text = dateStr.trim();
  const options: Intl.DateTimeFormatOptions = {
    month: 'short',
    day: 'numeric',
    year: 'numeric'
  };

  // Keep the calendar day exactly as written in the file for date-only input.
  if (/^\d{4}-\d{2}-\d{2}$/.test(text)) {
    options.timeZone = 'UTC';
  }

  return new Date(text).toLocaleDateString('en-US', options);
}
/**
 * Summarises one pathogen's concentrations over several look-back windows.
 *
 * Windows are measured back from `now`: 14 days ("recent"), the 14 days before
 * that, 30 days, 90 days and 365 days. Records older than 365 days, records
 * with an unparseable date, and records whose concentration is not a number
 * are ignored.
 *
 * @param records      Parsed CSV rows (any pathogen; filtered here by name).
 * @param pathogenName `pcr_target` value to analyse, matched case-insensitively.
 * @param now          Reference "current" time; defaults to the time of the call.
 * @returns Averages per window, 12-month min/max (0 when there is no data),
 *          the latest sample date string seen within the last year, the
 *          percentage trends, and the number of samples in the recent window.
 *          - `trend2wk`: recent average vs. the average of the preceding 14 days.
 *          - `trend1mo`: recent average vs. the 30-day average (which includes
 *            the recent window).
 *          Both trends are 0 when the comparison average is 0 / has no data.
 */
function analyzePathogenTrends(records: WastewaterRecord[], pathogenName: string, now: Date = new Date()) {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const cutoff = (days: number): number => now.getTime() - days * DAY_MS;

  const oneYearAgo = cutoff(365);
  const threeMonthsAgo = cutoff(90);
  const oneMonthAgo = cutoff(30);
  const fourWeeksAgo = cutoff(28);
  const twoWeeksAgo = cutoff(14);

  const target = pathogenName.toLowerCase();

  const recent: number[] = [];
  // The 14 days immediately before the recent window. Previously the trend was
  // computed against a copy of the recent window itself, so it was always 0.
  const priorTwoWeeks: number[] = [];
  const oneMonth: number[] = [];
  const threeMonths: number[] = [];
  const year: number[] = [];
  let latestDate = '';

  for (const record of records) {
    if (record.pcr_target.toLowerCase() !== target) continue;

    const time = new Date(record.sample_collect_date).getTime();
    const value = parseFloat(record.pcr_target_avg_conc);
    // NaN time (bad date) fails the >= test and is skipped as well.
    if (!(time >= oneYearAgo) || Number.isNaN(value)) continue;

    year.push(value);
    if (time >= threeMonthsAgo) threeMonths.push(value);
    if (time >= oneMonthAgo) oneMonth.push(value);
    if (time >= twoWeeksAgo) {
      recent.push(value);
    } else if (time >= fourWeeksAgo) {
      priorTwoWeeks.push(value);
    }

    // String comparison; chronological as long as dates are ISO formatted.
    if (!latestDate || record.sample_collect_date > latestDate) {
      latestDate = record.sample_collect_date;
    }
  }

  const avg = (arr: number[]): number =>
    arr.length ? arr.reduce((a, b) => a + b, 0) / arr.length : 0;
  const percentChange = (current: number, previous: number): number =>
    previous ? ((current - previous) / previous) * 100 : 0;

  // Loop instead of Math.min(...arr): no ±Infinity for empty input and no
  // argument-count limit for very large inputs.
  let yearMin = 0;
  let yearMax = 0;
  year.forEach((v, i) => {
    if (i === 0 || v < yearMin) yearMin = v;
    if (i === 0 || v > yearMax) yearMax = v;
  });

  const recentAvg = avg(recent);

  return {
    current: recentAvg,
    twoWeeksAvg: recentAvg,
    oneMonthAvg: avg(oneMonth),
    threeMonthsAvg: avg(threeMonths),
    yearAvg: avg(year),
    yearMin,
    yearMax,
    latestDate,
    trend2wk: percentChange(recentAvg, avg(priorTwoWeeks)),
    trend1mo: percentChange(recentAvg, avg(oneMonth)),
    sampleCount: recent.length
  };
}
/**
 * Renders a horizontal text bar chart of monthly average concentrations for
 * one pathogen over the 365 days before `now`.
 *
 * Each line shows the month label, a bar scaled so the largest month is 50
 * characters wide, the average (abbreviated as e.g. `1.2k` from 1000 up), and
 * an arrow comparing the month with the one listed before it (more than +10%
 * up, less than -10% down, otherwise level). The first month has no arrow.
 *
 * @param records      Parsed CSV rows (any pathogen; filtered here by name).
 * @param pathogenName `pcr_target` value to plot, matched case-insensitively.
 * @param title        Heading printed above the bars.
 * @param now          Reference "current" time; defaults to the time of the call.
 * @returns The chart text, or a "no data" line when nothing falls in range.
 */
function generateYearGraph(records: WastewaterRecord[], pathogenName: string, title: string, now: Date = new Date()): string {
  const oneYearAgo = now.getTime() - 365 * 24 * 60 * 60 * 1000;
  const target = pathogenName.toLowerCase();

  // Group data by month
  const monthlyData = new Map<string, number[]>();
  for (const record of records) {
    if (record.pcr_target.toLowerCase() !== target) continue;

    const rawDate = record.sample_collect_date.trim();
    const date = new Date(rawDate);
    const value = parseFloat(record.pcr_target_avg_conc);
    // `!(x >= y)` also rejects unparseable dates (NaN), which would otherwise
    // end up in a bogus "NaN-NaN" bucket.
    if (!(date.getTime() >= oneYearAgo) || Number.isNaN(value)) continue;

    // Date-only ISO strings are parsed as UTC midnight; reading the month via
    // local-time getters would push samples dated the 1st into the previous
    // month west of UTC. Take year/month straight from the text in that case.
    const iso = /^(\d{4})-(\d{2})-\d{2}$/.exec(rawDate);
    const monthKey = iso
      ? `${iso[1]}-${iso[2]}`
      : `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`;

    const bucket = monthlyData.get(monthKey);
    if (bucket) {
      bucket.push(value);
    } else {
      monthlyData.set(monthKey, [value]);
    }
  }

  // Calculate monthly averages (zero-padded keys sort chronologically)
  const months = [...monthlyData.keys()].sort().map(key => {
    const values = monthlyData.get(key) ?? [];
    const [year, monthNum] = key.split('-');
    const labelDate = new Date(parseInt(year ?? '', 10), parseInt(monthNum ?? '', 10) - 1, 1);
    return {
      label: labelDate.toLocaleDateString('en-US', { month: 'short', year: '2-digit' }),
      value: values.reduce((a, b) => a + b, 0) / values.length
    };
  });

  if (months.length === 0) {
    return ' No data available for graphing\n';
  }

  // Find max value for scaling
  const maxValue = Math.max(...months.map(m => m.value));
  const graphWidth = 50;

  let graph = `\n ${title}\n\n`;

  // Generate bars with trend arrows
  months.forEach((month, i) => {
    const barLength = maxValue > 0 ? Math.round((month.value / maxValue) * graphWidth) : 0;
    const bar = '█'.repeat(barLength);
    const valueStr = month.value >= 1000 ? `${(month.value / 1000).toFixed(1)}k` : month.value.toFixed(0);

    // Calculate trend arrow
    let arrow = ' ';
    const previous = months[i - 1];
    if (previous) {
      let change: number;
      if (previous.value !== 0) {
        change = ((month.value - previous.value) / previous.value) * 100;
      } else {
        // No baseline to divide by: any rise from zero counts as "up",
        // zero-to-zero counts as "level".
        change = month.value > 0 ? Infinity : 0;
      }

      if (change > 10) {
        arrow = '⬆️';
      } else if (change < -10) {
        arrow = '⬇️';
      } else {
        arrow = '➡️';
      }
    }

    graph += ` ${month.label} │${bar} ${valueStr} ${arrow}\n`;
  });

  return graph + '\n';
}
/** The subset of a pathogen trend summary that the risk scoring reads. */
interface PathogenRiskInput {
  current: number;
  yearMin: number;
  yearMax: number;
  trend2wk: number;
}

/**
 * Combines the three pathogen summaries into a single risk assessment.
 *
 * For each pathogen with a positive `current` level, the level is placed on a
 * 0..1 scale between that pathogen's 12-month minimum and maximum:
 *   - COVID: above 0.7 adds 3 points, above 0.4 adds 2, otherwise 1; a 2-week
 *     trend above 20% adds 2 more, any other positive trend adds 1.
 *   - RSV and flu: above 0.7 adds 2 points, above 0.4 adds 1.
 * Total score <= 3 is LOW, <= 6 MODERATE, <= 9 HIGH, otherwise VERY HIGH.
 *
 * @param covidData SARS-CoV-2 summary.
 * @param rsvData   RSV summary.
 * @param fluData   Influenza summary.
 * @returns The assessment label, its emoji, a recommendation sentence, the
 *          list of contributing factor descriptions, and the numeric score.
 */
function getRiskLevel(covidData: PathogenRiskInput, rsvData: PathogenRiskInput, fluData: PathogenRiskInput) {
  // Risk scoring based on relative levels
  let riskScore = 0;
  const factors: string[] = [];

  // Position of the current level within the yearly range. A flat or empty
  // range has no meaningful position, so treat it as the bottom instead of
  // dividing by zero.
  const rangePosition = (data: PathogenRiskInput): number => {
    const span = data.yearMax - data.yearMin;
    return Number.isFinite(span) && span > 0 ? (data.current - data.yearMin) / span : 0;
  };

  // COVID risk
  if (covidData.current > 0) {
    const position = rangePosition(covidData);
    if (position > 0.7) {
      riskScore += 3;
      factors.push('COVID levels HIGH (top 30% of year)');
    } else if (position > 0.4) {
      riskScore += 2;
      factors.push('COVID levels MODERATE');
    } else {
      riskScore += 1;
      factors.push('COVID levels LOW');
    }

    if (covidData.trend2wk > 20) {
      riskScore += 2;
      factors.push('COVID rapidly increasing');
    } else if (covidData.trend2wk > 0) {
      riskScore += 1;
      factors.push('COVID slowly increasing');
    }
  }

  // RSV and flu risk share the same thresholds and weights.
  const secondary: Array<[string, PathogenRiskInput]> = [
    ['RSV', rsvData],
    ['FLU', fluData]
  ];
  for (const [label, data] of secondary) {
    if (!(data.current > 0)) continue;
    const position = rangePosition(data);
    if (position > 0.7) {
      riskScore += 2;
      factors.push(`${label} levels HIGH`);
    } else if (position > 0.4) {
      riskScore += 1;
      factors.push(`${label} levels MODERATE`);
    }
  }

  let assessment: string;
  let emoji: string;
  let recommendation: string;

  if (riskScore <= 3) {
    assessment = 'LOW RISK';
    emoji = '🟢';
    recommendation = 'Generally safe to be in public. Standard precautions sufficient.';
  } else if (riskScore <= 6) {
    assessment = 'MODERATE RISK';
    emoji = '🟡';
    recommendation = 'Exercise caution in crowded indoor spaces. Consider masking in high-traffic areas.';
  } else if (riskScore <= 9) {
    assessment = 'HIGH RISK';
    emoji = '🟠';
    recommendation = 'Significant viral circulation. Recommend masking indoors and avoiding crowded spaces.';
  } else {
    assessment = 'VERY HIGH RISK';
    emoji = '🔴';
    recommendation = 'Multiple pathogens at elevated levels. Strong recommendation to mask and minimize public exposure.';
  }

  return { assessment, emoji, recommendation, factors, riskScore };
}
// Script entry point: load the CSV, print a per-pathogen report followed by an
// overall risk assessment. Any failure is reported on stderr and the process
// exits with status 1; a missing data file gets a dedicated hint.
try {
  console.log('\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
  console.log('🦠 CALIFORNIA WASTEWATER SURVEILLANCE');
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n');

  const records = parseCSV(readFileSync(CSV_PATH, 'utf-8'));

  const covidData = analyzePathogenTrends(records, 'sars-cov-2');
  const rsvData = analyzePathogenTrends(records, 'rsv');
  const fluData = analyzePathogenTrends(records, 'fluav'); // Influenza A

  console.log('📅 DATA STATUS\n');
  console.log(`📊 Latest data: ${formatDate(covidData.latestDate || rsvData.latestDate)}`);
  console.log(`📈 Analysis period: Past 12 months`);
  console.log(`🔬 Total samples: ${records.length.toLocaleString()}\n`);
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n');

  // Direction marker for a signed percentage change.
  const directionOf = (pct: number): string => {
    if (pct > 0) return '⬆️';
    if (pct < 0) return '⬇️';
    return '➡️';
  };

  // Report sections in display order: COVID, then flu, then RSV.
  const sections = [
    { heading: '🦠 SARS-CoV-2 (COVID-19)\n', target: 'sars-cov-2', stats: covidData },
    { heading: '🤧 INFLUENZA A\n', target: 'fluav', stats: fluData },
    { heading: '🤒 RSV (Respiratory Syncytial Virus)\n', target: 'rsv', stats: rsvData }
  ];

  for (const { heading, target, stats } of sections) {
    // A pathogen with no recent samples is left out of the report entirely.
    if (!(stats.sampleCount > 0)) continue;

    console.log(heading);
    console.log(` Current Level: ${stats.current.toFixed(0)} copies/g`);
    console.log(` 12-Month Range: ${stats.yearMin.toFixed(0)} - ${stats.yearMax.toFixed(0)}`);
    console.log(` 12-Month Average: ${stats.yearAvg.toFixed(0)}\n`);

    console.log(` 2-Week Trend: ${directionOf(stats.trend2wk)} ${Math.abs(stats.trend2wk).toFixed(1)}%`);
    console.log(` 1-Month Trend: ${directionOf(stats.trend1mo)} ${Math.abs(stats.trend1mo).toFixed(1)}%\n`);

    console.log(generateYearGraph(records, target, '12-Month Trend (Monthly Averages)'));
  }

  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n');

  // Risk Assessment
  const risk = getRiskLevel(covidData, rsvData, fluData);
  console.log('🎯 RISK ASSESSMENT\n');
  console.log(`${risk.emoji} Overall Risk Level: ${risk.assessment}\n`);
  console.log('📋 Key Factors:');
  risk.factors.forEach(factor => {
    console.log(` • ${factor}`);
  });
  console.log();
  console.log('💡 RECOMMENDATION\n');
  console.log(` ${risk.recommendation}\n`);
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n');
  console.log(' Source: California Department of Public Health');
  console.log(' Data: CHHS Open Data Portal (Updated Daily)');
  console.log(' Analysis: 12-month trend comparison\n');
} catch (error) {
  const fileMissing = (error as NodeJS.ErrnoException).code === 'ENOENT';
  if (!fileMissing) {
    console.error('❌ Error reading wastewater data:', error);
  } else {
    console.error('❌ Data file not found. Please run update first:\n');
    console.error(' ~/Library/Mobile\\ Documents/com~apple~CloudDocs/Projects/Substrate/Data/Bay-Area-COVID-Wastewater/update-wastewater-data\n');
  }
  process.exit(1);
}