const db = require('../models/db');
const { forwardGeocode, clearCache } = require('../services/geocoding');
const logger = require('../utils/logger');

// Pause between consecutive geocoder requests (rate limiting).
const RATE_LIMIT_DELAY_MS = 1500;
// Confidence ceiling applied when a record was geocoded without a street address.
const NO_STREET_CONFIDENCE_CAP = 40;
// Confidence assumed when the geocoder result carries no confidence value at all.
const DEFAULT_CONFIDENCE = 50;
// Matches "PO Box", "P.O. Box", "p o box", ... at the start of an address.
const PO_BOX_PATTERN = /^p\.?\s*o\.?\s*box\b/i;
// Columns read for every candidate record.
const RESOURCE_COLUMNS =
  'id, name, address, city, postal_code, latitude, longitude, geocode_confidence';

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Build the SELECT statement (and its parameters) that picks the records to geocode.
 * `forceAll` takes precedence over `onlyMissing`; otherwise `minConfidence` is used.
 */
function buildSelectionQuery({ forceAll, onlyMissing, minConfidence }) {
  if (forceAll) {
    return {
      text: `
        SELECT ${RESOURCE_COLUMNS}
        FROM food_resources
        ORDER BY geocode_confidence ASC NULLS FIRST, city, name
      `,
      params: [],
    };
  }

  if (onlyMissing) {
    return {
      text: `
        SELECT ${RESOURCE_COLUMNS}
        FROM food_resources
        WHERE latitude IS NULL OR longitude IS NULL
        ORDER BY city, name
      `,
      params: [],
    };
  }

  return {
    text: `
      SELECT ${RESOURCE_COLUMNS}
      FROM food_resources
      WHERE latitude IS NULL OR longitude IS NULL
        OR geocode_confidence IS NULL OR geocode_confidence < $1
      ORDER BY geocode_confidence ASC NULLS FIRST, city, name
    `,
    params: [minConfidence],
  };
}

/**
 * Build the address string sent to the geocoder for one record.
 * Preference order: street address + city, then city + postal code, then city alone.
 * Returns null when the record has no usable address, city or postal code.
 */
function buildGeocodeAddress(resource) {
  // Trim before inspecting so stray whitespace cannot hide an empty value or a PO Box.
  const street = (resource.address ?? '').trim();
  const city = (resource.city ?? '').trim();
  const postalCode = (resource.postal_code ?? '').trim();

  // A PO Box does not identify a physical location, so it is not a street address.
  const hasStreetAddress = street !== '' && !PO_BOX_PATTERN.test(street);

  let parts;
  if (hasStreetAddress) {
    parts = [street, city];
  } else if (postalCode !== '') {
    parts = [city, postalCode];
  } else {
    parts = [city];
  }

  // Drop empty parts so a missing city never ends up as the literal text "null".
  const locality = parts.filter(Boolean);
  if (locality.length === 0) {
    return null;
  }

  return {
    addressToGeocode: [...locality, 'Alberta', 'Canada'].join(', '),
    hasStreetAddress,
  };
}

/**
 * Batch geocode resources that need coordinates.
 * Can target: all missing, low confidence, or force re-geocode all.
 *
 * @param {object} [options]
 * @param {boolean} [options.forceAll=false] - Re-geocode every record.
 * @param {number} [options.minConfidence=60] - Re-geocode records below this confidence
 *   (used only when neither forceAll nor onlyMissing is set).
 * @param {boolean} [options.onlyMissing=false] - Only geocode records without coordinates.
 * @param {?number} [options.limit=null] - Maximum number of records to process;
 *   null, 0, or any non-positive / non-numeric value means no limit.
 * @returns {Promise<{success: number, failed: number, skipped: number, total: number, results: object[]}>}
 *   Counters plus one entry per processed record; each entry's `status` is
 *   'success', 'no_result', 'error' or 'skipped'.
 */
async function batchGeocode(options = {}) {
  const {
    forceAll = false,
    minConfidence = 60,
    onlyMissing = false,
    limit = null,
  } = options;

  logger.info('Starting batch geocoding', { forceAll, minConfidence, onlyMissing, limit });

  // Ensure geocode_confidence / geocode_provider columns exist before reading them.
  await ensureConfidenceColumn();

  const { text, params } = buildSelectionQuery({ forceAll, onlyMissing, minConfidence });
  const { rows } = await db.query(text, params);

  // Only a positive, finite limit truncates the work list; a negative value must not
  // silently drop records from the end of the list.
  const maxRecords = Number(limit);
  const resources = Number.isFinite(maxRecords) && maxRecords > 0
    ? rows.slice(0, Math.floor(maxRecords))
    : rows;

  logger.info(`Found ${resources.length} resources to geocode`);

  let successCount = 0;
  let failCount = 0;
  let skippedCount = 0;
  let requestsIssued = 0;
  const results = [];

  for (const resource of resources) {
    const summary = {
      id: resource.id,
      name: resource.name,
      city: resource.city,
      address: resource.address,
    };

    const target = buildGeocodeAddress(resource);
    if (target === null) {
      logger.warn(`[${resource.id}] Skipping "${resource.name}": no address, city or postal code to geocode`);
      results.push({ ...summary, status: 'skipped' });
      skippedCount++;
      continue;
    }

    const { addressToGeocode, hasStreetAddress } = target;

    // Rate limiting - wait between requests (not before the first, not after the last).
    if (requestsIssued > 0) {
      await sleep(RATE_LIMIT_DELAY_MS);
    }
    requestsIssued++;

    logger.info(`[${resource.id}] Geocoding "${resource.name}" in ${resource.city}`);
    logger.info(` Address: "${addressToGeocode}"`);

    try {
      const geocodeResult = await forwardGeocode(addressToGeocode);

      // Truthiness check on purpose: a 0 coordinate is treated as "no result".
      if (geocodeResult && geocodeResult.latitude && geocodeResult.longitude) {
        // `??` rather than `||` so a reported confidence of 0 is not replaced by the default.
        const confidence =
          geocodeResult.combinedConfidence ?? geocodeResult.confidence ?? DEFAULT_CONFIDENCE;
        const provider = geocodeResult.provider || 'unknown';
        const warnings = geocodeResult.validation?.warnings || [];

        // Cap confidence when no street address was available; round because the
        // geocode_confidence column is declared INTEGER.
        const adjustedConfidence = Math.round(
          hasStreetAddress ? confidence : Math.min(confidence, NO_STREET_CONFIDENCE_CAP)
        );

        await db.query(
          `
            UPDATE food_resources
            SET latitude = $1,
                longitude = $2,
                geocode_confidence = $3,
                geocode_provider = $4,
                updated_at = NOW()
            WHERE id = $5
          `,
          [geocodeResult.latitude, geocodeResult.longitude, adjustedConfidence, provider, resource.id]
        );

        results.push({
          ...summary,
          latitude: geocodeResult.latitude,
          longitude: geocodeResult.longitude,
          confidence: adjustedConfidence,
          provider,
          warnings,
          status: 'success',
        });

        if (warnings.length > 0) {
          logger.warn(` Success with warnings: ${geocodeResult.latitude}, ${geocodeResult.longitude} (${provider}, ${adjustedConfidence}%)`);
          logger.warn(` Warnings: ${warnings.join(', ')}`);
        } else {
          logger.info(` Success: ${geocodeResult.latitude}, ${geocodeResult.longitude} (${provider}, ${adjustedConfidence}%)`);
        }
        successCount++;
      } else {
        logger.warn(` No coordinates found`);
        results.push({ ...summary, status: 'no_result' });
        failCount++;
      }
    } catch (error) {
      // A failure on one record (geocoder or UPDATE) must not abort the whole batch.
      logger.error(` Error: ${error.message}`);
      results.push({ ...summary, status: 'error', error: error.message });
      failCount++;
    }
  }

  logger.info(`Batch geocoding complete: ${successCount} success, ${failCount} failed, ${skippedCount} skipped`);

  return {
    success: successCount,
    failed: failCount,
    skipped: skippedCount,
    total: resources.length,
    results,
  };
}

/**
 * Ensure the geocode_confidence and geocode_provider columns exist.
 * Best-effort: a failure is logged and not rethrown, so the caller proceeds and any
 * real schema problem surfaces on the queries that follow.
 */
async function ensureConfidenceColumn() {
  try {
    await db.query(`
      ALTER TABLE food_resources
      ADD COLUMN IF NOT EXISTS geocode_confidence INTEGER,
      ADD COLUMN IF NOT EXISTS geocode_provider VARCHAR(50)
    `);
  } catch (error) {
    // IF NOT EXISTS already covers "column exists", so an error here is something
    // else (permissions, connection, ...) and deserves more than a debug line.
    logger.warn(`Confidence column check failed: ${error.message}`);
  }
}

/**
 * Get geocoding statistics.
 *
 * @returns {Promise<object>} Single row with total, geocoded, has_address,
 *   high_confidence (>= 80), medium_confidence (50-79), low_confidence (< 50 or NULL)
 *   and avg_confidence.
 */
async function getGeocodingStats() {
  const result = await db.query(`
    SELECT
      COUNT(*) as total,
      COUNT(latitude) as geocoded,
      COUNT(CASE WHEN address IS NOT NULL AND address != '' THEN 1 END) as has_address,
      COUNT(CASE WHEN geocode_confidence >= 80 THEN 1 END) as high_confidence,
      COUNT(CASE WHEN geocode_confidence >= 50 AND geocode_confidence < 80 THEN 1 END) as medium_confidence,
      COUNT(CASE WHEN geocode_confidence < 50 OR geocode_confidence IS NULL THEN 1 END) as low_confidence,
      ROUND(AVG(geocode_confidence)) as avg_confidence
    FROM food_resources
  `);
  return result.rows[0];
}

/**
 * Read an integer `--<flag>=N` CLI argument.
 * Returns `fallback` when the flag is absent; throws when its value is not an integer.
 */
function readIntegerFlag(args, flag, fallback) {
  const prefix = `--${flag}=`;
  const arg = args.find((a) => a.startsWith(prefix));
  if (arg === undefined) {
    return fallback;
  }

  const raw = arg.slice(prefix.length);
  const value = Number.parseInt(raw, 10);
  if (Number.isNaN(value)) {
    throw new Error(`Invalid value for --${flag}: expected an integer, got "${raw}"`);
  }
  return value;
}

/**
 * Command-line entry point: print statistics (--stats) or run a batch.
 * Resolves to the process exit code.
 */
async function runCli(args) {
  // Show stats only, if requested
  if (args.includes('--stats')) {
    try {
      const stats = await getGeocodingStats();
      console.log('\nGeocoding Statistics:');
      console.log('=====================');
      console.log(`Total records: ${stats.total}`);
      console.log(`Geocoded: ${stats.geocoded}`);
      console.log(`With street address: ${stats.has_address}`);
      console.log(`High confidence: ${stats.high_confidence} (>=80%)`);
      console.log(`Medium confidence: ${stats.medium_confidence} (50-79%)`);
      console.log(`Low confidence: ${stats.low_confidence} (<50%)`);
      console.log(`Average confidence: ${stats.avg_confidence ?? 'N/A'}%`);
      return 0;
    } catch (err) {
      console.error('Failed to get stats:', err);
      return 1;
    }
  }

  let options;
  try {
    options = {
      forceAll: args.includes('--force-all'),
      onlyMissing: args.includes('--only-missing'),
      minConfidence: readIntegerFlag(args, 'min-confidence', 60),
      limit: readIntegerFlag(args, 'limit', null),
    };
  } catch (err) {
    console.error(err.message);
    return 1;
  }

  console.log('Batch geocoding with options:', options);
  console.log('Use --stats to see current geocoding statistics');
  console.log('Use --force-all to re-geocode everything');
  console.log('Use --only-missing to only geocode records without coordinates');
  console.log('Use --min-confidence=N to re-geocode records below N% confidence');
  console.log('Use --limit=N to limit the number of records processed');
  console.log('');

  // Clear cache before batch run
  clearCache();

  try {
    const result = await batchGeocode(options);
    console.log('\nGeocoding complete:');
    console.log(` Success: ${result.success}`);
    console.log(` Failed: ${result.failed}`);
    console.log(` Skipped: ${result.skipped}`);
    console.log(` Total: ${result.total}`);
    return 0;
  } catch (err) {
    console.error('Geocoding failed:', err);
    return 1;
  }
}

if (require.main === module) {
  // Exit explicitly once the run has finished, as the original script did.
  runCli(process.argv.slice(2)).then((exitCode) => process.exit(exitCode));
}

module.exports = { batchGeocode, getGeocodingStats, ensureConfidenceColumn };