266 lines
8.8 KiB
JavaScript

const db = require('../models/db');
const { forwardGeocode, clearCache } = require('../services/geocoding');
const logger = require('../utils/logger');
/** Pause between consecutive geocoder requests, in milliseconds. */
const GEOCODE_REQUEST_DELAY_MS = 1500;

/** Confidence ceiling applied when the lookup had no street address to work with. */
const NO_STREET_CONFIDENCE_CAP = 40;

/** Confidence recorded when the geocoder result carries none of its own. */
const FALLBACK_CONFIDENCE = 50;

/** Matches a leading "PO Box" in its common spellings ("P.O. Box", "po box", ...). */
const PO_BOX_PATTERN = /^p\.?\s*o\.?\s*box\b/i;

/**
 * Pick the SELECT statement (and its parameters) for the requested mode.
 * `forceAll` wins over `onlyMissing`; only the default mode uses `minConfidence`.
 */
function buildResourceSelection({ forceAll, onlyMissing, minConfidence }) {
  if (forceAll) {
    return {
      text: `
SELECT id, name, address, city, postal_code, latitude, longitude, geocode_confidence
FROM food_resources
ORDER BY geocode_confidence ASC NULLS FIRST, city, name
`,
      params: [],
    };
  }
  if (onlyMissing) {
    return {
      text: `
SELECT id, name, address, city, postal_code, latitude, longitude, geocode_confidence
FROM food_resources
WHERE latitude IS NULL OR longitude IS NULL
ORDER BY city, name
`,
      params: [],
    };
  }
  return {
    text: `
SELECT id, name, address, city, postal_code, latitude, longitude, geocode_confidence
FROM food_resources
WHERE latitude IS NULL OR longitude IS NULL
OR geocode_confidence IS NULL
OR geocode_confidence < $1
ORDER BY geocode_confidence ASC NULLS FIRST, city, name
`,
    params: [minConfidence],
  };
}

/**
 * Build the free-text geocoder query for one resource row.
 * Returns `null` when the row has no street address, city, or postal code,
 * i.e. when the query would degrade to just "Alberta, Canada".
 */
function buildGeocodeAddress(resource) {
  const clean = (value) => (value == null ? '' : String(value).trim());
  const street = clean(resource.address);
  const city = clean(resource.city);
  const postalCode = clean(resource.postal_code);

  // A PO Box is a mailing address, not a physical location, so it does not count as a street address.
  const hasStreetAddress = street !== '' && !PO_BOX_PATTERN.test(street);

  let parts;
  if (hasStreetAddress) {
    parts = [street, city];
  } else if (postalCode !== '') {
    parts = [city, postalCode];
  } else {
    parts = [city];
  }

  // Drop blank components so a missing city never ends up as the text "null" in the query.
  const present = parts.filter((part) => part !== '');
  if (present.length === 0) {
    return null;
  }
  return {
    query: [...present, 'Alberta', 'Canada'].join(', '),
    hasStreetAddress,
  };
}

/**
 * Batch geocode rows of `food_resources` and persist the resulting coordinates.
 *
 * Selection modes (first match wins):
 *  - `forceAll`: every row.
 *  - `onlyMissing`: rows whose latitude or longitude is NULL.
 *  - default: rows with missing coordinates, NULL confidence, or confidence below `minConfidence`.
 *
 * Rows are processed one at a time with a fixed pause between geocoder requests.
 * Rows with no street address, city, or postal code are skipped without calling
 * the geocoder. Geocoder and UPDATE failures are caught per row and reported in
 * `results`; they do not abort the batch.
 *
 * @param {object} [options]
 * @param {boolean} [options.forceAll=false] Re-geocode everything.
 * @param {number} [options.minConfidence=60] Re-geocode rows below this confidence (default mode only).
 * @param {boolean} [options.onlyMissing=false] Only geocode rows without coordinates.
 * @param {?number} [options.limit=null] Maximum number of rows to process; `null`, `0`,
 *   negative, or NaN means no limit.
 * @returns {Promise<{success: number, failed: number, skipped: number, total: number, results: object[]}>}
 *   Counters plus one entry per row with `status` of `'success'`, `'no_result'`, `'error'` or `'skipped'`.
 *   Rejects if the initial SELECT fails.
 */
async function batchGeocode(options = {}) {
  const {
    forceAll = false,
    minConfidence = 60,
    onlyMissing = false,
    limit = null,
  } = options;

  logger.info('Starting batch geocoding', { forceAll, minConfidence, onlyMissing, limit });

  // The queries below read and write geocode_confidence / geocode_provider.
  await ensureConfidenceColumn();

  const selection = buildResourceSelection({ forceAll, onlyMissing, minConfidence });
  const selected = await db.query(selection.text, selection.params);

  // Only a positive limit truncates; a negative one would otherwise drop rows from the END via slice().
  const resources = limit > 0 ? selected.rows.slice(0, limit) : selected.rows;

  logger.info(`Found ${resources.length} resources to geocode`);

  let successCount = 0;
  let failCount = 0;
  let skippedCount = 0;
  let requestsSent = 0;
  const results = [];

  for (const resource of resources) {
    const summary = {
      id: resource.id,
      name: resource.name,
      city: resource.city,
      address: resource.address,
    };

    logger.info(`[${resource.id}] Geocoding "${resource.name}" in ${resource.city}`);

    const target = buildGeocodeAddress(resource);
    if (target === null) {
      logger.warn(' Skipped: no address, city, or postal code to geocode');
      results.push({ ...summary, status: 'skipped' });
      skippedCount++;
      continue;
    }

    logger.info(` Address: "${target.query}"`);

    // Rate limiting: pause between geocoder requests only, so the batch
    // does not sit idle after the final record or for skipped ones.
    if (requestsSent > 0) {
      await new Promise((resolve) => setTimeout(resolve, GEOCODE_REQUEST_DELAY_MS));
    }
    requestsSent++;

    try {
      const geocodeResult = await forwardGeocode(target.query);

      // NOTE(review): truthiness also rejects a literal 0 coordinate; kept as-is
      // because 0 cannot be a valid latitude/longitude for an Alberta address.
      if (geocodeResult && geocodeResult.latitude && geocodeResult.longitude) {
        const confidence =
          geocodeResult.combinedConfidence || geocodeResult.confidence || FALLBACK_CONFIDENCE;
        const provider = geocodeResult.provider || 'unknown';
        const warnings = geocodeResult.validation?.warnings || [];

        // Without a street address the match is at best city/postal-code level, so cap the confidence.
        const adjustedConfidence = target.hasStreetAddress
          ? confidence
          : Math.min(confidence, NO_STREET_CONFIDENCE_CAP);

        await db.query(`
UPDATE food_resources
SET latitude = $1,
longitude = $2,
geocode_confidence = $3,
geocode_provider = $4,
updated_at = NOW()
WHERE id = $5
`, [geocodeResult.latitude, geocodeResult.longitude, adjustedConfidence, provider, resource.id]);

        results.push({
          ...summary,
          latitude: geocodeResult.latitude,
          longitude: geocodeResult.longitude,
          confidence: adjustedConfidence,
          provider,
          warnings,
          status: 'success',
        });

        if (warnings.length > 0) {
          logger.warn(` Success with warnings: ${geocodeResult.latitude}, ${geocodeResult.longitude} (${provider}, ${adjustedConfidence}%)`);
          logger.warn(` Warnings: ${warnings.join(', ')}`);
        } else {
          logger.info(` Success: ${geocodeResult.latitude}, ${geocodeResult.longitude} (${provider}, ${adjustedConfidence}%)`);
        }
        successCount++;
      } else {
        logger.warn(` No coordinates found`);
        results.push({ ...summary, status: 'no_result' });
        failCount++;
      }
    } catch (error) {
      // Covers both geocoder failures and a failed UPDATE; the batch carries on with the next row.
      logger.error(` Error: ${error.message}`);
      results.push({ ...summary, status: 'error', error: error.message });
      failCount++;
    }
  }

  logger.info(`Batch geocoding complete: ${successCount} success, ${failCount} failed, ${skippedCount} skipped`);

  return {
    success: successCount,
    failed: failCount,
    skipped: skippedCount,
    total: resources.length,
    results,
  };
}
/**
 * Ensure the `geocode_confidence` and `geocode_provider` columns exist on
 * `food_resources`, adding whichever is missing.
 *
 * Best-effort: a failure is logged and swallowed, so this never rejects.
 *
 * @returns {Promise<void>}
 */
async function ensureConfidenceColumn() {
  try {
    await db.query(`
ALTER TABLE food_resources
ADD COLUMN IF NOT EXISTS geocode_confidence INTEGER,
ADD COLUMN IF NOT EXISTS geocode_provider VARCHAR(50)
`);
  } catch (error) {
    // `ADD COLUMN IF NOT EXISTS` does not fail for columns that already exist,
    // so an error here is a genuine problem (permissions, missing table,
    // connection). Surface it at warn level instead of hiding it in debug output.
    logger.warn(`Could not ensure geocode columns exist: ${error.message}`);
  }
}
/**
 * Summarise geocoding coverage and confidence across `food_resources`.
 *
 * Runs a single aggregate query and resolves with its only row, whose columns
 * are: total, geocoded, has_address, high_confidence (>= 80),
 * medium_confidence (50-79), low_confidence (< 50 or NULL), avg_confidence.
 *
 * @returns {Promise<object>} The aggregate row as returned by `db.query`.
 */
async function getGeocodingStats() {
  const statsSql = `
SELECT
COUNT(*) as total,
COUNT(latitude) as geocoded,
COUNT(CASE WHEN address IS NOT NULL AND address != '' THEN 1 END) as has_address,
COUNT(CASE WHEN geocode_confidence >= 80 THEN 1 END) as high_confidence,
COUNT(CASE WHEN geocode_confidence >= 50 AND geocode_confidence < 80 THEN 1 END) as medium_confidence,
COUNT(CASE WHEN geocode_confidence < 50 OR geocode_confidence IS NULL THEN 1 END) as low_confidence,
ROUND(AVG(geocode_confidence)) as avg_confidence
FROM food_resources
`;
  const { rows } = await db.query(statsSql);
  // An aggregate without GROUP BY yields exactly one row.
  return rows[0];
}
// CLI entry point: runs only when this file is executed directly, not when it is require()d.
if (require.main === module) {
  const args = process.argv.slice(2);

  /**
   * Read a `--<name>=N` flag as a non-negative integer.
   * Returns `fallback` when the flag is absent. A malformed value aborts the
   * run with exit code 1 instead of flowing on as NaN (which would silently
   * disable --limit and send NaN to the database for --min-confidence).
   */
  const readIntegerFlag = (name, fallback) => {
    const prefix = `--${name}=`;
    const arg = args.find((a) => a.startsWith(prefix));
    if (arg === undefined) {
      return fallback;
    }
    const raw = arg.slice(prefix.length);
    if (!/^\d+$/.test(raw)) {
      console.error(`Invalid value for --${name}: "${raw}" (expected a non-negative integer)`);
      process.exit(1);
    }
    return Number.parseInt(raw, 10);
  };

  const options = {
    forceAll: args.includes('--force-all'),
    onlyMissing: args.includes('--only-missing'),
    minConfidence: readIntegerFlag('min-confidence', 60),
    limit: readIntegerFlag('limit', null),
  };

  if (args.includes('--stats')) {
    // --stats prints the current statistics and exits without geocoding anything.
    getGeocodingStats()
      .then((stats) => {
        console.log('\nGeocoding Statistics:');
        console.log('=====================');
        console.log(`Total records: ${stats.total}`);
        console.log(`Geocoded: ${stats.geocoded}`);
        console.log(`With street address: ${stats.has_address}`);
        console.log(`High confidence: ${stats.high_confidence} (>=80%)`);
        console.log(`Medium confidence: ${stats.medium_confidence} (50-79%)`);
        console.log(`Low confidence: ${stats.low_confidence} (<50%)`);
        console.log(`Average confidence: ${stats.avg_confidence || 'N/A'}%`);
        process.exit(0);
      })
      .catch((err) => {
        console.error('Failed to get stats:', err);
        process.exit(1);
      });
  } else {
    console.log('Batch geocoding with options:', options);
    console.log('Use --stats to see current geocoding statistics');
    console.log('Use --force-all to re-geocode everything');
    console.log('Use --only-missing to only geocode records without coordinates');
    console.log('Use --min-confidence=N to re-geocode records below N% confidence');
    console.log('Use --limit=N to limit the number of records processed');
    console.log('');

    // Clear the geocoding service cache before the batch run.
    clearCache();

    batchGeocode(options)
      .then((result) => {
        console.log('\nGeocoding complete:');
        console.log(` Success: ${result.success}`);
        console.log(` Failed: ${result.failed}`);
        console.log(` Total: ${result.total}`);
        process.exit(0);
      })
      .catch((err) => {
        console.error('Geocoding failed:', err);
        process.exit(1);
      });
  }
}
// Public API for modules that require() this file: the batch runner, the stats query, and the column-setup helper.
module.exports = { batchGeocode, getGeocodingStats, ensureConfidenceColumn };