266 lines
8.8 KiB
JavaScript
266 lines
8.8 KiB
JavaScript
const db = require('../models/db');
|
|
const { forwardGeocode, clearCache } = require('../services/geocoding');
|
|
const logger = require('../utils/logger');
|
|
|
|
// Matches "PO Box", "P.O. Box", "P O Box", etc. at the start of an address, case-insensitively.
const PO_BOX_PATTERN = /^p\.?\s?o\.?\s+box\b/i;

/**
 * True when `value` is usable as a coordinate: a finite number, or a
 * non-blank string that parses to one. Unlike a truthiness check this
 * accepts 0 and rejects NaN / non-numeric strings.
 */
function isUsableCoordinate(value) {
  if (typeof value === 'number') {
    return Number.isFinite(value);
  }
  if (typeof value === 'string') {
    return value.trim() !== '' && Number.isFinite(Number(value));
  }
  return false;
}

/**
 * Build the free-text query sent to the geocoder for one resource row.
 * Preference order: street address, then city + postal code, then city only.
 * PO Box addresses are not treated as street addresses.
 */
function buildGeocodeAddress(resource) {
  const street = typeof resource.address === 'string' ? resource.address.trim() : '';
  const postalCode = typeof resource.postal_code === 'string' ? resource.postal_code.trim() : '';
  const hasStreetAddress = street !== '' && !PO_BOX_PATTERN.test(street);

  let parts;
  if (hasStreetAddress) {
    parts = [street, resource.city];
  } else if (postalCode !== '') {
    parts = [resource.city, postalCode];
  } else {
    parts = [resource.city];
  }

  // Drop missing pieces (e.g. a NULL city) so the literal text "null" is never sent to the geocoder.
  const addressToGeocode = [...parts, 'Alberta', 'Canada']
    .filter((part) => part !== null && part !== undefined && part !== '')
    .join(', ');

  return { addressToGeocode, hasStreetAddress };
}

/**
 * Batch geocode `food_resources` rows that need coordinates.
 *
 * Row selection (first matching mode wins):
 *   - `forceAll`:    every row
 *   - `onlyMissing`: rows where latitude or longitude is NULL
 *   - default:       rows missing coordinates, or whose geocode_confidence
 *                    is NULL or below `minConfidence`
 *
 * Each selected row is geocoded with `forwardGeocode`; on success the row's
 * latitude, longitude, geocode_confidence, geocode_provider and updated_at
 * are updated. A failure on one row is recorded and does not stop the batch.
 *
 * @param {object} [options]
 * @param {boolean} [options.forceAll=false] Re-geocode everything.
 * @param {number} [options.minConfidence=60] Re-geocode rows below this confidence (default mode only).
 * @param {boolean} [options.onlyMissing=false] Only geocode rows without coordinates.
 * @param {?number} [options.limit=null] Maximum number of rows to process; falsy means no limit.
 * @param {number} [options.delayMs=1500] Pause between consecutive geocoder requests (rate limiting).
 * @returns {Promise<{success: number, failed: number, skipped: number, total: number, results: object[]}>}
 *   Counters plus one entry per processed row with status 'success', 'no_result' or 'error'.
 */
async function batchGeocode(options = {}) {
  const {
    forceAll = false,
    minConfidence = 60,
    onlyMissing = false,
    limit = null,
    delayMs = 1500,
  } = options;

  logger.info('Starting batch geocoding', { forceAll, minConfidence, onlyMissing, limit });

  // Ensure geocode_confidence column exists
  await ensureConfidenceColumn();

  // Build query based on options
  let query;
  let params = [];
  if (forceAll) {
    query = `
      SELECT id, name, address, city, postal_code, latitude, longitude, geocode_confidence
      FROM food_resources
      ORDER BY geocode_confidence ASC NULLS FIRST, city, name
    `;
  } else if (onlyMissing) {
    query = `
      SELECT id, name, address, city, postal_code, latitude, longitude, geocode_confidence
      FROM food_resources
      WHERE latitude IS NULL OR longitude IS NULL
      ORDER BY city, name
    `;
  } else {
    query = `
      SELECT id, name, address, city, postal_code, latitude, longitude, geocode_confidence
      FROM food_resources
      WHERE latitude IS NULL OR longitude IS NULL
         OR geocode_confidence IS NULL
         OR geocode_confidence < $1
      ORDER BY geocode_confidence ASC NULLS FIRST, city, name
    `;
    params = [minConfidence];
  }

  const result = await db.query(query, params);
  let resources = result.rows;

  if (limit) {
    resources = resources.slice(0, limit);
  }

  logger.info(`Found ${resources.length} resources to geocode`);

  let successCount = 0;
  let failCount = 0;
  // No code path currently skips a row; the counter is kept so the return shape stays stable.
  const skippedCount = 0;
  const results = [];

  for (const [index, resource] of resources.entries()) {
    // Rate limiting: pause between requests only, so there is no wait before
    // the first request and none after the last one.
    if (index > 0 && delayMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }

    const { addressToGeocode, hasStreetAddress } = buildGeocodeAddress(resource);

    logger.info(`[${resource.id}] Geocoding "${resource.name}" in ${resource.city}`);
    logger.info(` Address: "${addressToGeocode}"`);

    try {
      const geocodeResult = await forwardGeocode(addressToGeocode);

      if (
        geocodeResult &&
        isUsableCoordinate(geocodeResult.latitude) &&
        isUsableCoordinate(geocodeResult.longitude)
      ) {
        // `??` rather than `||`: a reported confidence of 0 must stay 0, not become 50.
        const confidence = geocodeResult.combinedConfidence ?? geocodeResult.confidence ?? 50;
        const provider = geocodeResult.provider || 'unknown';
        const warnings = geocodeResult.validation?.warnings || [];

        // Without a street address the match is city/postal-code level at best, so cap confidence at 40.
        const adjustedConfidence = hasStreetAddress ? confidence : Math.min(confidence, 40);

        await db.query(`
          UPDATE food_resources
          SET latitude = $1,
              longitude = $2,
              geocode_confidence = $3,
              geocode_provider = $4,
              updated_at = NOW()
          WHERE id = $5
        `, [geocodeResult.latitude, geocodeResult.longitude, adjustedConfidence, provider, resource.id]);

        results.push({
          id: resource.id,
          name: resource.name,
          city: resource.city,
          address: resource.address,
          latitude: geocodeResult.latitude,
          longitude: geocodeResult.longitude,
          confidence: adjustedConfidence,
          provider,
          warnings,
          status: 'success',
        });

        if (warnings.length > 0) {
          logger.warn(` Success with warnings: ${geocodeResult.latitude}, ${geocodeResult.longitude} (${provider}, ${adjustedConfidence}%)`);
          logger.warn(` Warnings: ${warnings.join(', ')}`);
        } else {
          logger.info(` Success: ${geocodeResult.latitude}, ${geocodeResult.longitude} (${provider}, ${adjustedConfidence}%)`);
        }

        successCount++;
      } else {
        logger.warn(` No coordinates found`);
        results.push({
          id: resource.id,
          name: resource.name,
          city: resource.city,
          address: resource.address,
          status: 'no_result',
        });
        failCount++;
      }
    } catch (error) {
      // A geocoder or database failure on one row is recorded and the batch continues.
      logger.error(` Error: ${error.message}`);
      results.push({
        id: resource.id,
        name: resource.name,
        city: resource.city,
        address: resource.address,
        status: 'error',
        error: error.message,
      });
      failCount++;
    }
  }

  logger.info(`Batch geocoding complete: ${successCount} success, ${failCount} failed, ${skippedCount} skipped`);

  return {
    success: successCount,
    failed: failCount,
    skipped: skippedCount,
    total: resources.length,
    results,
  };
}
|
|
|
|
/**
 * Best-effort schema check: adds the geocode_confidence and geocode_provider
 * columns to food_resources when they are not present yet.
 *
 * Any error from the ALTER TABLE statement is logged at debug level and
 * deliberately not rethrown, so callers always continue.
 *
 * @returns {Promise<void>}
 */
async function ensureConfidenceColumn() {
  const alterTableSql = `
    ALTER TABLE food_resources
    ADD COLUMN IF NOT EXISTS geocode_confidence INTEGER,
    ADD COLUMN IF NOT EXISTS geocode_provider VARCHAR(50)
  `;

  try {
    await db.query(alterTableSql);
  } catch (err) {
    // Swallowed on purpose: this check must never abort the caller.
    logger.debug('Confidence column check:', err.message);
  }
}
|
|
|
|
/**
 * Run one aggregate query over food_resources and return its single row.
 *
 * The row has these columns: total, geocoded (rows with a non-NULL latitude),
 * has_address, high_confidence (>= 80), medium_confidence (50-79),
 * low_confidence (< 50 or NULL) and avg_confidence (rounded average).
 *
 * @returns {Promise<object>} The first row of the query result.
 */
async function getGeocodingStats() {
  const statsSql = `
    SELECT
      COUNT(*) as total,
      COUNT(latitude) as geocoded,
      COUNT(CASE WHEN address IS NOT NULL AND address != '' THEN 1 END) as has_address,
      COUNT(CASE WHEN geocode_confidence >= 80 THEN 1 END) as high_confidence,
      COUNT(CASE WHEN geocode_confidence >= 50 AND geocode_confidence < 80 THEN 1 END) as medium_confidence,
      COUNT(CASE WHEN geocode_confidence < 50 OR geocode_confidence IS NULL THEN 1 END) as low_confidence,
      ROUND(AVG(geocode_confidence)) as avg_confidence
    FROM food_resources
  `;

  const { rows } = await db.query(statsSql);
  return rows[0];
}
|
|
|
|
// CLI entry point: runs only when this file is executed directly, not when it is required.
//
// Flags:
//   --stats              print geocoding statistics and exit
//   --force-all          re-geocode everything
//   --only-missing       only geocode records without coordinates
//   --min-confidence=N   re-geocode records below N% confidence (default 60)
//   --limit=N            process at most N records
if (require.main === module) {
  const args = process.argv.slice(2);

  /**
   * Read an integer flag of the form `--name=N`.
   * Returns `fallback` when the flag is absent. Exits with status 1 when the
   * value is not an integer or is below `min`, instead of letting NaN flow
   * into the SQL query or silently disabling the limit.
   */
  const readIntFlag = (name, fallback, min) => {
    const prefix = `--${name}=`;
    const arg = args.find((a) => a.startsWith(prefix));
    if (!arg) {
      return fallback;
    }
    const raw = arg.slice(prefix.length);
    const value = Number.parseInt(raw, 10);
    if (Number.isNaN(value) || value < min) {
      console.error(`Invalid value for --${name}: "${raw}" (expected an integer >= ${min})`);
      process.exit(1);
    }
    return value;
  };

  const options = {
    forceAll: args.includes('--force-all'),
    onlyMissing: args.includes('--only-missing'),
    minConfidence: readIntFlag('min-confidence', 60, 0),
    limit: readIntFlag('limit', null, 1),
  };

  const showStats = async () => {
    const stats = await getGeocodingStats();
    console.log('\nGeocoding Statistics:');
    console.log('=====================');
    console.log(`Total records: ${stats.total}`);
    console.log(`Geocoded: ${stats.geocoded}`);
    console.log(`With street address: ${stats.has_address}`);
    console.log(`High confidence: ${stats.high_confidence} (>=80%)`);
    console.log(`Medium confidence: ${stats.medium_confidence} (50-79%)`);
    console.log(`Low confidence: ${stats.low_confidence} (<50%)`);
    // `??` so that only a missing average prints as N/A.
    console.log(`Average confidence: ${stats.avg_confidence ?? 'N/A'}%`);
  };

  const runBatch = async () => {
    console.log('Batch geocoding with options:', options);
    console.log('Use --stats to see current geocoding statistics');
    console.log('Use --force-all to re-geocode everything');
    console.log('Use --only-missing to only geocode records without coordinates');
    console.log('Use --min-confidence=N to re-geocode records below N% confidence');
    console.log('Use --limit=N to limit the number of records processed');
    console.log('');

    // Clear cache before batch run
    clearCache();

    const result = await batchGeocode(options);
    console.log('\nGeocoding complete:');
    console.log(` Success: ${result.success}`);
    console.log(` Failed: ${result.failed}`);
    console.log(` Total: ${result.total}`);
  };

  const [task, failureLabel] = args.includes('--stats')
    ? [showStats, 'Failed to get stats:']
    : [runBatch, 'Geocoding failed:'];

  // Exit explicitly in both outcomes, as the original then/catch chains did.
  task()
    .then(() => {
      process.exit(0);
    })
    .catch((err) => {
      console.error(failureLabel, err);
      process.exit(1);
    });
}
|
|
|
|
// Public API of this module: the batch runner, the statistics query, and the schema helper.
module.exports = { batchGeocode, getGeocodingStats, ensureConfidenceColumn };
|