const axios = require('axios'); const logger = require('../utils/logger'); // Cache for geocoding results const geocodeCache = new Map(); const CACHE_TTL = 24 * 60 * 60 * 1000; // 24 hours // Clean up old cache entries periodically setInterval(() => { const now = Date.now(); for (const [key, value] of geocodeCache.entries()) { if (now - value.timestamp > CACHE_TTL) { geocodeCache.delete(key); } } }, 60 * 60 * 1000); /** * Alberta bounding box for validation * Ensures geocoded points are actually in Alberta */ const ALBERTA_BOUNDS = { north: 60.0, south: 49.0, east: -110.0, west: -120.0 }; /** * Multi-provider geocoding - tries providers in order until success * Premium providers first (when API key available), then free fallbacks */ const GEOCODING_PROVIDERS = [ { name: 'Mapbox', func: geocodeWithMapbox, enabled: () => !!process.env.MAPBOX_ACCESS_TOKEN, options: { timeout: 10000, delay: 0 } }, { name: 'Nominatim', func: geocodeWithNominatim, enabled: () => true, options: { timeout: 10000, delay: 1000 } }, { name: 'Photon', func: geocodeWithPhoton, enabled: () => true, options: { timeout: 10000, delay: 500 } }, { name: 'ArcGIS', func: geocodeWithArcGIS, enabled: () => true, options: { timeout: 10000, delay: 500 } } ]; /** * Geocode with Mapbox (premium provider) */ async function geocodeWithMapbox(address, options = {}) { const { timeout = 10000 } = options; const apiKey = process.env.MAPBOX_ACCESS_TOKEN; if (!apiKey) { throw new Error('Mapbox API key not configured'); } logger.info(`Geocoding with Mapbox: ${address}`); try { const url = `https://api.mapbox.com/geocoding/v5/mapbox.places/${encodeURIComponent(address)}.json`; const response = await axios.get(url, { params: { access_token: apiKey, limit: 1, country: 'ca', types: 'address,poi,place' }, timeout }); const data = response.data; if (!data.features || data.features.length === 0) { return null; } const result = data.features[0]; const [longitude, latitude] = result.center; // Extract address components from context const components = extractMapboxComponents(result); return { latitude, longitude, formattedAddress: result.place_name, provider: 'Mapbox', confidence: Math.round((result.relevance || 0.5) * 100), components, raw: result }; } catch (error) { logger.error('Mapbox geocoding error:', error.message); throw error; } } /** * Geocode with Nominatim (OpenStreetMap) */ async function geocodeWithNominatim(address, options = {}) { const { timeout = 10000, delay = 1000 } = options; if (delay > 0) { await new Promise(resolve => setTimeout(resolve, delay)); } const url = `https://nominatim.openstreetmap.org/search`; logger.info(`Geocoding with Nominatim: ${address}`); try { const response = await axios.get(url, { params: { format: 'json', q: address, limit: 1, addressdetails: 1, countrycodes: 'ca' }, headers: { 'User-Agent': 'FreeAlbertaFood/1.0 (https://freealberta.org)' }, timeout }); const data = response.data; if (!data || data.length === 0) { return null; } const result = data[0]; return { latitude: parseFloat(result.lat), longitude: parseFloat(result.lon), formattedAddress: result.display_name, provider: 'Nominatim', confidence: calculateNominatimConfidence(result), components: extractAddressComponents(result.address || {}), raw: result }; } catch (error) { logger.error('Nominatim geocoding error:', error.message); throw error; } } /** * Geocode with Photon (OpenStreetMap-based) */ async function geocodeWithPhoton(address, options = {}) { const { timeout = 10000, delay = 500 } = options; if (delay > 0) { await new Promise(resolve => setTimeout(resolve, delay)); } logger.info(`Geocoding with Photon: ${address}`); try { const response = await axios.get('https://photon.komoot.io/api/', { params: { q: address, limit: 1, lang: 'en' }, timeout }); if (!response.data?.features || response.data.features.length === 0) { return null; } const feature = response.data.features[0]; const coords = feature.geometry.coordinates; const props = feature.properties; return { latitude: coords[1], longitude: coords[0], formattedAddress: buildFormattedAddress(props), provider: 'Photon', confidence: calculatePhotonConfidence(feature), components: extractPhotonComponents(props), raw: feature }; } catch (error) { logger.error('Photon geocoding error:', error.message); throw error; } } /** * Geocode with ArcGIS World Geocoding Service (free tier) */ async function geocodeWithArcGIS(address, options = {}) { const { timeout = 10000, delay = 500 } = options; if (delay > 0) { await new Promise(resolve => setTimeout(resolve, delay)); } logger.info(`Geocoding with ArcGIS: ${address}`); try { const response = await axios.get('https://geocode-api.arcgis.com/arcgis/rest/services/World/GeocodeServer/findAddressCandidates', { params: { SingleLine: address, f: 'json', outFields: '*', maxLocations: 1, countryCode: 'CA' }, timeout }); if (!response.data?.candidates || response.data.candidates.length === 0) { return null; } const candidate = response.data.candidates[0]; const location = candidate.location; const attributes = candidate.attributes; return { latitude: location.y, longitude: location.x, formattedAddress: attributes.LongLabel || candidate.address, provider: 'ArcGIS', confidence: candidate.score || 50, components: extractArcGISComponents(attributes), raw: candidate }; } catch (error) { logger.error('ArcGIS geocoding error:', error.message); throw error; } } /** * Validate that coordinates are within Alberta */ function isInAlberta(lat, lng) { return lat >= ALBERTA_BOUNDS.south && lat <= ALBERTA_BOUNDS.north && lng >= ALBERTA_BOUNDS.west && lng <= ALBERTA_BOUNDS.east; } /** * Validate geocoding result against original address */ function validateGeocodeResult(originalAddress, result) { const validation = { isValid: true, confidence: result.confidence || 50, warnings: [] }; if (!result || !result.latitude || !result.longitude) { validation.isValid = false; validation.confidence = 0; return validation; } // Check if result is in Alberta if (!isInAlberta(result.latitude, result.longitude)) { validation.warnings.push('Result is outside Alberta'); validation.confidence -= 50; validation.isValid = false; } // Check for street number match if original has one const originalNumber = originalAddress.match(/^(\d+)/); if (originalNumber && result.components) { if (!result.components.house_number) { validation.warnings.push('Street number not found in result'); validation.confidence -= 25; } else if (result.components.house_number !== originalNumber[1]) { validation.warnings.push('Street number mismatch'); validation.confidence -= 30; } } // Penalize results that are just city-level (no street) if (result.components && !result.components.road && !result.components.house_number) { validation.warnings.push('Result is city-level only, not street address'); validation.confidence -= 20; } validation.confidence = Math.max(validation.confidence, 0); validation.isValid = validation.confidence >= 30; return validation; } /** * Forward geocode address to coordinates */ async function forwardGeocode(address) { if (!address || typeof address !== 'string' || address.trim().length === 0) { throw new Error('Invalid address'); } address = address.trim(); const cacheKey = `addr:${address.toLowerCase()}`; // Check cache const cached = geocodeCache.get(cacheKey); if (cached && Date.now() - cached.timestamp < CACHE_TTL) { logger.debug(`Geocoding cache hit for ${address}`); return cached.data; } // Build address variations for Alberta addresses const variations = buildAddressVariations(address); let bestResult = null; let bestValidation = null; let bestScore = 0; for (const provider of GEOCODING_PROVIDERS) { if (!provider.enabled()) continue; logger.info(`Trying provider: ${provider.name}`); for (const variation of variations) { try { const result = await provider.func(variation, provider.options); if (!result) continue; // Validate the result const validation = validateGeocodeResult(address, result); const score = (result.confidence + validation.confidence) / 2; logger.debug(`${provider.name} result: confidence=${result.confidence}, validation=${validation.confidence}, score=${score}`); if (score > bestScore) { bestResult = result; bestValidation = validation; bestScore = score; // If we have a high-confidence match, return immediately if (score >= 85 && validation.isValid) { bestResult.validation = validation; bestResult.combinedConfidence = score; geocodeCache.set(cacheKey, { data: bestResult, timestamp: Date.now() }); logger.info(`High-confidence result from ${provider.name}: ${score}`); return bestResult; } } } catch (error) { logger.warn(`${provider.name} failed for "${variation}": ${error.message}`); } } // If we have a good result from this provider, stop trying more if (bestScore >= 70) { logger.info(`Good result from ${provider.name}, stopping search`); break; } } if (bestResult) { bestResult.validation = bestValidation; bestResult.combinedConfidence = bestScore; geocodeCache.set(cacheKey, { data: bestResult, timestamp: Date.now() }); logger.info(`Best result: ${bestResult.provider} with score ${bestScore}`); return bestResult; } throw new Error('Could not geocode address'); } /** * Build address variations for geocoding attempts */ function buildAddressVariations(address) { const variations = new Set(); // Original address variations.add(address); // Add Alberta/Canada if not present if (!address.toLowerCase().includes('alberta') && !address.toLowerCase().includes(', ab')) { variations.add(`${address}, Alberta, Canada`); variations.add(`${address}, AB, Canada`); } // Expand quadrant abbreviations (common in Calgary/Edmonton) const quadrantExpansions = { ' NW': ' Northwest', ' NE': ' Northeast', ' SW': ' Southwest', ' SE': ' Southeast' }; for (const [abbrev, full] of Object.entries(quadrantExpansions)) { if (address.toUpperCase().includes(abbrev)) { variations.add(address.replace(new RegExp(abbrev, 'gi'), full)); } if (address.includes(full)) { variations.add(address.replace(new RegExp(full, 'gi'), abbrev.trim())); } } // Expand/contract street type abbreviations const streetTypes = { ' St ': ' Street ', ' St.': ' Street', ' Ave ': ' Avenue ', ' Ave.': ' Avenue', ' Rd ': ' Road ', ' Rd.': ' Road', ' Dr ': ' Drive ', ' Dr.': ' Drive', ' Cres ': ' Crescent ', ' Cres.': ' Crescent', ' Blvd ': ' Boulevard ', ' Blvd.': ' Boulevard' }; for (const [abbrev, full] of Object.entries(streetTypes)) { if (address.includes(abbrev)) { variations.add(address.replace(abbrev, full)); } if (address.includes(full)) { variations.add(address.replace(full, abbrev.replace('.', ''))); } } return Array.from(variations).slice(0, 6); // Limit to 6 variations } /** * Reverse geocode coordinates to address */ async function reverseGeocode(lat, lng) { const cacheKey = `rev:${lat.toFixed(6)},${lng.toFixed(6)}`; const cached = geocodeCache.get(cacheKey); if (cached && Date.now() - cached.timestamp < CACHE_TTL) { return cached.data; } await new Promise(resolve => setTimeout(resolve, 1000)); try { const response = await axios.get('https://nominatim.openstreetmap.org/reverse', { params: { format: 'json', lat, lon: lng, zoom: 18, addressdetails: 1 }, headers: { 'User-Agent': 'FreeAlbertaFood/1.0 (https://freealberta.org)' }, timeout: 10000 }); const result = { formattedAddress: response.data.display_name, components: extractAddressComponents(response.data.address || {}), latitude: parseFloat(response.data.lat), longitude: parseFloat(response.data.lon) }; geocodeCache.set(cacheKey, { data: result, timestamp: Date.now() }); return result; } catch (error) { logger.error('Reverse geocoding error:', error.message); throw error; } } // Helper functions function extractAddressComponents(address) { return { house_number: address.house_number || '', road: address.road || '', suburb: address.suburb || address.neighbourhood || '', city: address.city || address.town || address.village || '', state: address.state || address.province || '', postcode: address.postcode || '', country: address.country || '' }; } function extractPhotonComponents(props) { return { house_number: props.housenumber || '', road: props.street || '', suburb: props.district || '', city: props.city || '', state: props.state || '', postcode: props.postcode || '', country: props.country || '' }; } function extractMapboxComponents(result) { const components = { house_number: '', road: '', suburb: '', city: '', state: '', postcode: '', country: '' }; if (result.context && Array.isArray(result.context)) { result.context.forEach(item => { const id = item.id || ''; if (id.startsWith('postcode.')) components.postcode = item.text; else if (id.startsWith('place.')) components.city = item.text; else if (id.startsWith('region.')) components.state = item.text; else if (id.startsWith('country.')) components.country = item.text; else if (id.startsWith('neighborhood.')) components.suburb = item.text; }); } // Extract house number and street from place_name if (result.place_name) { const match = result.place_name.match(/^(\d+[A-Za-z]?)\s+(.+?),/); if (match) { components.house_number = match[1]; components.road = match[2]; } } return components; } function extractArcGISComponents(attributes) { return { house_number: attributes.AddNum || '', road: attributes.StName || '', suburb: attributes.District || '', city: attributes.City || '', state: attributes.Region || '', postcode: attributes.Postal || '', country: attributes.Country || '' }; } function buildFormattedAddress(props) { const parts = []; if (props.housenumber) parts.push(props.housenumber); if (props.street) parts.push(props.street); if (props.city) parts.push(props.city); if (props.state) parts.push(props.state); if (props.postcode) parts.push(props.postcode); return parts.join(', '); } function calculateNominatimConfidence(data) { let confidence = 100; if (!data.address?.house_number) confidence -= 20; if (!data.address?.road) confidence -= 30; if (data.type === 'administrative') confidence -= 25; return Math.max(confidence, 10); } function calculatePhotonConfidence(feature) { let confidence = 100; const props = feature.properties; if (!props.housenumber) confidence -= 20; if (!props.street) confidence -= 30; return Math.max(confidence, 10); } function getCacheStats() { return { size: geocodeCache.size, ttl: CACHE_TTL }; } function clearCache() { geocodeCache.clear(); } module.exports = { forwardGeocode, reverseGeocode, getCacheStats, clearCache };