diff --git a/map/app/controllers/dataConvertController.js b/map/app/controllers/dataConvertController.js index f55a87e..40355f5 100644 --- a/map/app/controllers/dataConvertController.js +++ b/map/app/controllers/dataConvertController.js @@ -15,6 +15,7 @@ class DataConvertController { this.parseCSV = this.parseCSV.bind(this); this.saveGeocodedData = this.saveGeocodedData.bind(this); this.downloadReport = this.downloadReport.bind(this); + this.scanAndGeocode = this.scanAndGeocode.bind(this); } // Process CSV upload and geocode addresses with SSE progress updates @@ -152,7 +153,10 @@ class DataConvertController { if (geocodeResult && geocodeResult.coordinates) { // Check if result is malformed const isMalformed = geocodeResult.validation && geocodeResult.validation.isMalformed; - const confidence = geocodeResult.validation ? geocodeResult.validation.confidence : 100; + // Use combined confidence for best overall assessment + const confidence = geocodeResult.combinedConfidence !== undefined ? + geocodeResult.combinedConfidence : + (geocodeResult.validation ? geocodeResult.validation.confidence : 100); const warnings = geocodeResult.validation ? geocodeResult.validation.warnings : []; const processedRow = { @@ -165,6 +169,8 @@ class DataConvertController { geocode_status: isMalformed ? 'WARNING' : 'SUCCESS', geocode_error: '', confidence_score: confidence, + provider_confidence: geocodeResult.providerConfidence || null, + validation_confidence: geocodeResult.validation ? 
geocodeResult.validation.confidence : null, warnings: warnings.join('; '), is_malformed: isMalformed, provider: geocodeResult.provider || 'Unknown', @@ -333,6 +339,8 @@ class DataConvertController { latitude: parseFloat(location.latitude), longitude: parseFloat(location.longitude), Address: originalAddress, // Always use the original address from CSV + 'Geocode Confidence': location.confidence_score || null, // Add confidence score + 'Geocode Provider': location.provider || null, // Add provider name created_by_user: req.session.userEmail || 'csv_import', last_updated_by_user: req.session.userEmail || 'csv_import' }; @@ -367,7 +375,7 @@ class DataConvertController { const lowerKey = key.toLowerCase(); // Skip already processed fields - if (['latitude', 'longitude', 'geo-location', 'geocoded_address', 'geocode_success', 'address', 'csv_filename'].includes(lowerKey)) { + if (['latitude', 'longitude', 'geo-location', 'geocoded_address', 'geocode_success', 'address', 'csv_filename', 'confidence_score', 'provider_confidence', 'validation_confidence', 'warnings', 'is_malformed', 'provider', 'row_number', 'geocode_status', 'geocode_error'].includes(lowerKey)) { return; } @@ -402,6 +410,21 @@ class DataConvertController { noteParts.push(`Geocoded as: ${geocodedAddress}`); } + // Add confidence information if available + if (location.confidence_score !== undefined && location.confidence_score !== null) { + noteParts.push(`Geocode confidence: ${location.confidence_score}%`); + } + + // Add provider information if available + if (location.provider) { + noteParts.push(`Provider: ${location.provider}`); + } + + // Add warnings if present + if (location.warnings && location.warnings.trim()) { + noteParts.push(`Warnings: ${location.warnings}`); + } + locationData[targetField] = noteParts.join(' | '); } else { locationData[targetField] = location[key]; @@ -418,6 +441,21 @@ class DataConvertController { noteParts.push(`Geocoded as: ${geocodedAddress}`); } + // Add confidence 
information if available + if (location.confidence_score !== undefined && location.confidence_score !== null) { + noteParts.push(`Geocode confidence: ${location.confidence_score}%`); + } + + // Add provider information if available + if (location.provider) { + noteParts.push(`Provider: ${location.provider}`); + } + + // Add warnings if present + if (location.warnings && location.warnings.trim()) { + noteParts.push(`Warnings: ${location.warnings}`); + } + locationData['Notes'] = noteParts.join(' | '); } @@ -455,6 +493,7 @@ class DataConvertController { async downloadReport(req, res) { try { const { sessionId } = req.params; + const format = req.query.format || 'csv'; // Default to CSV, support 'txt' for backward compatibility if (!sessionId || !processingResults.has(sessionId)) { return res.status(404).json({ @@ -466,16 +505,27 @@ class DataConvertController { const results = processingResults.get(sessionId); const { filename, timestamp, allResults, summary } = results; - // Generate comprehensive report content - const reportContent = this.generateComprehensiveReport(allResults, filename, timestamp, summary); + let reportContent, contentType, fileExtension; - // Set headers for text download - const reportFilename = `geocoding-report-${sessionId}.txt`; - res.setHeader('Content-Type', 'text/plain'); + if (format === 'csv') { + // Generate CSV report + reportContent = this.generateReportCSV(allResults, filename, timestamp, summary); + contentType = 'text/csv'; + fileExtension = 'csv'; + } else { + // Generate text report (backward compatibility) + reportContent = this.generateComprehensiveReport(allResults, filename, timestamp, summary); + contentType = 'text/plain'; + fileExtension = 'txt'; + } + + // Set headers for download + const reportFilename = `geocoding-report-${sessionId}.${fileExtension}`; + res.setHeader('Content-Type', contentType); res.setHeader('Content-Disposition', `attachment; filename="${reportFilename}"`); res.setHeader('Cache-Control', 
'no-cache'); - logger.info(`Generating comprehensive report for session ${sessionId}: ${allResults.length} records`); + logger.info(`Generating ${format.toUpperCase()} report for session ${sessionId}: ${allResults.length} records`); res.send(reportContent); @@ -651,6 +701,290 @@ class DataConvertController { return stringField; } + + // Scan NocoDB database for records missing geo-location data and geocode them + async scanAndGeocode(req, res) { + try { + const sessionId = Date.now().toString(); + + // Set up SSE headers + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'X-Accel-Buffering': 'no' + }); + + logger.info(`Starting database scan for missing geo-location data (session: ${sessionId})`); + + // Send initial status + res.write(`data: ${JSON.stringify({ + type: 'status', + message: 'Scanning database for records missing geo-location data...', + sessionId: sessionId + })}\n\n`); + res.flush && res.flush(); + + // Fetch all records from NocoDB + let allRecords = []; + let offset = 0; + const limit = 100; // Process in batches + let hasMoreRecords = true; + + while (hasMoreRecords) { + try { + const response = await nocodbService.getAll(config.nocodb.tableId, { limit, offset }); + + if (response && response.list && response.list.length > 0) { + allRecords.push(...response.list); + offset += limit; + + // Send progress update + res.write(`data: ${JSON.stringify({ + type: 'scanning', + message: `Fetched ${allRecords.length} records from database...`, + count: allRecords.length + })}\n\n`); + res.flush && res.flush(); + + // Check if we've fetched all records + hasMoreRecords = response.list.length === limit; + } else { + hasMoreRecords = false; + } + } catch (error) { + logger.error('Error fetching records from NocoDB:', error); + res.write(`data: ${JSON.stringify({ + type: 'error', + message: `Error fetching records: ${error.message}` + })}\n\n`); + res.end(); + return; + } + } + + 
logger.info(`Database scan complete: found ${allRecords.length} total records`); + + // Filter records that need geocoding + const recordsNeedingGeocode = allRecords.filter(record => { + // Check if record is missing geo-location data + const hasGeoLocation = record['Geo-Location'] && + record['Geo-Location'].trim() !== '' && + record['Geo-Location'] !== 'null'; + const hasCoordinates = (record.latitude && record.longitude) || + (record.Latitude && record.Longitude); + const hasAddress = record.Address || record.address || record.ADDRESS; + + return !hasGeoLocation && !hasCoordinates && hasAddress; + }); + + const totalToGeocode = recordsNeedingGeocode.length; + + logger.info(`Found ${totalToGeocode} records needing geocoding`); + + // Send summary + res.write(`data: ${JSON.stringify({ + type: 'scan_complete', + message: `Scan complete: ${totalToGeocode} records need geocoding`, + total: allRecords.length, + needingGeocode: totalToGeocode + })}\n\n`); + res.flush && res.flush(); + + if (totalToGeocode === 0) { + res.write(`data: ${JSON.stringify({ + type: 'complete', + message: 'No records found that need geocoding. 
All records already have location data!', + results: { success: 0, failed: 0, skipped: allRecords.length } + })}\n\n`); + res.end(); + return; + } + + // Process geocoding + const results = { + success: 0, + failed: 0, + errors: [], + sessionId: sessionId + }; + + const allResults = []; + let processedCount = 0; + + for (const record of recordsNeedingGeocode) { + try { + processedCount++; + const address = record.Address || record.address || record.ADDRESS; + + // Send progress update + res.write(`data: ${JSON.stringify({ + type: 'progress', + current: processedCount, + total: totalToGeocode, + currentAddress: address, + status: 'processing' + })}\n\n`); + res.flush && res.flush(); + + logger.info(`Geocoding ${processedCount}/${totalToGeocode}: ${address} (Record ID: ${record.id || record.Id || record.ID})`); + + // Geocode the address + const geocodeResult = await forwardGeocode(address); + + if (geocodeResult && geocodeResult.coordinates) { + // Check if result is malformed + const isMalformed = geocodeResult.validation && geocodeResult.validation.isMalformed; + // Use combined confidence for best overall assessment + const confidence = geocodeResult.combinedConfidence !== undefined ? + geocodeResult.combinedConfidence : + (geocodeResult.validation ? geocodeResult.validation.confidence : 100); + const warnings = geocodeResult.validation ? 
geocodeResult.validation.warnings : []; + + // Update the record in NocoDB + const updateData = { + 'Geo-Location': `${geocodeResult.coordinates.lat};${geocodeResult.coordinates.lng}`, + latitude: geocodeResult.coordinates.lat, + longitude: geocodeResult.coordinates.lng, + 'Geocode Confidence': confidence, + 'Geocode Provider': geocodeResult.provider || 'Unknown', + last_updated_by_user: req.session?.userEmail || 'scan_geocode' + }; + + // Update the record in NocoDB + await nocodbService.update(config.nocodb.tableId, record.id || record.Id || record.ID, updateData); + + const processedRecord = { + id: record.id || record.Id || record.ID, + address: address, + latitude: geocodeResult.coordinates.lat, + longitude: geocodeResult.coordinates.lng, + confidence_score: confidence, + provider: geocodeResult.provider || 'Unknown', + status: isMalformed ? 'WARNING' : 'SUCCESS', + warnings: warnings.join('; ') + }; + + allResults.push(processedRecord); + results.success++; + + // Send success update + const successMessage = { + type: 'geocoded', + data: processedRecord, + index: processedCount - 1, + status: isMalformed ? 
'warning' : 'success', + confidence: confidence, + warnings: warnings + }; + + logger.info(`✓ Successfully geocoded and updated: ${address} (Confidence: ${confidence}%)`); + res.write(`data: ${JSON.stringify(successMessage)}\n\n`); + res.flush && res.flush(); + + } else { + throw new Error('Geocoding failed - no coordinates returned'); + } + + } catch (error) { + logger.error(`Failed to geocode record ${processedCount}/${totalToGeocode}:`, error.message); + + const errorRecord = { + id: record.id || record.Id || record.ID, + address: record.Address || record.address || record.ADDRESS, + error: error.message, + status: 'ERROR' + }; + + allResults.push(errorRecord); + results.failed++; + results.errors.push({ + address: errorRecord.address, + error: error.message + }); + + // Send error update + res.write(`data: ${JSON.stringify({ + type: 'error', + data: errorRecord, + index: processedCount - 1, + message: `Failed to geocode: ${errorRecord.address}` + })}\n\n`); + res.flush && res.flush(); + } + + // Rate limiting to be nice to geocoding APIs + if (processedCount < totalToGeocode) { + await new Promise(resolve => setTimeout(resolve, 500)); // 0.5 second delay between requests + } + } + + // Calculate summary statistics for report + const successful = allResults.filter(r => r.status === 'SUCCESS').length; + const warnings = allResults.filter(r => r.status === 'WARNING').length; + const failed = allResults.filter(r => r.status === 'ERROR').length; + const malformed = allResults.filter(r => r.warnings && r.warnings.includes('malformed')).length; + const total = successful + warnings + failed; + + // Transform scan results to match CSV processing format for report generation + const transformedResults = allResults.map(result => ({ + // Original format fields + address: result.address, + Address: result.address, + geocoded_address: result.address, // For scan, this is the same + latitude: result.latitude, + longitude: result.longitude, + 'Geo-Location': result.latitude 
&& result.longitude ? `${result.latitude};${result.longitude}` : '', + confidence_score: result.confidence_score, + provider: result.provider, + + // Status mapping + geocode_success: result.status !== 'ERROR', + geocode_status: result.status, + geocode_error: result.error || '', + is_malformed: result.warnings && result.warnings.includes('malformed'), + warnings: result.warnings || '', + + // Scan-specific fields + record_id: result.id, + source: 'database_scan', + row_number: result.id // Use record ID as row number for scan + })); + + // Store results for potential report download + processingResults.set(sessionId, { + filename: 'database_scan', + timestamp: new Date().toISOString(), + allResults: transformedResults, + summary: { + total: total, + successful: successful, + warnings: warnings, + failed: failed, + malformed: malformed + } + }); + + // Send completion message + logger.info(`Database scan and geocoding completed: ${results.success} successful, ${results.failed} failed`); + + res.write(`data: ${JSON.stringify({ + type: 'complete', + message: `Scan and geocode completed! Successfully updated ${results.success} records, ${results.failed} failed.`, + results: results, + sessionId: sessionId + })}\n\n`); + res.end(); + + } catch (error) { + logger.error('Database scan error:', error); + res.write(`data: ${JSON.stringify({ + type: 'error', + message: `Database scan failed: ${error.message}` + })}\n\n`); + res.end(); + } + } } module.exports = new DataConvertController(); diff --git a/map/app/public/admin.html b/map/app/public/admin.html index 623e538..3360762 100644 --- a/map/app/public/admin.html +++ b/map/app/public/admin.html @@ -1255,6 +1255,116 @@ + + +
Scan your existing database for records that are missing location data and automatically geocode them.
+ +