const axios = require('axios');
const db = require('../models/db');
const logger = require('../utils/logger');

/*
 * 211 Alberta Scraper
 *
 * NOTE: ab.211.ca uses Cloudflare protection which blocks automated scraping.
 * This scraper is designed to work with their API if access is granted,
 * or can be used with manual data entry.
 *
 * Options for getting 211 data:
 * 1. Contact 211 Alberta to request API access
 * 2. Use their data export/sharing programs
 * 3. Manual data entry from their website
 *
 * For now, this provides a framework for importing 211 data.
 */

const SEARCH_LOCATIONS = [
  { name: 'Calgary', lat: 51.0447, lng: -114.0719 },
  { name: 'Edmonton', lat: 53.5461, lng: -113.4938 },
  { name: 'Red Deer', lat: 52.2681, lng: -113.8112 },
  { name: 'Lethbridge', lat: 49.6956, lng: -112.8451 },
  { name: 'Medicine Hat', lat: 50.0405, lng: -110.6764 }
];

// Topic ID 58 = Food/Meals in 211's taxonomy
const FOOD_TOPIC_ID = 58;

// Pause inserted between consecutive search requests (rate limiting).
const REQUEST_DELAY_MS = 2000;

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Query the 211 Alberta search endpoint for food resources near a location.
 *
 * Never throws: request failures are logged and reported as `null`.
 *
 * @param {{name: string, lat: number, lng: number}} location - Search centre.
 * @returns {Promise<object|null>} The response body, or `null` when the
 *   request failed (a 403 is logged as a Cloudflare block).
 */
async function attempt211Fetch(location) {
  const url = `https://ab.211.ca/api/v1/search`;

  try {
    const response = await axios.get(url, {
      params: {
        latitude: location.lat,
        longitude: location.lng,
        topicPath: FOOD_TOPIC_ID,
        distance: 50
      },
      headers: {
        'User-Agent': 'FreeAlbertaFoodBot/1.0',
        'Accept': 'application/json'
      },
      timeout: 30000
    });
    return response.data;
  } catch (error) {
    if (error.response?.status === 403) {
      logger.warn('211 Alberta blocked request (Cloudflare protection)', {
        location: location.name
      });
    } else {
      logger.error('211 fetch failed', {
        location: location.name,
        error: error.message
      });
    }
    return null;
  }
}

/**
 * Derive the `source_id` used as the upsert key for a manual 211 record.
 *
 * The derivation for valid records is intentionally unchanged so that rows
 * imported earlier keep matching on re-import.
 *
 * @param {object} resource - One manually collected record.
 * @returns {*} `resource.id` when truthy, otherwise a lower-cased,
 *   dash-separated slug built from the name and city.
 * @throws {Error} When the entry is not an object, or has neither `id` nor
 *   `name` (every such record would otherwise share one `211-undefined-…` key
 *   and overwrite the others).
 */
function buildSourceId(resource) {
  if (resource === null || typeof resource !== 'object') {
    throw new Error('Resource entry is not an object');
  }
  if (resource.id) {
    return resource.id;
  }
  if (!resource.name) {
    throw new Error('Resource has neither id nor name; cannot derive source_id');
  }
  return `211-${resource.name}-${resource.city}`.replace(/\s+/g, '-').toLowerCase();
}

/**
 * Use this function to import manually collected 211 data.
 *
 * Expected format:
 * [{
 *   name: 'Service Name',
 *   description: 'Description',
 *   address: 'Full address',
 *   city: 'City',
 *   phone: 'Phone number',
 *   website: 'URL',
 *   hours: 'Hours of operation',
 *   type: 'food_bank|community_meal|hamper|pantry|etc'
 * }]
 *
 * Each record is upserted into `food_resources` keyed on (source, source_id).
 * A record that fails (invalid entry or query error) is logged and skipped;
 * it does not abort the rest of the import.
 *
 * @param {Iterable<object>} resources - Records in the format above.
 * @returns {Promise<{added: number, updated: number}>} Counts of inserted and
 *   updated rows.
 */
async function importManualData(resources) {
  let added = 0;
  let updated = 0;

  for (const resource of resources) {
    try {
      const sourceId = buildSourceId(resource);

      const result = await db.query(`
        INSERT INTO food_resources (
          name, description, resource_type, address, city,
          phone, website, hours_of_operation,
          source, source_id, updated_at, last_verified_at
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'ab211', $9, NOW(), NOW())
        ON CONFLICT (source, source_id) DO UPDATE SET
          name = EXCLUDED.name,
          description = COALESCE(EXCLUDED.description, food_resources.description),
          address = COALESCE(EXCLUDED.address, food_resources.address),
          phone = COALESCE(EXCLUDED.phone, food_resources.phone),
          website = COALESCE(EXCLUDED.website, food_resources.website),
          hours_of_operation = COALESCE(EXCLUDED.hours_of_operation, food_resources.hours_of_operation),
          updated_at = NOW()
        RETURNING (xmax = 0) AS inserted
      `, [
        resource.name,
        resource.description || null,
        resource.type || 'other',
        resource.address || null,
        resource.city || null,
        resource.phone || null,
        resource.website || null,
        resource.hours || null,
        sourceId
      ]);

      // The query reports `inserted` as true for a new row and false when the
      // ON CONFLICT branch updated an existing one.
      if (result.rows[0].inserted) {
        added++;
      } else {
        updated++;
      }
    } catch (error) {
      // `resource` may be null/undefined here, so it must not be dereferenced
      // unguarded: a throw inside this handler would abort the whole import.
      logger.error('Failed to import 211 resource', {
        name: resource?.name,
        error: error.message
      });
    }
  }

  return { added, updated };
}

/**
 * Attempt a 211 Alberta search for every configured location and record the
 * run in `scrape_logs`.
 *
 * Fetched results are only counted for now; nothing is written to
 * `food_resources`, so `added` and `updated` are always 0.
 *
 * @returns {Promise<{found: number, added: number, updated: number}>}
 * @throws Rethrows any error raised while scraping or while recording
 *   completion, after trying to mark the log row as failed.
 */
async function scrape211Alberta() {
  logger.info('Starting 211 Alberta scrape');
  logger.warn('Note: 211 Alberta has Cloudflare protection. API access may be required.');

  const logResult = await db.query(`
    INSERT INTO scrape_logs (source, status)
    VALUES ('ab211', 'running')
    RETURNING id
  `);
  const logId = logResult.rows[0].id;

  let totalFound = 0;
  // Results are not imported yet, so these counters never change.
  const totalAdded = 0;
  const totalUpdated = 0;

  try {
    for (const [index, location] of SEARCH_LOCATIONS.entries()) {
      // Rate limiting: wait between requests, not after the final one.
      if (index > 0) {
        await sleep(REQUEST_DELAY_MS);
      }

      const data = await attempt211Fetch(location);
      const results = data?.results;

      // Only count a real array; anything else would turn the total into NaN.
      if (Array.isArray(results)) {
        totalFound += results.length;
        // Process results if we get any
        logger.info(`Found ${results.length} results for ${location.name}`);
      }
    }

    await db.query(`
      UPDATE scrape_logs
      SET completed_at = NOW(),
          status = 'completed',
          records_found = $1,
          records_added = $2,
          records_updated = $3
      WHERE id = $4
    `, [totalFound, totalAdded, totalUpdated, logId]);

    logger.info('211 Alberta scrape completed', {
      note: 'API access likely blocked by Cloudflare',
      found: totalFound
    });

    return { found: totalFound, added: totalAdded, updated: totalUpdated };
  } catch (error) {
    // Recording the failure is best-effort: if this query also fails, the
    // caller must still see the original error, not the logging one.
    try {
      await db.query(`
        UPDATE scrape_logs
        SET completed_at = NOW(),
            status = 'failed',
            error_message = $1
        WHERE id = $2
      `, [error.message, logId]);
    } catch (logError) {
      logger.error('Failed to record 211 scrape failure', {
        error: logError.message
      });
    }
    throw error;
  }
}

// Run if called directly
if (require.main === module) {
  scrape211Alberta()
    .then(result => {
      console.log('211 scrape attempted:', result);
      process.exit(0);
    })
    .catch(err => {
      console.error('211 scrape failed:', err);
      process.exit(1);
    });
}

module.exports = { scrape211Alberta, importManualData };