// File-viewer metadata (not code): 207 lines, 5.6 KiB, JavaScript

const axios = require('axios');
const db = require('../models/db');
const logger = require('../utils/logger');
/*
* 211 Alberta Scraper
*
* NOTE: ab.211.ca uses Cloudflare protection which blocks automated scraping.
* This scraper is designed to work with their API if access is granted,
* or can be used with manual data entry.
*
* Options for getting 211 data:
* 1. Contact 211 Alberta to request API access
* 2. Use their data export/sharing programs
* 3. Manual data entry from their website
*
* For now, this provides a framework for importing 211 data.
*/
/**
 * Search centres used when querying 211 Alberta.
 * Each entry carries a display name plus the latitude/longitude that is
 * sent to the search endpoint as the centre of the query.
 */
const SEARCH_LOCATIONS = [
  {
    name: 'Calgary',
    lat: 51.0447,
    lng: -114.0719,
  },
  {
    name: 'Edmonton',
    lat: 53.5461,
    lng: -113.4938,
  },
  {
    name: 'Red Deer',
    lat: 52.2681,
    lng: -113.8112,
  },
  {
    name: 'Lethbridge',
    lat: 49.6956,
    lng: -112.8451,
  },
  {
    name: 'Medicine Hat',
    lat: 50.0405,
    lng: -110.6764,
  },
];

/** Topic ID for Food/Meals in 211's taxonomy (sent as `topicPath`). */
const FOOD_TOPIC_ID = 58;
/**
 * Try a single food-topic search against the 211 Alberta API, centred on
 * the given location.
 *
 * This function never rejects because of a failed request: any error raised
 * by the HTTP call is logged and converted to `null`.
 *
 * @param {{name: string, lat: number, lng: number}} location - Search centre.
 * @returns {Promise<*|null>} The response body on success, otherwise `null`.
 */
async function attempt211Fetch(location) {
  const endpoint = 'https://ab.211.ca/api/v1/search';

  try {
    const requestConfig = {
      params: {
        latitude: location.lat,
        longitude: location.lng,
        topicPath: FOOD_TOPIC_ID,
        distance: 50,
      },
      headers: {
        'User-Agent': 'FreeAlbertaFoodBot/1.0',
        'Accept': 'application/json',
      },
      timeout: 30000,
    };

    const res = await axios.get(endpoint, requestConfig);
    return res.data;
  } catch (error) {
    // A 403 is logged as a Cloudflare block (warning); anything else is an error.
    const wasBlocked = error.response?.status === 403;

    if (wasBlocked) {
      logger.warn('211 Alberta blocked request (Cloudflare protection)', {
        location: location.name,
      });
      return null;
    }

    logger.error('211 fetch failed', {
      location: location.name,
      error: error.message,
    });
    return null;
  }
}
/**
 * Import manually collected 211 records into `food_resources`.
 *
 * Expected input format:
 *   [{
 *     id: 'optional stable identifier',
 *     name: 'Service Name',
 *     description: 'Description',
 *     address: 'Full address',
 *     city: 'City',
 *     phone: 'Phone number',
 *     website: 'URL',
 *     hours: 'Hours of operation',
 *     type: 'food_bank|community_meal|hamper|pantry|etc'
 *   }]
 *
 * Each record is upserted on (source, source_id) with source fixed to
 * 'ab211'. The source_id is `resource.id` when truthy; otherwise it is a
 * slug built from name and city (whitespace runs become '-', lowercased).
 *
 * On conflict the name is always overwritten, while description, address,
 * phone, website and hours are only overwritten by non-null incoming values.
 * City, resource_type and last_verified_at are left as stored.
 *
 * A record that fails (including a null/undefined entry) is logged and
 * counted, and the import continues with the next record.
 *
 * @param {Iterable<object>|null|undefined} resources - Records to import;
 *   null/undefined is treated as an empty list.
 * @returns {Promise<{added: number, updated: number, failed: number}>}
 *   Counts of inserted rows, updated rows, and records that could not be
 *   imported.
 */
async function importManualData(resources) {
  let added = 0;
  let updated = 0;
  let failed = 0;

  for (const resource of resources ?? []) {
    try {
      const sourceId = resource.id ||
        `211-${resource.name}-${resource.city}`.replace(/\s+/g, '-').toLowerCase();

      // `xmax = 0` is the usual PostgreSQL way to tell a fresh insert from
      // an ON CONFLICT update; it is surfaced here as `inserted`.
      const result = await db.query(`
        INSERT INTO food_resources (
          name, description, resource_type,
          address, city, phone, website,
          hours_of_operation, source, source_id,
          updated_at, last_verified_at
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'ab211', $9, NOW(), NOW())
        ON CONFLICT (source, source_id)
        DO UPDATE SET
          name = EXCLUDED.name,
          description = COALESCE(EXCLUDED.description, food_resources.description),
          address = COALESCE(EXCLUDED.address, food_resources.address),
          phone = COALESCE(EXCLUDED.phone, food_resources.phone),
          website = COALESCE(EXCLUDED.website, food_resources.website),
          hours_of_operation = COALESCE(EXCLUDED.hours_of_operation, food_resources.hours_of_operation),
          updated_at = NOW()
        RETURNING (xmax = 0) AS inserted
      `, [
        resource.name,
        resource.description || null,
        resource.type || 'other',
        resource.address || null,
        resource.city || null,
        resource.phone || null,
        resource.website || null,
        resource.hours || null,
        sourceId,
      ]);

      if (result.rows[0].inserted) {
        added++;
      } else {
        updated++;
      }
    } catch (error) {
      failed++;
      // `resource` itself may be null/undefined here, so read the name
      // defensively; throwing from this handler would abort the whole import.
      logger.error('Failed to import 211 resource', {
        name: resource?.name,
        error: error.message,
      });
    }
  }

  return { added, updated, failed };
}
/**
 * Run one 211 Alberta scrape attempt and record it in `scrape_logs`.
 *
 * A 'running' log row is inserted first. Every entry in SEARCH_LOCATIONS is
 * then queried via attempt211Fetch, with a 2 second pause between requests.
 * Results are only counted, not imported, so `added` and `updated` are
 * always 0. The log row is finally marked 'completed' with the totals, or
 * 'failed' with the error message.
 *
 * @returns {Promise<{found: number, added: number, updated: number}>}
 *   Totals for the run.
 * @throws Rethrows the error that made the run fail. If the initial log
 *   insert fails, that error propagates and no log row is updated.
 */
async function scrape211Alberta() {
  logger.info('Starting 211 Alberta scrape');
  logger.warn('Note: 211 Alberta has Cloudflare protection. API access may be required.');

  const logResult = await db.query(`
    INSERT INTO scrape_logs (source, status)
    VALUES ('ab211', 'running')
    RETURNING id
  `);
  const logId = logResult.rows[0].id;

  let totalFound = 0;
  // Fetched results are not imported yet, so these counters never change.
  const totalAdded = 0;
  const totalUpdated = 0;

  try {
    for (const [index, location] of SEARCH_LOCATIONS.entries()) {
      const data = await attempt211Fetch(location);

      // Only count array payloads; any other `results` value would make
      // the running total NaN or otherwise meaningless.
      if (Array.isArray(data?.results)) {
        totalFound += data.results.length;
        logger.info(`Found ${data.results.length} results for ${location.name}`);
      }

      // Rate limiting: pause between requests, but not after the last one.
      if (index < SEARCH_LOCATIONS.length - 1) {
        await new Promise((resolve) => setTimeout(resolve, 2000));
      }
    }

    await db.query(`
      UPDATE scrape_logs
      SET completed_at = NOW(),
          status = 'completed',
          records_found = $1,
          records_added = $2,
          records_updated = $3
      WHERE id = $4
    `, [totalFound, totalAdded, totalUpdated, logId]);

    logger.info('211 Alberta scrape completed', {
      note: 'API access likely blocked by Cloudflare',
      found: totalFound,
    });

    return { found: totalFound, added: totalAdded, updated: totalUpdated };
  } catch (error) {
    // Recording the failure is best-effort: if this update also fails, log it
    // and still rethrow the original error rather than masking it.
    try {
      await db.query(`
        UPDATE scrape_logs
        SET completed_at = NOW(),
            status = 'failed',
            error_message = $1
        WHERE id = $2
      `, [error.message, logId]);
    } catch (logError) {
      logger.error('Failed to record 211 scrape failure', {
        error: logError.message,
      });
    }
    throw error;
  }
}
// Run if called directly
if (require.main === module) {
scrape211Alberta()
.then(result => {
console.log('211 scrape attempted:', result);
process.exit(0);
})
.catch(err => {
console.error('211 scrape failed:', err);
process.exit(1);
});
}
module.exports = { scrape211Alberta, importManualData };