const axios = require('axios');
const db = require('../models/db');
const logger = require('../utils/logger');

/*
 * 211 Alberta Scraper
 *
 * NOTE: ab.211.ca uses Cloudflare protection, which blocks automated scraping.
 * This scraper is designed to work with their API if access is granted,
 * or can be used with manual data entry.
 *
 * Options for getting 211 data:
 * 1. Contact 211 Alberta to request API access
 * 2. Use their data export/sharing programs
 * 3. Manual data entry from their website
 *
 * For now, this provides a framework for importing 211 data.
 */

const SEARCH_LOCATIONS = [
  { name: 'Calgary', lat: 51.0447, lng: -114.0719 },
  { name: 'Edmonton', lat: 53.5461, lng: -113.4938 },
  { name: 'Red Deer', lat: 52.2681, lng: -113.8112 },
  { name: 'Lethbridge', lat: 49.6956, lng: -112.8451 },
  { name: 'Medicine Hat', lat: 50.0405, lng: -110.6764 }
];

// Topic ID 58 = Food/Meals in 211's taxonomy
const FOOD_TOPIC_ID = 58;

async function attempt211Fetch(location) {
  const url = 'https://ab.211.ca/api/v1/search';

  try {
    const response = await axios.get(url, {
      params: {
        latitude: location.lat,
        longitude: location.lng,
        topicPath: FOOD_TOPIC_ID,
        distance: 50
      },
      headers: {
        'User-Agent': 'FreeAlbertaFoodBot/1.0',
        'Accept': 'application/json'
      },
      timeout: 30000
    });

    return response.data;
  } catch (error) {
    if (error.response?.status === 403) {
      // Cloudflare typically answers automated requests with a 403
      logger.warn('211 Alberta blocked request (Cloudflare protection)', {
        location: location.name
      });
    } else {
      logger.error('211 fetch failed', {
        location: location.name,
        error: error.message
      });
    }
    // Return null so the caller can skip this location and keep going
    return null;
  }
}
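
/*
 * Hypothetical smoke test for attempt211Fetch, e.g. from a REPL, to check
 * whether API access has been granted. The coordinates are Calgary's; the
 * shape of `data.results` is assumed from scrape211Alberta below and has
 * not been verified against a live response:
 *
 *   attempt211Fetch({ name: 'Calgary', lat: 51.0447, lng: -114.0719 })
 *     .then(data => console.log(data ? data.results.length : 'blocked or failed'));
 */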

async function importManualData(resources) {
  /*
   * Use this function to import manually collected 211 data.
   * Expected format:
   * [{
   *   name: 'Service Name',
   *   description: 'Description',
   *   address: 'Full address',
   *   city: 'City',
   *   phone: 'Phone number',
   *   website: 'URL',
   *   hours: 'Hours of operation',
   *   type: 'food_bank|community_meal|hamper|pantry|etc'
   * }]
   */

  let added = 0;
  let updated = 0;

  for (const resource of resources) {
    try {
      // Build a stable identifier so re-imports update rather than duplicate
      const sourceId = resource.id ||
        `211-${resource.name}-${resource.city}`.replace(/\s+/g, '-').toLowerCase();

      const result = await db.query(`
        INSERT INTO food_resources (
          name, description, resource_type,
          address, city, phone, website,
          hours_of_operation, source, source_id,
          updated_at, last_verified_at
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'ab211', $9, NOW(), NOW())
        ON CONFLICT (source, source_id)
        DO UPDATE SET
          name = EXCLUDED.name,
          description = COALESCE(EXCLUDED.description, food_resources.description),
          address = COALESCE(EXCLUDED.address, food_resources.address),
          phone = COALESCE(EXCLUDED.phone, food_resources.phone),
          website = COALESCE(EXCLUDED.website, food_resources.website),
          hours_of_operation = COALESCE(EXCLUDED.hours_of_operation, food_resources.hours_of_operation),
          updated_at = NOW()
        RETURNING (xmax = 0) AS inserted
      `, [
        resource.name,
        resource.description || null,
        resource.type || 'other',
        resource.address || null,
        resource.city || null,
        resource.phone || null,
        resource.website || null,
        resource.hours || null,
        sourceId
      ]);

      // In Postgres, xmax = 0 means the row was freshly inserted, so this
      // distinguishes new rows from conflict updates
      if (result.rows[0].inserted) {
        added++;
      } else {
        updated++;
      }
    } catch (error) {
      logger.error('Failed to import 211 resource', {
        name: resource.name,
        error: error.message
      });
    }
  }

  return { added, updated };
}
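
/*
 * Hypothetical usage, e.g. from a one-off import script. The require path
 * and the record below are illustrative only, not real 211 data:
 *
 *   const { importManualData } = require('./scrape211Alberta');
 *   const records = [{
 *     name: 'Example Community Pantry',
 *     description: 'Free pantry staples, no referral needed',
 *     address: '123 Example St NW',
 *     city: 'Calgary',
 *     phone: '403-555-0100',
 *     website: 'https://example.org',
 *     hours: 'Mon-Fri 9am-4pm',
 *     type: 'pantry'
 *   }];
 *   importManualData(records).then(({ added, updated }) =>
 *     console.log(`added ${added}, updated ${updated}`));
 */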

async function scrape211Alberta() {
  logger.info('Starting 211 Alberta scrape');
  logger.warn('Note: 211 Alberta has Cloudflare protection. API access may be required.');

  const logResult = await db.query(`
    INSERT INTO scrape_logs (source, status)
    VALUES ('ab211', 'running')
    RETURNING id
  `);
  const logId = logResult.rows[0].id;

  let totalFound = 0;
  let totalAdded = 0;
  let totalUpdated = 0;

  try {
    for (const location of SEARCH_LOCATIONS) {
      const data = await attempt211Fetch(location);

      if (data && data.results) {
        totalFound += data.results.length;
        // TODO: once the API response schema is confirmed, map results into
        // the importManualData() format and import them here
        logger.info(`Found ${data.results.length} results for ${location.name}`);
      }

      // Rate limiting: pause 2s between locations
      await new Promise(resolve => setTimeout(resolve, 2000));
    }

    await db.query(`
      UPDATE scrape_logs
      SET completed_at = NOW(),
          status = 'completed',
          records_found = $1,
          records_added = $2,
          records_updated = $3
      WHERE id = $4
    `, [totalFound, totalAdded, totalUpdated, logId]);

    logger.info('211 Alberta scrape completed', {
      note: 'API access likely blocked by Cloudflare',
      found: totalFound
    });

    return { found: totalFound, added: totalAdded, updated: totalUpdated };
  } catch (error) {
    await db.query(`
      UPDATE scrape_logs
      SET completed_at = NOW(),
          status = 'failed',
          error_message = $1
      WHERE id = $2
    `, [error.message, logId]);

    throw error;
  }
}

// Run if called directly
if (require.main === module) {
  scrape211Alberta()
    .then(result => {
      console.log('211 scrape attempted:', result);
      process.exit(0);
    })
    .catch(err => {
      console.error('211 scrape failed:', err);
      process.exit(1);
    });
}

module.exports = { scrape211Alberta, importManualData };