146 lines
4.8 KiB
JavaScript
Executable File
146 lines
4.8 KiB
JavaScript
Executable File
/**
 * Agent Image Scraper for HomeProz
 */
|
|
|
|
const { chromium } = require('playwright');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const https = require('https');
|
|
|
|
// Directory that downloaded images are written to: an `images` folder next to this script.
const OUTPUT_DIR = path.join(__dirname, 'images');
|
|
|
|
/**
 * Download `url` over HTTPS and save the response body to `filepath`.
 *
 * Redirects (301, 302, 303, 307, 308) are followed, with a relative
 * `Location` header resolved against the current URL. The destination file is
 * only opened once a 200 response has arrived, so failed requests and
 * redirects never leave an empty file or an open file handle behind.
 *
 * @param {string} url - HTTPS URL of the image. Non-HTTPS URLs are rejected by `https.get`.
 * @param {string} filepath - Path of the file to write.
 * @param {number} [redirectsLeft=5] - How many more redirects may be followed.
 * @returns {Promise<string>} Resolves with `filepath` once the file is fully written.
 * @throws {Error} Rejects on a non-200 status, too many redirects, an invalid
 *   URL, or a network / file-system error.
 */
async function downloadImage(url, filepath, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    let file = null;
    let settled = false;

    // Single failure path: release the file (if we created it) and reject once.
    const fail = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      if (file) {
        file.destroy();
        // Best-effort removal of the partial file; a failed unlink is not actionable.
        fs.unlink(filepath, () => {});
      }
      reject(err);
    };

    const request = https.get(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
      }
    }, (response) => {
      const { statusCode, headers } = response;
      const isRedirect = [301, 302, 303, 307, 308].includes(statusCode);

      if (isRedirect && headers.location) {
        // Drain the body so the socket is released before following the redirect.
        response.resume();
        if (redirectsLeft <= 0) {
          fail(new Error('Failed to download: too many redirects'));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the URL just requested.
          nextUrl = new URL(headers.location, url).toString();
        } catch (err) {
          fail(err);
          return;
        }
        settled = true;
        downloadImage(nextUrl, filepath, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        fail(new Error(`Failed to download: ${statusCode}`));
        return;
      }

      file = fs.createWriteStream(filepath);
      // Without this handler a missing directory or permission error would be
      // an uncaught 'error' event and crash the process.
      file.on('error', fail);
      // pipe() does not forward source errors, so watch the response as well.
      response.on('error', fail);
      file.on('finish', () => {
        file.close(() => {
          if (!settled) {
            settled = true;
            resolve(filepath);
          }
        });
      });
      response.pipe(file);
    });

    request.on('error', fail);
  });
}
|
|
|
|
/**
 * Scrape the HomeProz homepage for candidate agent photos.
 *
 * Steps:
 *  1. Load https://homeprozrealestate.com/ in headless Chromium.
 *  2. Save a full-page screenshot as `homepage.png` next to this script.
 *  3. Collect two lists of <img> elements: ones whose alt text or URL matches
 *     agent-related keywords, and ones rendered larger than 100x100 px
 *     (excluding URLs containing "logo" or "icon").
 *  4. Write both lists to `found-images.json` next to this script.
 *  5. Download every large image with an http(s) URL into OUTPUT_DIR.
 *
 * Individual download failures are logged and skipped. The browser is closed
 * even when a step throws.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Starting agent image scraper...\n');

  // createWriteStream does not create missing parent directories, so make
  // sure the output folder exists before any download starts.
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  try {
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      viewport: { width: 1920, height: 1080 }
    });

    const page = await context.newPage();

    console.log('Loading homeprozrealestate.com...');
    await page.goto('https://homeprozrealestate.com/', { waitUntil: 'networkidle', timeout: 60000 });
    // Extra settle time after network idle before capturing the page.
    await page.waitForTimeout(3000);

    // Take a full-page screenshot of the homepage
    await page.screenshot({ path: path.join(__dirname, 'homepage.png'), fullPage: true });
    console.log('Screenshot saved\n');

    // Try to find agent images. This callback runs inside the page, so it
    // must be self-contained and cannot reference variables from this file.
    const agentData = await page.evaluate(() => {
      const altKeywords = ['anna', 'davy', 'jordan', 'lily', 'agent', 'realtor'];
      const srcKeywords = ['agent', 'team'];
      const allImages = Array.from(document.querySelectorAll('img'));
      const sourceOf = (img) => img.src || img.dataset.src;

      // Images whose alt text or URL hints at an agent photo
      const agents = [];
      for (const img of allImages) {
        const src = sourceOf(img);
        if (!src) {
          continue;
        }
        const alt = img.alt || '';
        const altLower = alt.toLowerCase();
        const looksLikeAgent =
          altKeywords.some((word) => altLower.includes(word)) ||
          srcKeywords.some((word) => src.includes(word));
        if (looksLikeAgent) {
          agents.push({
            src: src,
            alt: alt,
            width: img.naturalWidth || img.width,
            height: img.naturalHeight || img.height
          });
        }
      }

      // Also get all images with decent size (likely profile photos)
      const largeImages = [];
      for (const img of allImages) {
        const src = sourceOf(img);
        if (!src || src.includes('logo') || src.includes('icon')) {
          continue;
        }
        const rect = img.getBoundingClientRect();
        if (rect.width > 100 && rect.height > 100) {
          largeImages.push({
            src: src,
            alt: img.alt || '',
            width: rect.width,
            height: rect.height
          });
        }
      }

      return { agents, largeImages };
    });

    console.log('Agent-related images found:', agentData.agents.length);
    console.log('Large images found:', agentData.largeImages.length);

    // Save all found image URLs
    fs.writeFileSync(
      path.join(__dirname, 'found-images.json'),
      JSON.stringify(agentData, null, 2)
    );

    // Download large images, one at a time
    let imgCount = 0;
    for (const img of agentData.largeImages) {
      if (!img.src || !img.src.startsWith('http')) {
        continue;
      }
      imgCount++;
      const ext = img.src.includes('.png') ? 'png' : 'jpg';
      const filename = `image-${imgCount}.${ext}`;
      const filepath = path.join(OUTPUT_DIR, filename);

      try {
        await downloadImage(img.src, filepath);
        console.log(`Downloaded: ${filename} (${img.alt || 'no alt'})`);
      } catch (err) {
        // One failed image should not abort the remaining downloads.
        console.log(`Failed to download ${img.src}: ${err.message}`);
      }
    }
  } finally {
    // Always release the browser process, even when scraping throws.
    await browser.close();
  }

  console.log('\nDone!');
}
|
|
|
|
// Run the scraper. On failure, log the error and exit with a non-zero status
// so shells and CI can detect that the run did not succeed.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|