/**
 * Agent Image Scraper for HomeProz
 *
 * Loads the HomeProz homepage in headless Chromium, saves a full-page
 * screenshot, writes every candidate image URL to found-images.json, and
 * downloads each large image into the ./images directory.
 */

const { chromium } = require('playwright');
const fs = require('fs');
const path = require('path');
const http = require('http');
const https = require('https');
const { pipeline } = require('stream');

const OUTPUT_DIR = path.join(__dirname, 'images');

// Upper bound on redirect hops so a redirect loop cannot recurse forever.
const MAX_REDIRECTS = 5;
const REDIRECT_STATUS_CODES = new Set([301, 302, 303, 307, 308]);

/**
 * Downloads a single URL to a file on disk, following HTTP redirects.
 *
 * @param {string} url - Absolute http(s) URL of the resource to fetch.
 * @param {string} filepath - Destination path; its directory must exist.
 * @param {number} [redirectsLeft=MAX_REDIRECTS] - Remaining redirect hops.
 * @returns {Promise<string>} Resolves with `filepath` once the file is written.
 *   Rejects on an invalid URL, an unsupported protocol, a non-200 response,
 *   too many redirects, or any network / filesystem error.
 */
function downloadImage(url, filepath, redirectsLeft = MAX_REDIRECTS) {
  return new Promise((resolve, reject) => {
    // A throw inside the executor (e.g. an invalid URL) rejects the promise.
    const target = new URL(url);

    let client;
    if (target.protocol === 'https:') {
      client = https;
    } else if (target.protocol === 'http:') {
      client = http;
    } else {
      reject(new Error(`Unsupported protocol: ${target.protocol}`));
      return;
    }

    const request = client.get(
      target,
      {
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
      },
      (response) => {
        const { statusCode, headers } = response;

        if (REDIRECT_STATUS_CODES.has(statusCode) && headers.location) {
          // Drain the redirect body so the socket is released.
          response.resume();

          if (redirectsLeft <= 0) {
            reject(new Error(`Too many redirects while downloading ${url}`));
            return;
          }

          let nextUrl;
          try {
            // Location may be relative; resolve it against the current URL.
            nextUrl = new URL(headers.location, target).href;
          } catch (err) {
            reject(err);
            return;
          }

          downloadImage(nextUrl, filepath, redirectsLeft - 1).then(resolve, reject);
          return;
        }

        if (statusCode !== 200) {
          response.resume();
          reject(new Error(`Failed to download: ${statusCode}`));
          return;
        }

        // Open the destination only once a body is known to be coming, so
        // redirects and error responses leave no open handle or empty file.
        const file = fs.createWriteStream(filepath);

        // pipeline() forwards errors from either stream and destroys both.
        pipeline(response, file, (err) => {
          if (err) {
            // Best-effort removal of the partial file; the original error is
            // the one worth reporting, so an unlink failure is ignored.
            fs.unlink(filepath, () => {});
            reject(err);
            return;
          }
          resolve(filepath);
        });
      }
    );

    request.on('error', reject);
  });
}

/**
 * Collects image metadata from the current document.
 *
 * Runs inside the browser page via `page.evaluate`, so it must be
 * self-contained: it cannot reference any variable from this Node module.
 *
 * @returns {{agents: Array<Object>, largeImages: Array<Object>}} `agents`
 *   holds images whose alt text or URL matches an agent-related keyword;
 *   `largeImages` holds every non-logo, non-icon image rendered larger than
 *   100x100 pixels. Each entry has `src`, `alt`, `width` and `height`.
 */
function collectImageData() {
  const altKeywords = ['anna', 'davy', 'jordan', 'lily', 'agent', 'realtor'];
  const srcKeywords = ['agent', 'team'];

  const agents = [];
  const largeImages = [];
  const allImages = document.querySelectorAll('img');

  allImages.forEach((img) => {
    // Lazy-loaded images may only carry their URL in data-src.
    const src = img.src || img.dataset.src;
    if (!src) {
      return;
    }

    const alt = img.alt || '';
    const lowerAlt = alt.toLowerCase();

    // Images that look like agent photos by alt text or URL.
    const looksLikeAgent =
      altKeywords.some((word) => lowerAlt.includes(word)) ||
      srcKeywords.some((word) => src.includes(word));

    if (looksLikeAgent) {
      agents.push({
        src: src,
        alt: alt,
        width: img.naturalWidth || img.width,
        height: img.naturalHeight || img.height
      });
    }

    // Any decently sized image that is not a logo or icon (likely a photo).
    if (!src.includes('logo') && !src.includes('icon')) {
      const rect = img.getBoundingClientRect();
      if (rect.width > 100 && rect.height > 100) {
        largeImages.push({
          src: src,
          alt: alt,
          width: rect.width,
          height: rect.height
        });
      }
    }
  });

  return { agents, largeImages };
}

/**
 * Script entry point: scrapes the homepage, records the images found and
 * downloads the large ones. A failed download is logged and skipped; any
 * other failure propagates to the caller after the browser is closed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Starting agent image scraper...\n');

  // Downloads are written here; the directory does not exist on a fresh run.
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  try {
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      viewport: { width: 1920, height: 1080 }
    });

    const page = await context.newPage();

    console.log('Loading homeprozrealestate.com...');
    await page.goto('https://homeprozrealestate.com/', {
      waitUntil: 'networkidle',
      timeout: 60000
    });

    // Extra settle time after network idle before capturing the page.
    await page.waitForTimeout(3000);

    // Take a full-page screenshot of the homepage.
    await page.screenshot({
      path: path.join(__dirname, 'homepage.png'),
      fullPage: true
    });
    console.log('Screenshot saved\n');

    // Try to find agent images.
    const agentData = await page.evaluate(collectImageData);

    console.log('Agent-related images found:', agentData.agents.length);
    console.log('Large images found:', agentData.largeImages.length);

    // Save all found image URLs.
    fs.writeFileSync(
      path.join(__dirname, 'found-images.json'),
      JSON.stringify(agentData, null, 2)
    );

    // Download large images one at a time.
    let imgCount = 0;
    for (const img of agentData.largeImages) {
      if (!img.src || !img.src.startsWith('http')) {
        continue;
      }

      imgCount++;
      const ext = img.src.includes('.png') ? 'png' : 'jpg';
      const filename = `image-${imgCount}.${ext}`;
      const filepath = path.join(OUTPUT_DIR, filename);

      try {
        await downloadImage(img.src, filepath);
        console.log(`Downloaded: ${filename} (${img.alt || 'no alt'})`);
      } catch (err) {
        console.log(`Failed to download ${img.src}: ${err.message}`);
      }
    }
  } finally {
    // Always release the browser process, even when a step above throws.
    await browser.close();
  }

  console.log('\nDone!');
}

main().catch((err) => {
  console.error(err);
  // Signal failure to the shell instead of exiting with status 0.
  process.exitCode = 1;
});