Files
homeproz/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-media-handler.php
T
Hanson.xyz Dev 6eadf3d266 Add queue-based media download system with rate limiting
- Add download_status, retry_after, queued_at columns to mls_media table
- Add mls_media_log table for download attempt tracking
- Rewrite media handler to queue downloads instead of immediate download
- Add 700ms delay between downloads (25% buffer over 2/sec limit)
- Add 3-hour backoff for rate-limited (429) responses
- Add max 5 attempts before marking as permanently failed
- Add wp mls media command: status, process, reset, logs
- Deprecate wp mls sync media in favor of wp mls media process
- Update documentation with queue system details and cron examples

Media downloads are now separate from property sync:
1. wp mls sync full/incremental - syncs properties, queues media
2. wp mls media process - downloads queued media with rate limiting

Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 22:52:58 -06:00

930 lines
29 KiB
PHP

<?php
/**
* MLS Media Handler
*
* Handles downloading and managing media files from MLS listings
* Uses a queue-based system with rate limiting to comply with API limits
*
* Rate limits: 2 requests/second (500ms minimum between requests)
* We wait 700ms between requests, i.e. 200ms of headroom over the 500ms minimum (~1.4 requests/second)
*/
// Bail out when this file is requested directly instead of being loaded with ABSPATH defined.
defined('ABSPATH') || exit;
class MLS_Media_Handler {
/**
* Upload subdirectory for MLS media, appended to the WordPress uploads base directory/URL
*/
const UPLOAD_SUBDIR = 'mls-listings';
/**
* Minimum delay between media downloads in milliseconds
* (700ms = 200ms of headroom over the 500ms minimum implied by the 2 requests/second limit)
*/
const DOWNLOAD_DELAY_MS = 700;
/**
* Retry backoff time in hours, applied when a download gets a retryable response (HTTP 429 or 5xx)
*/
const RETRY_BACKOFF_HOURS = 3;
/**
* Maximum download attempts before a media record is marked as failed
*/
const MAX_ATTEMPTS = 5;
/**
* Database instance (supplies the plugin's table names)
*
* @var MLS_DB
*/
private $db;
/**
* Logger instance
*
* @var MLS_Logger
*/
private $logger;
/**
 * Store the collaborators this handler works with.
 *
 * @param MLS_DB     $db     Database helper that supplies the plugin's table names
 * @param MLS_Logger $logger Logger instance
 */
public function __construct(MLS_DB $db, MLS_Logger $logger) {
    $this->logger = $logger;
    $this->db     = $db;
}
/**
 * Absolute filesystem path under which all MLS media is stored.
 *
 * @return string The uploads base directory followed by "/" and UPLOAD_SUBDIR
 */
public function get_upload_dir() {
    $uploads = wp_upload_dir();
    $base    = $uploads['basedir'];

    return $base . '/' . self::UPLOAD_SUBDIR;
}
/**
 * Public URL that corresponds to the MLS media directory.
 *
 * @return string The uploads base URL followed by "/" and UPLOAD_SUBDIR
 */
public function get_upload_url() {
    $uploads = wp_upload_dir();
    $base    = $uploads['baseurl'];

    return $base . '/' . self::UPLOAD_SUBDIR;
}
/**
 * Resolve the directory that holds one listing's media files.
 *
 * Layout: <upload dir>/<first two characters of the key>/<key>. The two-character
 * shard keeps any single folder from accumulating too many entries.
 *
 * @param string $listing_key Listing key
 * @return string Absolute path
 */
public function get_listing_dir($listing_key) {
    $shard    = substr($listing_key, 0, 2);
    $segments = array($this->get_upload_dir(), $shard, $listing_key);

    return implode('/', $segments);
}
/**
 * Queue media for a property (does NOT download immediately)
 *
 * For every item carrying a MediaKey, the matching row is inserted (as 'pending')
 * or updated. An updated row is re-queued when needs_download() says so. Rows of
 * this listing whose media key was not part of $media_array are deleted together
 * with their local file.
 *
 * An empty $media_array is a no-op: existing rows are left untouched.
 *
 * @param string        $listing_key       Listing key
 * @param array         $media_array       Media array from API
 * @param callable|null $progress_callback Called as ($event, array('media_key' => ...)),
 *                                         where $event is 'media_queued' or 'media_skipped'
 * @return array Counts: array('queued' => int, 'skipped' => int)
 */
public function queue_property_media($listing_key, $media_array, $progress_callback = null) {
    global $wpdb;

    $counts = array(
        'queued'  => 0,
        'skipped' => 0,
    );

    if (empty($media_array)) {
        // Always hand back the same shape so callers can read the counters unconditionally.
        return $counts;
    }

    $table         = $this->db->media_table();
    $received_keys = array();

    foreach ($media_array as $media) {
        $media_key = $media['MediaKey'] ?? null;
        if (!$media_key) {
            continue;
        }
        $received_keys[] = $media_key;

        // Check if media record exists
        $existing = $wpdb->get_row($wpdb->prepare(
            "SELECT * FROM {$table} WHERE listing_key = %s AND media_key = %s",
            $listing_key,
            $media_key
        ));

        // An unparseable timestamp becomes NULL instead of the epoch date('...', false) would produce.
        $modified_at = null;
        if (isset($media['MediaModificationTimestamp'])) {
            $parsed = strtotime($media['MediaModificationTimestamp']);
            if ($parsed !== false) {
                $modified_at = date('Y-m-d H:i:s', $parsed);
            }
        }

        $incoming_url = $media['MediaURL'] ?? null;

        $data = array(
            'listing_key'                  => $listing_key,
            'media_key'                    => $media_key,
            'media_type'                   => $media['MediaType'] ?? 'Photo',
            'media_order'                  => $media['Order'] ?? 0,
            'media_url'                    => $incoming_url,
            'image_width'                  => $media['ImageWidth'] ?? null,
            'image_height'                 => $media['ImageHeight'] ?? null,
            'media_modification_timestamp' => $modified_at,
            'updated_at'                   => current_time('mysql'),
        );

        if ($existing) {
            // Update existing record
            $wpdb->update($table, $data, array('id' => $existing->id));

            // $existing still holds the pre-update values, so the URL comparison is meaningful.
            if ($this->needs_download($existing, $media)) {
                $this->mark_for_download($existing->id);

                if ($existing->media_url !== $incoming_url) {
                    // Earlier failures belonged to the old URL. Without a fresh attempt
                    // budget a record that already hit MAX_ATTEMPTS would never be
                    // picked up by get_next_queued() again.
                    $wpdb->update(
                        $table,
                        array('download_attempts' => 0),
                        array('id' => $existing->id)
                    );
                }

                $event = 'media_queued';
                $counts['queued']++;
            } else {
                $event = 'media_skipped';
                $counts['skipped']++;
            }
        } else {
            // Insert new record - queued for download
            $data['created_at']      = current_time('mysql');
            $data['queued_at']       = current_time('mysql');
            $data['download_status'] = 'pending';
            $wpdb->insert($table, $data);

            $event = 'media_queued';
            $counts['queued']++;
        }

        if ($progress_callback) {
            call_user_func($progress_callback, $event, array('media_key' => $media_key));
        }
    }

    // Delete media that no longer exists
    if (!empty($received_keys)) {
        $placeholders = implode(',', array_fill(0, count($received_keys), '%s'));
        $values       = array_merge(array($listing_key), $received_keys);

        $orphaned = $wpdb->get_results($wpdb->prepare(
            "SELECT id, local_path FROM {$table} WHERE listing_key = %s AND media_key NOT IN ({$placeholders})",
            $values
        ));

        foreach ($orphaned as $record) {
            // Delete file if exists
            if ($record->local_path) {
                $file_path = $this->get_upload_dir() . '/' . $record->local_path;
                if (file_exists($file_path)) {
                    unlink($file_path);
                }
            }
            // Delete record
            $wpdb->delete($table, array('id' => $record->id));
        }
    }

    return $counts;
}
/**
 * Put a media record back into the download queue.
 *
 * Sets the status to 'pending', stamps queued_at with the current time and
 * clears the local path/URL, the download timestamp and the last error.
 *
 * @param int $media_id Media ID
 */
private function mark_for_download($media_id) {
    global $wpdb;

    $requeue = array(
        'download_status' => 'pending',
        'queued_at'       => current_time('mysql'),
    );
    // Everything describing a previous download result is blanked out.
    $requeue += array_fill_keys(
        array('local_path', 'local_url', 'downloaded_at', 'download_error'),
        null
    );

    $wpdb->update($this->db->media_table(), $requeue, array('id' => $media_id));
}
/**
 * Decide whether a stored media record has to be (re-)downloaded.
 *
 * True when the record has no local path, when the local file is missing on
 * disk, or when the API now reports a different MediaURL than the stored one.
 *
 * @param object $existing Existing media record
 * @param array  $new_data New media data from API
 * @return bool
 */
private function needs_download($existing, $new_data) {
    $has_local_path = !empty($existing->local_path);
    if (!$has_local_path) {
        return true;
    }

    $on_disk      = file_exists($this->get_upload_dir() . '/' . $existing->local_path);
    $incoming_url = $new_data['MediaURL'] ?? null;

    return !$on_disk || $existing->media_url !== $incoming_url;
}
/**
 * Fetch the oldest queue entry that may be downloaded right now.
 *
 * Eligible rows are 'pending', have a non-NULL media URL, have fewer than
 * MAX_ATTEMPTS attempts and are not inside a retry backoff window.
 *
 * @return object|null Media record or null if queue is empty
 */
public function get_next_queued() {
    global $wpdb;

    $table = $this->db->media_table();
    $sql   = "SELECT * FROM {$table}\n"
        . "WHERE download_status = 'pending'\n"
        . "AND media_url IS NOT NULL\n"
        . "AND download_attempts < %d\n"
        . "AND (retry_after IS NULL OR retry_after <= %s)\n"
        . "ORDER BY queued_at ASC\n"
        . "LIMIT 1";

    $query = $wpdb->prepare($sql, self::MAX_ATTEMPTS, current_time('mysql'));

    return $wpdb->get_row($query);
}
/**
 * Summarise the state of the download queue.
 *
 * @return array Integer counters keyed 'pending', 'ready', 'in_backoff', 'failed', 'completed'
 */
public function get_queue_stats() {
    global $wpdb;

    $now   = current_time('mysql');
    $count = "SELECT COUNT(*) FROM {$this->db->media_table()}\n";

    // One COUNT(*) statement per counter, run in the order listed.
    $statements = array(
        'pending'    => $count . "WHERE download_status = 'pending'",
        'ready'      => $wpdb->prepare(
            $count
            . "WHERE download_status = 'pending'\n"
            . "AND media_url IS NOT NULL\n"
            . "AND download_attempts < %d\n"
            . "AND (retry_after IS NULL OR retry_after <= %s)",
            self::MAX_ATTEMPTS,
            $now
        ),
        'in_backoff' => $wpdb->prepare(
            $count
            . "WHERE download_status = 'pending'\n"
            . "AND retry_after > %s",
            $now
        ),
        'failed'     => $wpdb->prepare(
            $count
            . "WHERE download_status = 'failed'\n"
            . "OR download_attempts >= %d",
            self::MAX_ATTEMPTS
        ),
        'completed'  => $count . "WHERE download_status = 'completed'",
    );

    $stats = array();
    foreach ($statements as $name => $sql) {
        $stats[$name] = (int) $wpdb->get_var($sql);
    }

    return $stats;
}
/**
 * Work through the download queue, pausing between downloads.
 *
 * Repeatedly takes the next eligible item and downloads it, waiting so that at
 * least DOWNLOAD_DELAY_MS elapse between the end of one download and the start
 * of the next. Stops after $limit items or as soon as no eligible item is left.
 *
 * @param int           $limit             Max items to process
 * @param callable|null $progress_callback Called as ($event, $payload) with $event being
 *                                         'media_downloaded', 'media_backoff' or 'media_error'
 * @return array Counters keyed 'processed', 'success', 'failed', 'skipped'
 */
public function process_queue($limit = 100, $progress_callback = null) {
    $stats = array(
        'processed' => 0,
        'success'   => 0,
        'failed'    => 0,
        'skipped'   => 0,
    );
    $previous_finish_ms = 0;

    // 'processed' grows by one per iteration, so it doubles as the loop counter.
    while ($stats['processed'] < $limit) {
        $item = $this->get_next_queued();
        if (!$item) {
            // Nothing eligible is left in the queue
            break;
        }

        // Rate limiting: no wait before the very first download
        if ($previous_finish_ms > 0) {
            $since_last_ms = microtime(true) * 1000 - $previous_finish_ms;
            if ($since_last_ms < self::DOWNLOAD_DELAY_MS) {
                $pause_ms = (int) (self::DOWNLOAD_DELAY_MS - $since_last_ms);
                usleep($pause_ms * 1000);
            }
        }

        $outcome            = $this->download_media($item->id);
        $previous_finish_ms = microtime(true) * 1000;
        $stats['processed']++;

        $payload = array(
            'media_key'   => $item->media_key,
            'listing_key' => $item->listing_key,
        );

        if ($outcome === true) {
            $bucket = 'success';
            $event  = 'media_downloaded';
        } elseif ($outcome === 'backoff') {
            $bucket = 'skipped';
            $event  = 'media_backoff';
        } else {
            // Any other return value is the error message
            $bucket           = 'failed';
            $event            = 'media_error';
            $payload['error'] = $outcome;
        }

        $stats[$bucket]++;
        if ($progress_callback) {
            call_user_func($progress_callback, $event, $payload);
        }
    }

    return $stats;
}
/**
 * Download a media file
 *
 * Increments the record's attempt counter, fetches media_url and, on HTTP 200
 * with a non-empty body, stores the file via save_media_file(). HTTP 429 and
 * 5xx responses put the record into retry backoff; every other failure is
 * recorded through handle_download_failure() without backoff. Each request is
 * written to the media log.
 *
 * A record whose media_url is empty is marked 'failed' right away: such a row
 * can pass the queue's "media_url IS NOT NULL" filter (empty string), and if it
 * were left unchanged it would be returned as the next queued item forever.
 *
 * @param int $media_id Media record ID
 * @return bool|string True on success, 'backoff' if set for retry, error message on failure
 */
public function download_media($media_id) {
    global $wpdb;

    $table = $this->db->media_table();
    $media = $wpdb->get_row($wpdb->prepare(
        "SELECT * FROM {$table} WHERE id = %d",
        $media_id
    ));

    if (!$media) {
        return 'No media URL';
    }

    if (empty($media->media_url)) {
        // Take the undownloadable row out of the queue so it cannot block later items.
        $wpdb->update(
            $table,
            array(
                'download_status' => 'failed',
                'download_error'  => 'No media URL',
            ),
            array('id' => $media_id)
        );
        return 'No media URL';
    }

    // Increment attempt counter
    $wpdb->update(
        $table,
        array('download_attempts' => $media->download_attempts + 1),
        array('id' => $media_id)
    );

    // Make the request
    $start_time = microtime(true);
    $response   = wp_remote_get($media->media_url, array(
        'timeout' => 60,
        'stream'  => false,
    ));
    $response_time_ms = (int) ((microtime(true) - $start_time) * 1000);

    // Transport-level failure (no HTTP response at all)
    if (is_wp_error($response)) {
        $error_msg = $response->get_error_message();
        $this->log_download($media, 'error', null, $response_time_ms, $error_msg);
        $this->handle_download_failure($media_id, $error_msg, false);
        return $error_msg;
    }

    $status_code = wp_remote_retrieve_response_code($response);
    $this->log_download($media, 'attempt', $status_code, $response_time_ms, null);

    // Success
    if ($status_code === 200) {
        $body = wp_remote_retrieve_body($response);
        if (empty($body)) {
            $error_msg = 'Empty response body';
            $this->log_download($media, 'error', $status_code, $response_time_ms, $error_msg);
            $this->handle_download_failure($media_id, $error_msg, false);
            return $error_msg;
        }

        // Save the file
        $save_result = $this->save_media_file($media, $body, $response);
        if ($save_result !== true) {
            $this->log_download($media, 'error', $status_code, $response_time_ms, $save_result);
            $this->handle_download_failure($media_id, $save_result, false);
            return $save_result;
        }

        $this->log_download($media, 'success', $status_code, $response_time_ms, null);
        return true;
    }

    $error_msg = "HTTP {$status_code}";

    // Rate limited (429) or server error (5xx) - set backoff
    if (in_array($status_code, array(429, 500, 502, 503, 504), true)) {
        $this->log_download($media, 'rate_limited', $status_code, $response_time_ms, $error_msg);
        $this->handle_download_failure($media_id, $error_msg, true);
        return 'backoff';
    }

    // Non-retryable status (404, 403, etc.)
    $this->log_download($media, 'permanent_error', $status_code, $response_time_ms, $error_msg);
    $this->handle_download_failure($media_id, $error_msg, false);
    return $error_msg;
}
/**
 * Handle download failure
 *
 * Stores the error on the record, optionally schedules a retry backoff, and
 * marks the record 'failed' (also writing it to the missing-media log) once its
 * attempt counter has reached MAX_ATTEMPTS.
 *
 * @param int    $media_id    Media ID
 * @param string $error       Error message
 * @param bool   $set_backoff Whether to set retry backoff
 */
private function handle_download_failure($media_id, $error, $set_backoff) {
    global $wpdb;

    $table = $this->db->media_table();
    $media = $wpdb->get_row($wpdb->prepare(
        "SELECT * FROM {$table} WHERE id = %d",
        $media_id
    ));

    $update_data = array(
        'download_error' => $error,
    );

    if ($set_backoff) {
        // retry_after is compared against current_time('mysql') when the queue is read,
        // so it must be derived from that same clock. Deriving it from the PHP default
        // timezone instead skews the backoff by the site's UTC offset.
        $now_ts                     = strtotime(current_time('mysql'));
        $update_data['retry_after'] = date(
            'Y-m-d H:i:s',
            $now_ts + self::RETRY_BACKOFF_HOURS * HOUR_IN_SECONDS
        );
    }

    // Check if max attempts reached
    if ($media && $media->download_attempts >= self::MAX_ATTEMPTS) {
        $update_data['download_status'] = 'failed';
        $this->log_missing_media($media, $error);
    }

    $wpdb->update($table, $update_data, array('id' => $media_id));
}
/**
 * Save downloaded media file to disk
 *
 * Writes the body to <listing dir>/<media_order>.<extension> and records the
 * relative path, public URL, size and MIME type on the media row, which is also
 * marked 'completed'.
 *
 * @param object $media    Media record
 * @param string $body     File contents
 * @param array  $response HTTP response
 * @return bool|string True on success, error message on failure
 */
private function save_media_file($media, $body, $response) {
    global $wpdb;

    // Determine file extension from content type or URL
    $content_type = wp_remote_retrieve_header($response, 'content-type');
    if (is_array($content_type)) {
        // A header sent more than once comes back as an array; use the first value.
        $content_type = (string) reset($content_type);
    }
    $extension = $this->get_extension_from_content_type($content_type, $media->media_url);

    // Create directory (wp_mkdir_p also returns true when it already exists)
    $listing_dir = $this->get_listing_dir($media->listing_key);
    if (!wp_mkdir_p($listing_dir)) {
        return 'Failed to create directory';
    }

    // Save file
    $filename  = $media->media_order . '.' . $extension;
    $file_path = $listing_dir . '/' . $filename;
    if (file_put_contents($file_path, $body) === false) {
        return 'Failed to write file';
    }

    // Calculate relative path (mirrors the layout used by get_listing_dir())
    $prefix        = substr($media->listing_key, 0, 2);
    $relative_path = $prefix . '/' . $media->listing_key . '/' . $filename;
    $local_url     = $this->get_upload_url() . '/' . $relative_path;

    // Update record
    $wpdb->update(
        $this->db->media_table(),
        array(
            'local_path'      => $relative_path,
            'local_url'       => $local_url,
            'file_size'       => strlen($body),
            'mime_type'       => $content_type,
            'downloaded_at'   => current_time('mysql'),
            'download_error'  => null,
            'download_status' => 'completed',
            'retry_after'     => null,
        ),
        array('id' => $media->id)
    );

    return true;
}
/**
 * Record one download event in the media log table.
 *
 * @param object      $media            Media record
 * @param string      $action           Action type (attempt, success, error, rate_limited, permanent_error)
 * @param int|null    $status_code      HTTP status code
 * @param int         $response_time_ms Response time in milliseconds
 * @param string|null $error            Error message
 */
private function log_download($media, $action, $status_code, $response_time_ms, $error) {
    global $wpdb;

    $entry = array(
        'media_id'         => $media->id,
        'listing_key'      => $media->listing_key,
        'media_key'        => $media->media_key,
        'action'           => $action,
        'status_code'      => $status_code,
        'response_time_ms' => $response_time_ms,
        'error_message'    => $error,
        'url'              => $media->media_url,
        'created_at'       => current_time('mysql'),
    );

    $wpdb->insert($this->db->media_log_table(), $entry);
}
/**
 * Get file extension from content type
 *
 * Known image MIME types map to a fixed extension. Otherwise the extension of
 * the URL path is used, but only when WordPress allows that file type for
 * uploads; anything else falls back to 'jpg'. The extension ends up in a
 * filename inside the uploads directory, so an arbitrary extension from the
 * remote URL (for example "php") must never be accepted.
 *
 * @param string|array|null $content_type Content type header
 * @param string            $url          Original URL as fallback
 * @return string File extension (lowercase, without dot)
 */
private function get_extension_from_content_type($content_type, $url) {
    if (is_array($content_type)) {
        // A repeated header arrives as an array; use the first value.
        $content_type = reset($content_type);
    }

    // Extract main type from content-type header, ignoring parameters and padding
    $mime = strtolower(trim(explode(';', (string) $content_type)[0]));

    $map = array(
        'image/jpeg' => 'jpg',
        'image/jpg'  => 'jpg',
        'image/png'  => 'png',
        'image/gif'  => 'gif',
        'image/webp' => 'webp',
    );
    if (isset($map[$mime])) {
        return $map[$mime];
    }

    // Fallback to URL extension; parse_url() yields null/false when there is no usable path
    $path = parse_url((string) $url, PHP_URL_PATH);
    $ext  = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';

    if ($ext !== '') {
        $check = wp_check_filetype('file.' . $ext);
        if (!empty($check['ext'])) {
            return $ext;
        }
    }

    return 'jpg';
}
/**
 * Location of the plain-text log that lists media which could not be downloaded.
 *
 * @return string File path inside the uploads base directory
 */
public function get_missing_log_path() {
    $uploads = wp_upload_dir();
    $base    = $uploads['basedir'];

    return $base . '/mls-missing-media.log';
}
/**
 * Log a failed media download to the missing media log file
 *
 * Appends exactly one line per entry:
 * "[timestamp] listing_key | media_key | error | media_url".
 * Whitespace runs (including line breaks) inside the error message and URL are
 * collapsed to a single space, because get_missing_count() counts entries by
 * counting newlines.
 *
 * @param object $media Media record
 * @param string $error Error message
 */
private function log_missing_media($media, $error) {
    $log_file  = $this->get_missing_log_path();
    $timestamp = date('Y-m-d H:i:s');

    $one_line_error = trim(preg_replace('/\s+/', ' ', (string) $error));
    $one_line_url   = trim(preg_replace('/\s+/', ' ', (string) $media->media_url));

    $line = sprintf(
        "[%s] %s | %s | %s | %s\n",
        $timestamp,
        $media->listing_key,
        $media->media_key,
        $one_line_error,
        $one_line_url
    );

    // LOCK_EX keeps concurrent writers from interleaving partial lines
    file_put_contents($log_file, $line, FILE_APPEND | LOCK_EX);
}
/**
 * Remove the missing media log file, if there is one.
 */
public function clear_missing_log() {
    $path = $this->get_missing_log_path();
    if (!file_exists($path)) {
        return;
    }

    unlink($path);
}
/**
 * Count the entries in the missing media log.
 *
 * The count is the number of newline characters in the file; a missing file
 * counts as zero.
 *
 * @return int Number of missing media entries
 */
public function get_missing_count() {
    $path = $this->get_missing_log_path();

    return file_exists($path)
        ? substr_count(file_get_contents($path), "\n")
        : 0;
}
/**
 * Delete all media for a property
 *
 * Removes the listing's media directory and its rows in the media table.
 *
 * Guard rails: an empty key does nothing at all, because the directory would
 * resolve to the shared media root. Keys that are "." or "..", or that contain
 * a path separator, never touch the filesystem because they could resolve
 * outside the listing's own directory; their database rows are still removed.
 *
 * @param string $listing_key Listing key
 */
public function delete_property_media($listing_key) {
    global $wpdb;

    $key = (string) $listing_key;
    if ($key === '') {
        return;
    }

    $is_plain_name = $key !== '.' && $key !== '..' && strpbrk($key, "/\\") === false;

    // Delete files
    if ($is_plain_name) {
        $listing_dir = $this->get_listing_dir($key);
        if (file_exists($listing_dir)) {
            $this->recursive_delete($listing_dir);
        }
    }

    // Delete records
    $wpdb->delete(
        $this->db->media_table(),
        array('listing_key' => $listing_key)
    );
}
/**
 * Recursively delete a directory
 *
 * Symbolic links are removed as links and never followed, so nothing outside
 * the given tree is deleted. A directory that cannot be listed is left alone.
 *
 * @param string $dir Directory path
 */
private function recursive_delete($dir) {
    if (is_link($dir)) {
        // Remove the link itself; its target is not ours to delete
        unlink($dir);
        return;
    }
    if (!is_dir($dir)) {
        return;
    }

    $entries = scandir($dir);
    if ($entries === false) {
        // Unreadable directory: scandir() already raised a warning
        return;
    }

    foreach (array_diff($entries, array('.', '..')) as $entry) {
        $path = $dir . '/' . $entry;
        if (is_dir($path) && !is_link($path)) {
            $this->recursive_delete($path);
        } else {
            unlink($path);
        }
    }

    rmdir($dir);
}
/**
 * Load every media record of a listing, ordered by media_order ascending.
 *
 * @param string $listing_key Listing key
 * @return array Media records
 */
public function get_listing_media($listing_key) {
    global $wpdb;

    $table = $this->db->media_table();
    $sql   = "SELECT * FROM {$table}\n"
        . "WHERE listing_key = %s\n"
        . "ORDER BY media_order ASC";

    $query = $wpdb->prepare($sql, $listing_key);

    return $wpdb->get_results($query);
}
/**
 * Local URL of a listing's first downloaded media item.
 *
 * Looks at the row with the lowest media_order that has a local path and
 * returns its local_url; null when there is no such row or the URL is empty.
 *
 * @param string $listing_key Listing key
 * @return string|null Image URL
 */
public function get_primary_image($listing_key) {
    global $wpdb;

    $table = $this->db->media_table();
    $sql   = "SELECT local_url, media_url FROM {$table}\n"
        . "WHERE listing_key = %s AND local_path IS NOT NULL\n"
        . "ORDER BY media_order ASC\n"
        . "LIMIT 1";

    $first = $wpdb->get_row($wpdb->prepare($sql, $listing_key));

    return ($first && $first->local_url) ? $first->local_url : null;
}
/**
 * Reset failed downloads for retry
 *
 * Targets rows that are 'failed' or have used up MAX_ATTEMPTS, puts them back
 * to 'pending' with a zeroed attempt counter, and clears error and backoff.
 * With $listing_key given, only that listing's rows are touched.
 *
 * @param string|null $listing_key Optional listing key to filter
 * @return int Number of records reset
 */
public function reset_failed_downloads($listing_key = null) {
    global $wpdb;

    // The OR group must be parenthesised: AND binds tighter than OR, so without
    // the parentheses the listing filter would apply to the attempts test only
    // and 'failed' rows of every listing would be reset.
    $where  = "(download_status = 'failed' OR download_attempts >= %d)";
    $values = array(current_time('mysql'), self::MAX_ATTEMPTS);

    if ($listing_key !== null && $listing_key !== '') {
        $where   .= " AND listing_key = %s";
        $values[] = $listing_key;
    }

    $sql = $wpdb->prepare(
        "UPDATE {$this->db->media_table()}
        SET download_status = 'pending',
            download_attempts = 0,
            download_error = NULL,
            retry_after = NULL,
            queued_at = %s
        WHERE {$where}",
        $values
    );

    $wpdb->query($sql);

    return $wpdb->rows_affected;
}
/**
 * Clean up orphaned media directories
 *
 * Walks <upload dir>/<prefix>/<listing_key> and removes every listing directory
 * whose listing_key has no row in the properties table. A directory is removed
 * only when the lookup positively returns zero; if the query fails, the
 * directory is kept.
 *
 * @return int Number of listing directories deleted
 */
public function cleanup_orphaned_files() {
    global $wpdb;

    $deleted  = 0;
    $base_dir = $this->get_upload_dir();
    if (!is_dir($base_dir)) {
        return 0;
    }

    $prefixes = scandir($base_dir);
    if ($prefixes === false) {
        return 0;
    }

    $properties_table = $this->db->properties_table();

    // Iterate through prefix directories
    foreach (array_diff($prefixes, array('.', '..')) as $prefix) {
        $prefix_dir = $base_dir . '/' . $prefix;
        if (!is_dir($prefix_dir)) {
            continue;
        }

        $listing_keys = scandir($prefix_dir);
        if ($listing_keys === false) {
            continue;
        }

        // Iterate through listing directories
        foreach (array_diff($listing_keys, array('.', '..')) as $listing_key) {
            $listing_dir = $prefix_dir . '/' . $listing_key;
            if (!is_dir($listing_dir)) {
                continue;
            }

            // Check if listing exists in database
            $count = $wpdb->get_var($wpdb->prepare(
                "SELECT COUNT(*) FROM {$properties_table} WHERE listing_key = %s",
                $listing_key
            ));

            // get_var() returns null when the query fails; that must not be read as "no such listing".
            if ($count !== null && (int) $count === 0) {
                $this->recursive_delete($listing_dir);
                $deleted++;
            }
        }
    }

    return $deleted;
}
/**
 * Read the newest entries of the media download log.
 *
 * @param int         $limit  Number of entries to return
 * @param string|null $action Optional action filter
 * @return array Log entries, newest first
 */
public function get_download_logs($limit = 100, $action = null) {
    global $wpdb;

    $table  = $this->db->media_log_table();
    $filter = $action ? "WHERE action = %s" : '';
    $args   = $action ? array($action, $limit) : array($limit);

    $sql = "SELECT * FROM {$table}\n"
        . "{$filter}\n"
        . "ORDER BY created_at DESC\n"
        . "LIMIT %d";

    return $wpdb->get_results($wpdb->prepare($sql, $args));
}
/**
 * Clear old download logs
 *
 * Log rows are stamped with current_time('mysql'), so the cutoff is derived
 * from that same clock; deriving it from the PHP default timezone would shift
 * the retention window by the site's UTC offset. A negative $days_old deletes
 * nothing.
 *
 * @param int $days_old Delete logs older than this many days
 * @return int Number of entries deleted
 */
public function clear_old_logs($days_old = 7) {
    global $wpdb;

    $days = (int) $days_old;
    if ($days < 0) {
        return 0;
    }

    $now_ts = strtotime(current_time('mysql'));
    $cutoff = date('Y-m-d H:i:s', $now_ts - $days * DAY_IN_SECONDS);

    $wpdb->query($wpdb->prepare(
        "DELETE FROM {$this->db->media_log_table()} WHERE created_at < %s",
        $cutoff
    ));

    return $wpdb->rows_affected;
}
/**
 * Legacy entry point, kept for backward compatibility.
 *
 * Only queues the media via queue_property_media(); the actual download happens
 * later through process_queue().
 *
 * @param string        $listing_key       Listing key
 * @param array         $media_array       Media array from API
 * @param bool          $force             Accepted for signature compatibility; not used by this method
 * @param callable|null $progress_callback Callback for progress updates
 * @return mixed Whatever queue_property_media() returns
 */
public function sync_property_media($listing_key, $media_array, $force = false, $progress_callback = null) {
    $queued = $this->queue_property_media($listing_key, $media_array, $progress_callback);

    return $queued;
}
/**
 * Legacy alias of process_queue(), kept for backward compatibility.
 *
 * @param int           $limit             Max media to download
 * @param callable|null $progress_callback Callback for progress updates
 * @return array Stats as returned by process_queue()
 */
public function download_pending($limit = 100, $progress_callback = null) {
    $stats = $this->process_queue($limit, $progress_callback);

    return $stats;
}
}