From 6eadf3d266e83bef172b4fb802588ce06ca2e910 Mon Sep 17 00:00:00 2001 From: "Hanson.xyz Dev" Date: Sun, 14 Dec 2025 22:52:58 -0600 Subject: [PATCH] Add queue-based media download system with rate limiting - Add download_status, retry_after, queued_at columns to mls_media table - Add mls_media_log table for download attempt tracking - Rewrite media handler to queue downloads instead of immediate download - Add 700ms delay between downloads (25% buffer over 2/sec limit) - Add 3-hour backoff for rate-limited (429) responses - Add max 5 attempts before marking as permanently failed - Add wp mls media command: status, process, reset, logs - Deprecate wp mls sync media in favor of wp mls media process - Update documentation with queue system details and cron examples Media downloads are now separate from property sync: 1. wp mls sync full/incremental - syncs properties, queues media 2. wp mls media process - downloads queued media with rate limiting Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../mls-by-hansonxyz/cli/class-mls-cli.php | 302 ++++++- .../plugins/mls-by-hansonxyz/docs/CLAUDE.md | 80 +- .../includes/class-mls-db.php | 39 +- .../includes/class-mls-media-handler.php | 840 ++++++++++++------ .../mls-by-hansonxyz/mls-by-hansonxyz.php | 3 +- 5 files changed, 930 insertions(+), 334 deletions(-) diff --git a/wp-content/plugins/mls-by-hansonxyz/cli/class-mls-cli.php b/wp-content/plugins/mls-by-hansonxyz/cli/class-mls-cli.php index 76bd75ea..476c0ba9 100644 --- a/wp-content/plugins/mls-by-hansonxyz/cli/class-mls-cli.php +++ b/wp-content/plugins/mls-by-hansonxyz/cli/class-mls-cli.php @@ -37,6 +37,7 @@ class MLS_CLI { WP_CLI::add_command('mls stats', array($instance, 'stats')); WP_CLI::add_command('mls cache', array($instance, 'cache')); WP_CLI::add_command('mls recovery', array($instance, 'recovery')); + WP_CLI::add_command('mls media', array($instance, 'media')); } /** @@ -337,40 +338,14 @@ class MLS_CLI { break; 
case 'media': - WP_CLI::line('Downloading pending media...'); - if (!$quiet) { - WP_CLI::line('Legend: P=downloaded p=skipped E=error'); - echo "\n"; - } - - $media_handler = $this->plugin->get_media_handler(); - $result = $media_handler->download_pending($limit ?: 100, $progress_callback); - - if (!$quiet) { - echo "\n"; - } - WP_CLI::line(sprintf( - 'Media download complete: %d success, %d failed out of %d total', - $result['success'], - $result['failed'], - $result['total'] + // Redirect to the new media command + WP_CLI::line('Note: "wp mls sync media" is deprecated. Use "wp mls media process" instead.'); + WP_CLI::line(''); + $this->media(array('process'), array( + 'limit' => $limit ?: 100, + 'verbose' => $verbose, + 'quiet' => $quiet, )); - - if ($result['failed'] === 0 && $result['total'] > 0) { - WP_CLI::success('All media downloaded successfully!'); - } elseif ($result['total'] === 0) { - WP_CLI::success('No pending media to download.'); - } else { - WP_CLI::warning('Some media failed to download.'); - $missing_count = $media_handler->get_missing_count(); - if ($missing_count > 0) { - WP_CLI::line(sprintf( - 'Missing media log: %s (%d entries)', - $media_handler->get_missing_log_path(), - $missing_count - )); - } - } break; case 'resume': @@ -823,6 +798,267 @@ class MLS_CLI { } } + /** + * Manage media download queue. 
+ * + * ## OPTIONS + * + * <action> + * : Action: queue, process, status, reset, logs + * + * [--limit=<number>] + * : Limit number of items to process + * + * [--verbose] + * : Show detailed output + * + * [--quiet] + * : Suppress progress output + * + * [--days=<days>] + * : Days of logs to keep (for logs --clear) + * + * [--clear] + * : Clear logs older than --days + * + * ## EXAMPLES + * + * wp mls media status # Show queue statistics + * wp mls media process # Process pending downloads (rate limited) + * wp mls media process --limit=50 # Process up to 50 items + * wp mls media reset # Reset failed downloads for retry + * wp mls media logs # Show recent download logs + * wp mls media logs --clear --days=7 # Clear logs older than 7 days + * + * @subcommand media + */ + public function media($args, $assoc_args) { + $action = isset($args[0]) ? $args[0] : 'status'; + $limit = isset($assoc_args['limit']) ? (int) $assoc_args['limit'] : 100; + $verbose = !empty($assoc_args['verbose']); /* !empty(), not isset(): the deprecated "sync media" path passes these keys with explicit values */ + $quiet = !empty($assoc_args['quiet']); + + $media_handler = $this->plugin->get_media_handler(); + + switch ($action) { + case 'status': + case 'queue': + $stats = $media_handler->get_queue_stats(); + + WP_CLI::line(''); + WP_CLI::line('=== Media Download Queue ==='); + WP_CLI::line(''); + WP_CLI::line(sprintf('Pending total: %d', $stats['pending'])); + WP_CLI::line(sprintf('Ready now: %d', $stats['ready'])); + WP_CLI::line(sprintf('In backoff: %d (retry scheduled)', $stats['in_backoff'])); + WP_CLI::line(sprintf('Failed: %d (max attempts reached)', $stats['failed'])); + WP_CLI::line(sprintf('Completed: %d', $stats['completed'])); + WP_CLI::line(''); + + if ($stats['ready'] > 0) { + WP_CLI::line(sprintf( + 'Run "wp mls media process --limit=%d" to download pending media.', + min($stats['ready'], 100) + )); + WP_CLI::line(sprintf( + 'Estimated time: %d minutes (at 700ms per image)', + ceil($stats['ready'] * 0.7 / 60) + )); + } + + if ($stats['failed'] > 0) { + WP_CLI::line(''); + WP_CLI::line('Run "wp mls media 
reset" to retry failed downloads.'); + } + WP_CLI::line(''); + break; + + case 'process': + $stats = $media_handler->get_queue_stats(); + + if ($stats['ready'] === 0) { + WP_CLI::success('No media ready to download.'); + break; + } + + $process_count = min($limit, $stats['ready']); + + WP_CLI::line(sprintf( + 'Processing %d media items (rate limited: 1 per 700ms)...', + $process_count + )); + WP_CLI::line(sprintf( + 'Estimated time: %d minutes', + ceil($process_count * 0.7 / 60) + )); + + if (!$quiet) { + WP_CLI::line('Legend: P=downloaded B=backoff (retry later) E=error'); + echo "\n"; + } + + // Progress callback + $progress_callback = null; + if (!$quiet) { + $progress_callback = function($event, $data = array()) use ($verbose) { + if ($verbose) { + $this->output_verbose_media_event($event, $data); + } else { + switch ($event) { + case 'media_downloaded': + echo 'P'; + break; + case 'media_backoff': + echo 'B'; + break; + case 'media_error': + echo 'E'; + break; + } + } + }; + } + + $result = $media_handler->process_queue($process_count, $progress_callback); + + if (!$quiet) { + echo "\n\n"; + } + + WP_CLI::line(sprintf( + 'Results: %d success, %d backoff, %d failed out of %d processed', + $result['success'], + $result['skipped'], + $result['failed'], + $result['processed'] + )); + + // Show updated stats + $new_stats = $media_handler->get_queue_stats(); + WP_CLI::line(sprintf('Queue remaining: %d ready, %d in backoff', $new_stats['ready'], $new_stats['in_backoff'])); + + if ($result['failed'] > 0 || $result['skipped'] > 0) { + WP_CLI::line(''); + WP_CLI::line('Items in backoff will be retried after 3 hours.'); + WP_CLI::line('Run "wp mls media logs" to see download history.'); + } + + if ($result['success'] > 0) { + WP_CLI::success('Media processing complete.'); + } + break; + + case 'reset': + WP_CLI::line('Resetting failed downloads for retry...'); + + $reset_count = $media_handler->reset_failed_downloads(); + + if ($reset_count > 0) { + 
WP_CLI::success(sprintf('Reset %d failed downloads. They will be retried on next process.', $reset_count)); + } else { + WP_CLI::success('No failed downloads to reset.'); + } + break; + + case 'logs': + if (isset($assoc_args['clear'])) { + $days = isset($assoc_args['days']) ? (int) $assoc_args['days'] : 7; + $deleted = $media_handler->clear_old_logs($days); + WP_CLI::success(sprintf('Deleted %d log entries older than %d days.', $deleted, $days)); + break; + } + + $logs = $media_handler->get_download_logs($limit); + + if (empty($logs)) { + WP_CLI::success('No download logs found.'); + break; + } + + WP_CLI::line(''); + WP_CLI::line('=== Recent Download Logs ==='); + WP_CLI::line(''); + + foreach ($logs as $log) { + $status_indicator = ''; + switch ($log->action) { + case 'success': + $status_indicator = '[OK]'; + break; + case 'rate_limited': + $status_indicator = '[429]'; + break; + case 'permanent_error': + $status_indicator = '[ERR]'; + break; + case 'error': + $status_indicator = '[FAIL]'; + break; + default: + $status_indicator = "[{$log->action}]"; + } + + $line = sprintf( + '%s %s %s %s %dms', + $log->created_at, + $status_indicator, + $log->listing_key, + $log->media_key, + $log->response_time_ms + ); + + if ($log->status_code) { + $line .= " HTTP:{$log->status_code}"; + } + + if ($log->error_message) { + $line .= " - {$log->error_message}"; + } + + WP_CLI::line($line); + } + + WP_CLI::line(''); + WP_CLI::line(sprintf('Showing %d most recent entries. Use --limit=N to see more.', count($logs))); + WP_CLI::line(''); + break; + + default: + WP_CLI::error("Unknown action: {$action}. Use 'status', 'process', 'reset', or 'logs'."); + } + } + + /** + * Output verbose media event information + * + * @param string $event Event name + * @param array $data Event data + */ + private function output_verbose_media_event($event, $data) { + $timestamp = date('H:i:s'); + + switch ($event) { + case 'media_downloaded': + $listing = $data['listing_key'] ?? 
'unknown'; + $key = $data['media_key'] ?? 'unknown'; + WP_CLI::line("[{$timestamp}] DOWNLOADED: {$listing} / {$key}"); + break; + + case 'media_backoff': + $listing = $data['listing_key'] ?? 'unknown'; + $key = $data['media_key'] ?? 'unknown'; + WP_CLI::warning("[{$timestamp}] BACKOFF: {$listing} / {$key} - will retry in 3 hours"); + break; + + case 'media_error': + $listing = $data['listing_key'] ?? 'unknown'; + $key = $data['media_key'] ?? 'unknown'; + $error = $data['error'] ?? 'Unknown error'; + WP_CLI::error("[{$timestamp}] ERROR: {$listing} / {$key} - {$error}", false); + break; + } + } + /** * Recursively delete a directory */ diff --git a/wp-content/plugins/mls-by-hansonxyz/docs/CLAUDE.md b/wp-content/plugins/mls-by-hansonxyz/docs/CLAUDE.md index 04619c72..7b919d7b 100644 --- a/wp-content/plugins/mls-by-hansonxyz/docs/CLAUDE.md +++ b/wp-content/plugins/mls-by-hansonxyz/docs/CLAUDE.md @@ -18,7 +18,8 @@ All tables use `{$wpdb->prefix}mls_` prefix: | Table | Purpose | |-------|---------| | `mls_properties` | Listing data | -| `mls_media` | Media files | +| `mls_media` | Media files with download queue | +| `mls_media_log` | Media download attempt history | | `mls_sync_state` | Sync progress tracking | | `mls_rate_limits` | API usage tracking | | `mls_sync_log` | Debug logging | @@ -34,18 +35,20 @@ define('MLSGRID_ACCESS_TOKEN', 'your-token-here'); ### MLS Grid API Rate Limits MUST comply with these limits: -- 2 requests/second +- 2 requests/second (500ms minimum between requests) - 7,200 requests/hour - 40,000 requests/day - 4GB data/hour +Media downloads use 700ms delay (25% buffer) between requests. 
+ ### Key Files | File | Purpose | |------|---------| | `includes/class-mls-api-client.php` | API communication, auth, gzip | | `includes/class-mls-sync-engine.php` | Sync orchestration | -| `includes/class-mls-media-handler.php` | Media download/storage | +| `includes/class-mls-media-handler.php` | Media queue and download | | `includes/class-mls-query.php` | Public query API | | `includes/class-mls-rate-limiter.php` | Rate limit compliance | | `cli/class-mls-cli.php` | WP-CLI commands | @@ -61,12 +64,19 @@ wp mls test auth wp mls status wp mls status rate-limits -# Run sync (use --verbose for detailed output) +# Run property sync (queues media, does not download) wp mls sync full [--dry-run] [--limit=N] [--verbose] wp mls sync incremental [--dry-run] [--verbose] -wp mls sync media [--limit=N] [--verbose] wp mls sync resume --id= +# Media download queue (separate from property sync) +wp mls media status # Show queue stats +wp mls media process # Download queued media (rate limited) +wp mls media process --limit=50 --verbose +wp mls media reset # Reset failed downloads for retry +wp mls media logs # View download history +wp mls media logs --clear --days=7 + # Statistics wp mls stats @@ -83,31 +93,54 @@ wp mls recovery auto # Auto-resume most recent failed sync wp mls recovery cleanup # Mark stale (>1hr) syncs as failed ``` +### Media Queue System + +Media downloads are now queue-based and separate from property sync: + +1. **Property sync** (`wp mls sync full/incremental`) queues media records +2. **Media process** (`wp mls media process`) downloads queued media with rate limiting +3. Downloads are rate-limited to 700ms between requests (under 2/sec limit) +4. Rate-limited (429) and server-error (5xx) responses get a 3-hour backoff before retry +5. 
After 5 attempts, items are marked failed and logged + +**Queue states:** +- `pending` - Ready for download +- `completed` - Successfully downloaded +- `failed` - Max attempts reached + +**Media table columns:** +- `download_status` - pending/completed/failed +- `retry_after` - Next retry time (3hr backoff on rate limit) +- `queued_at` - When item was queued +- `download_attempts` - Attempt count (max 5) + ### Progress Output -Without --verbose (compact mode): +Property sync (compact mode): - `.` = new property created - `#` = property updated - `x` = property deleted - `-` = skipped (dry-run) -- `P` = photo downloaded -- `p` = photo skipped (already exists) -- `E` = photo error +- `q` = media queued +- `p` = media skipped (already downloaded) - `|` = page complete -With --verbose: Full timestamped output showing API requests, responses, and individual item status. +Media process (compact mode): +- `P` = downloaded +- `B` = backoff (retry later) +- `E` = error + +With --verbose: Full timestamped output. ### Missing Media Log -Failed media downloads are logged to: `wp-content/uploads/mls-missing-media.log` +Permanently failed media downloads logged to: `wp-content/uploads/mls-missing-media.log` Format: `[timestamp] listing_key | media_key | error | url` -Media downloads use exponential backoff (1s, 2s, 4s, 8s, 16s) for rate limit (429) and server errors (5xx). - ### Sync Recovery -The sync engine saves progress after each page, allowing interrupted syncs to resume: +The sync engine saves progress after each page: 1. **Automatic state tracking**: `last_next_link` saved after each API page 2. 
**Stale sync detection**: Syncs running >1 hour marked as failed @@ -116,9 +149,17 @@ The sync engine saves progress after each page, allowing interrupted syncs to re - `wp mls recovery auto` - Auto-resume most recent failed sync - `wp mls recovery list` - View all resumable syncs -For cron jobs, consider adding recovery at the start: +### Recommended Cron Setup + ```bash -wp mls recovery auto --quiet && wp mls sync incremental +# Property sync every 30 minutes +*/30 * * * * cd /var/www/html && wp mls recovery auto --quiet --allow-root && wp mls sync incremental --allow-root >> /var/log/mls-sync.log 2>&1 + +# Media downloads every 5 minutes (processes up to 50 items per run) +*/5 * * * * cd /var/www/html && wp mls media process --limit=50 --quiet --allow-root >> /var/log/mls-media.log 2>&1 + +# Full sync weekly (Sunday 3am) +0 3 * * 0 cd /var/www/html && wp mls sync full --allow-root >> /var/log/mls-sync.log 2>&1 ``` ### Public API Functions @@ -150,10 +191,10 @@ if (mls_is_available()) { ... } ### Sync Strategy -1. **Initial Import**: Full sync downloads all viewable properties -2. **Incremental**: Uses ModificationTimestamp to fetch only changes +1. **Property Sync**: Full/incremental sync downloads property data and queues media +2. **Media Queue**: Separate process downloads media with rate limiting 3. **Delete Handling**: MlgCanView=false triggers local deletion -4. **Media**: Downloads to wp-content/uploads/mls-listings/ +4. **Media Storage**: Downloads to wp-content/uploads/mls-listings/ 5. **Recovery**: Stores last_next_link for resume on failure ### Testing After Changes @@ -162,6 +203,7 @@ if (mls_is_available()) { ... 
} wp mls test connection wp mls test auth wp mls sync full --dry-run --limit=10 +wp mls media status wp mls stats ``` diff --git a/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-db.php b/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-db.php index c8255364..5be9e640 100644 --- a/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-db.php +++ b/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-db.php @@ -55,6 +55,13 @@ class MLS_DB { return $this->get_table_name(MLS_TABLE_SYNC_LOG); } + /** + * Get media log table name + */ + public function media_log_table() { + return $this->get_table_name(MLS_TABLE_MEDIA_LOG); + } + /** * Create all database tables */ @@ -163,6 +170,9 @@ class MLS_DB { downloaded_at DATETIME DEFAULT NULL, download_attempts INT(3) DEFAULT 0, download_error TEXT DEFAULT NULL, + retry_after DATETIME DEFAULT NULL, + queued_at DATETIME DEFAULT NULL, + download_status VARCHAR(20) DEFAULT 'pending', created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, @@ -170,7 +180,10 @@ class MLS_DB { PRIMARY KEY (id), UNIQUE KEY listing_media (listing_key, media_key), KEY listing_key (listing_key), - KEY media_order (media_order) + KEY media_order (media_order), + KEY download_status (download_status), + KEY retry_after (retry_after), + KEY queued_at (queued_at) ) {$charset_collate};"; dbDelta($sql_media); @@ -239,6 +252,29 @@ class MLS_DB { ) {$charset_collate};"; dbDelta($sql_sync_log); + + // Media download log table + $table_media_log = $wpdb->prefix . 
MLS_TABLE_MEDIA_LOG; + $sql_media_log = "CREATE TABLE {$table_media_log} ( + id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT, + media_id BIGINT(20) UNSIGNED NOT NULL, + listing_key VARCHAR(50) NOT NULL, + media_key VARCHAR(100) NOT NULL, + action VARCHAR(30) NOT NULL, + status_code INT(5) DEFAULT NULL, + response_time_ms INT(11) DEFAULT NULL, + error_message TEXT DEFAULT NULL, + url VARCHAR(1000) DEFAULT NULL, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + + PRIMARY KEY (id), + KEY media_id (media_id), + KEY listing_key (listing_key), + KEY action (action), + KEY created_at (created_at) + ) {$charset_collate};"; + + dbDelta($sql_media_log); } /** @@ -253,6 +289,7 @@ class MLS_DB { MLS_TABLE_SYNC_STATE, MLS_TABLE_RATE_LIMITS, MLS_TABLE_SYNC_LOG, + MLS_TABLE_MEDIA_LOG, ); foreach ($tables as $table) { diff --git a/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-media-handler.php b/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-media-handler.php index 4512a945..3ef93ed2 100644 --- a/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-media-handler.php +++ b/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-media-handler.php @@ -3,6 +3,10 @@ * MLS Media Handler * * Handles downloading and managing media files from MLS listings + * Uses a queue-based system with rate limiting to comply with API limits + * + * Rate limits: 2 requests/second (500ms minimum between requests) + * We use 700ms between requests (25% buffer) */ if (!defined('ABSPATH')) { @@ -16,6 +20,21 @@ class MLS_Media_Handler { */ const UPLOAD_SUBDIR = 'mls-listings'; + /** + * Minimum delay between media downloads in milliseconds (700ms = 25% buffer over 500ms limit) + */ + const DOWNLOAD_DELAY_MS = 700; + + /** + * Retry backoff time in hours for failed downloads + */ + const RETRY_BACKOFF_HOURS = 3; + + /** + * Maximum download attempts before permanent failure + */ + const MAX_ATTEMPTS = 5; + /** * Database instance */ @@ -67,14 +86,13 @@ class MLS_Media_Handler { } /** - * 
Sync media for a property + * Queue media for a property (does NOT download immediately) * * @param string $listing_key Listing key * @param array $media_array Media array from API - * @param bool $force Force re-download all media * @param callable|null $progress_callback Callback for progress updates */ - public function sync_property_media($listing_key, $media_array, $force = false, $progress_callback = null) { + public function queue_property_media($listing_key, $media_array, $progress_callback = null) { global $wpdb; if (empty($media_array)) { @@ -82,6 +100,8 @@ class MLS_Media_Handler { } $received_keys = array(); + $queued_count = 0; + $skipped_count = 0; foreach ($media_array as $media) { $media_key = $media['MediaKey'] ?? null; @@ -121,37 +141,28 @@ class MLS_Media_Handler { array('id' => $existing->id) ); - // Check if we need to re-download - if ($force || $this->needs_download($existing, $media)) { - $result = $this->download_media($existing->id); + // Check if we need to re-download (queue it) + if ($this->needs_download($existing, $media)) { + $this->mark_for_download($existing->id); + $queued_count++; if ($progress_callback) { - if ($result) { - call_user_func($progress_callback, 'media_downloaded', array('media_key' => $media_key)); - } else { - $error = $this->get_last_download_error($existing->id); - call_user_func($progress_callback, 'media_error', array('media_key' => $media_key, 'error' => $error)); - } + call_user_func($progress_callback, 'media_queued', array('media_key' => $media_key)); } } else { + $skipped_count++; if ($progress_callback) { call_user_func($progress_callback, 'media_skipped', array('media_key' => $media_key)); } } } else { - // Insert new record + // Insert new record - queued for download $data['created_at'] = current_time('mysql'); + $data['queued_at'] = current_time('mysql'); + $data['download_status'] = 'pending'; $wpdb->insert($this->db->media_table(), $data); - $new_id = $wpdb->insert_id; - - // Queue download - 
$result = $this->download_media($new_id); + $queued_count++; if ($progress_callback) { - if ($result) { - call_user_func($progress_callback, 'media_downloaded', array('media_key' => $media_key)); - } else { - $error = $this->get_last_download_error($new_id); - call_user_func($progress_callback, 'media_error', array('media_key' => $media_key, 'error' => $error)); - } + call_user_func($progress_callback, 'media_queued', array('media_key' => $media_key)); } } } @@ -180,20 +191,432 @@ class MLS_Media_Handler { $wpdb->delete($this->db->media_table(), array('id' => $record->id)); } } + + return array( + 'queued' => $queued_count, + 'skipped' => $skipped_count, + ); } /** - * Get the last download error for a media record + * Mark a media record for download * * @param int $media_id Media ID - * @return string|null Error message */ - private function get_last_download_error($media_id) { + private function mark_for_download($media_id) { global $wpdb; - return $wpdb->get_var($wpdb->prepare( - "SELECT download_error FROM {$this->db->media_table()} WHERE id = %d", + + $wpdb->update( + $this->db->media_table(), + array( + 'download_status' => 'pending', + 'queued_at' => current_time('mysql'), + 'local_path' => null, + 'local_url' => null, + 'downloaded_at' => null, + 'download_error' => null, + ), + array('id' => $media_id) + ); + } + + /** + * Check if media needs to be downloaded + * + * @param object $existing Existing media record + * @param array $new_data New media data from API + * @return bool + */ + private function needs_download($existing, $new_data) { + // No local file + if (empty($existing->local_path)) { + return true; + } + + // File doesn't exist + $file_path = $this->get_upload_dir() . '/' . $existing->local_path; + if (!file_exists($file_path)) { + return true; + } + + // Media URL changed + if ($existing->media_url !== ($new_data['MediaURL'] ?? 
null)) { + return true; + } + + return false; + } + + /** + * Get the next media item to download from the queue + * + * @return object|null Media record or null if queue is empty + */ + public function get_next_queued() { + global $wpdb; + + $now = current_time('mysql'); + + // Get next pending item that's not in retry backoff + return $wpdb->get_row($wpdb->prepare( + "SELECT * FROM {$this->db->media_table()} + WHERE download_status = 'pending' + AND media_url IS NOT NULL + AND download_attempts < %d + AND (retry_after IS NULL OR retry_after <= %s) + ORDER BY queued_at ASC + LIMIT 1", + self::MAX_ATTEMPTS, + $now + )); + } + + /** + * Get queue statistics + * + * @return array Queue stats + */ + public function get_queue_stats() { + global $wpdb; + + $now = current_time('mysql'); + + return array( + 'pending' => (int) $wpdb->get_var( + "SELECT COUNT(*) FROM {$this->db->media_table()} + WHERE download_status = 'pending'" + ), + 'ready' => (int) $wpdb->get_var($wpdb->prepare( + "SELECT COUNT(*) FROM {$this->db->media_table()} + WHERE download_status = 'pending' + AND media_url IS NOT NULL + AND download_attempts < %d + AND (retry_after IS NULL OR retry_after <= %s)", + self::MAX_ATTEMPTS, + $now + )), + 'in_backoff' => (int) $wpdb->get_var($wpdb->prepare( + "SELECT COUNT(*) FROM {$this->db->media_table()} + WHERE download_status = 'pending' + AND retry_after > %s", + $now + )), + 'failed' => (int) $wpdb->get_var($wpdb->prepare( + "SELECT COUNT(*) FROM {$this->db->media_table()} + WHERE download_status = 'failed' + OR download_attempts >= %d", + self::MAX_ATTEMPTS + )), + 'completed' => (int) $wpdb->get_var( + "SELECT COUNT(*) FROM {$this->db->media_table()} + WHERE download_status = 'completed'" + ), + ); + } + + /** + * Process media queue with rate limiting + * + * @param int $limit Max items to process + * @param callable|null $progress_callback Callback for progress updates + * @return array Processing stats + */ + public function process_queue($limit = 100, 
$progress_callback = null) { + $stats = array( + 'processed' => 0, + 'success' => 0, + 'failed' => 0, + 'skipped' => 0, + ); + + $last_download_time = 0; + + for ($i = 0; $i < $limit; $i++) { + $media = $this->get_next_queued(); + + if (!$media) { + // Queue empty + break; + } + + // Rate limiting: ensure minimum delay between downloads + $now_ms = microtime(true) * 1000; + $elapsed = $now_ms - $last_download_time; + + if ($elapsed < self::DOWNLOAD_DELAY_MS && $last_download_time > 0) { + $wait_ms = (int) (self::DOWNLOAD_DELAY_MS - $elapsed); + usleep($wait_ms * 1000); + } + + // Download the media + $result = $this->download_media($media->id); + $last_download_time = microtime(true) * 1000; + + $stats['processed']++; + + if ($result === true) { + $stats['success']++; + if ($progress_callback) { + call_user_func($progress_callback, 'media_downloaded', array( + 'media_key' => $media->media_key, + 'listing_key' => $media->listing_key, + )); + } + } elseif ($result === 'backoff') { + $stats['skipped']++; + if ($progress_callback) { + call_user_func($progress_callback, 'media_backoff', array( + 'media_key' => $media->media_key, + 'listing_key' => $media->listing_key, + )); + } + } else { + $stats['failed']++; + if ($progress_callback) { + call_user_func($progress_callback, 'media_error', array( + 'media_key' => $media->media_key, + 'listing_key' => $media->listing_key, + 'error' => $result, + )); + } + } + } + + return $stats; + } + + /** + * Download a media file + * + * @param int $media_id Media record ID + * @return bool|string True on success, 'backoff' if set for retry, error message on failure + */ + public function download_media($media_id) { + global $wpdb; + + $media = $wpdb->get_row($wpdb->prepare( + "SELECT * FROM {$this->db->media_table()} WHERE id = %d", $media_id )); + + if (!$media || empty($media->media_url)) { + return 'No media URL'; + } + + // Increment attempt counter + $wpdb->update( + $this->db->media_table(), + array('download_attempts' => 
$media->download_attempts + 1), + array('id' => $media_id) + ); + + // Make the request + $start_time = microtime(true); + + $response = wp_remote_get($media->media_url, array( + 'timeout' => 60, + 'stream' => false, + )); + + $response_time_ms = (int) ((microtime(true) - $start_time) * 1000); + $status_code = 0; + $error_msg = null; + + if (is_wp_error($response)) { + $error_msg = $response->get_error_message(); + $this->log_download($media, 'error', null, $response_time_ms, $error_msg); + $this->handle_download_failure($media_id, $error_msg, false); + return $error_msg; + } + + $status_code = wp_remote_retrieve_response_code($response); + $this->log_download($media, 'attempt', $status_code, $response_time_ms, null); + + // Success + if ($status_code === 200) { + $body = wp_remote_retrieve_body($response); + if (empty($body)) { + $error_msg = 'Empty response body'; + $this->log_download($media, 'error', $status_code, $response_time_ms, $error_msg); + $this->handle_download_failure($media_id, $error_msg, false); + return $error_msg; + } + + // Save the file + $save_result = $this->save_media_file($media, $body, $response); + if ($save_result !== true) { + $this->log_download($media, 'error', $status_code, $response_time_ms, $save_result); + $this->handle_download_failure($media_id, $save_result, false); + return $save_result; + } + + $this->log_download($media, 'success', $status_code, $response_time_ms, null); + return true; + } + + // Rate limited (429) or server error (5xx) - set backoff + $retryable = in_array($status_code, array(429, 500, 502, 503, 504)); + $error_msg = "HTTP {$status_code}"; + + if ($retryable) { + $this->log_download($media, 'rate_limited', $status_code, $response_time_ms, $error_msg); + $this->handle_download_failure($media_id, $error_msg, true); + return 'backoff'; + } + + // Permanent failure (404, 403, etc.) 
+ $this->log_download($media, 'permanent_error', $status_code, $response_time_ms, $error_msg); + $this->handle_download_failure($media_id, $error_msg, false); + return $error_msg; + } + + /** + * Handle download failure: record the error, optionally schedule a retry, and mark the item failed once MAX_ATTEMPTS is reached + * + * @param int $media_id Media ID + * @param string $error Error message + * @param bool $set_backoff Whether to set retry backoff + */ + private function handle_download_failure($media_id, $error, $set_backoff) { + global $wpdb; + + $media = $wpdb->get_row($wpdb->prepare( + "SELECT * FROM {$this->db->media_table()} WHERE id = %d", + $media_id + )); + + $update_data = array( + 'download_error' => $error, + ); + + if ($set_backoff) { + // Compute retry_after in site-local time (RETRY_BACKOFF_HOURS ahead) so it compares correctly with current_time('mysql') in the queue queries + $retry_after = gmdate('Y-m-d H:i:s', current_time('timestamp') + self::RETRY_BACKOFF_HOURS * HOUR_IN_SECONDS); + $update_data['retry_after'] = $retry_after; + } + + // Check if max attempts reached + if ($media && $media->download_attempts >= self::MAX_ATTEMPTS) { + $update_data['download_status'] = 'failed'; + $this->log_missing_media($media, $error); + } + + $wpdb->update( + $this->db->media_table(), + $update_data, + array('id' => $media_id) + ); + } + + /** + * Save downloaded media file to disk + * + * @param object $media Media record + * @param string $body File contents + * @param array $response HTTP response + * @return bool|string True on success, error message on failure + */ + private function save_media_file($media, $body, $response) { + global $wpdb; + + // Determine file extension from content type or URL + $content_type = wp_remote_retrieve_header($response, 'content-type'); + $extension = $this->get_extension_from_content_type($content_type, $media->media_url); + + // Create directory + $listing_dir = $this->get_listing_dir($media->listing_key); + if (!file_exists($listing_dir)) { + wp_mkdir_p($listing_dir); + } + + // Save file + $filename = $media->media_order . '.' . $extension; + $file_path = $listing_dir . '/' . 
$filename; + + if (file_put_contents($file_path, $body) === false) { + return 'Failed to write file'; + } + + // Calculate relative path + $prefix = substr($media->listing_key, 0, 2); + $relative_path = $prefix . '/' . $media->listing_key . '/' . $filename; + $local_url = $this->get_upload_url() . '/' . $relative_path; + + // Update record + $wpdb->update( + $this->db->media_table(), + array( + 'local_path' => $relative_path, + 'local_url' => $local_url, + 'file_size' => strlen($body), + 'mime_type' => $content_type, + 'downloaded_at' => current_time('mysql'), + 'download_error' => null, + 'download_status' => 'completed', + 'retry_after' => null, + ), + array('id' => $media->id) + ); + + return true; + } + + /** + * Log a download attempt to the media log table + * + * @param object $media Media record + * @param string $action Action type (attempt, success, error, rate_limited, permanent_error) + * @param int|null $status_code HTTP status code + * @param int $response_time_ms Response time in milliseconds + * @param string|null $error Error message + */ + private function log_download($media, $action, $status_code, $response_time_ms, $error) { + global $wpdb; + + $wpdb->insert( + $this->db->media_log_table(), + array( + 'media_id' => $media->id, + 'listing_key' => $media->listing_key, + 'media_key' => $media->media_key, + 'action' => $action, + 'status_code' => $status_code, + 'response_time_ms' => $response_time_ms, + 'error_message' => $error, + 'url' => $media->media_url, + 'created_at' => current_time('mysql'), + ) + ); + } + + /** + * Get file extension from content type + * + * @param string $content_type Content type header + * @param string $url Original URL as fallback + * @return string File extension + */ + private function get_extension_from_content_type($content_type, $url) { + // Extract main type from content-type header + $content_type = strtolower(explode(';', $content_type)[0]); + + $map = array( + 'image/jpeg' => 'jpg', + 'image/jpg' => 'jpg', 
+            'image/png' => 'png',
+            'image/gif' => 'gif',
+            'image/webp' => 'webp',
+        );
+
+        if (isset($map[$content_type])) {
+            return $map[$content_type];
+        }
+
+        // Fallback to URL extension, but only a known image extension: the URL is remote input and must never yield e.g. ".php"
+        $path = (string) parse_url($url, PHP_URL_PATH);
+        $ext = strtolower(pathinfo($path, PATHINFO_EXTENSION));
+
+        return in_array($ext, $map, true) ? $ext : 'jpg';
     }
 
     /**
@@ -250,227 +673,6 @@ class MLS_Media_Handler {
         return substr_count($content, "\n");
     }
 
-    /**
-     * Check if media needs to be downloaded
-     *
-     * @param object $existing Existing media record
-     * @param array $new_data New media data from API
-     * @return bool
-     */
-    private function needs_download($existing, $new_data) {
-        // No local file
-        if (empty($existing->local_path)) {
-            return true;
-        }
-
-        // File doesn't exist
-        $file_path = $this->get_upload_dir() . '/' . $existing->local_path;
-        if (!file_exists($file_path)) {
-            return true;
-        }
-
-        // Media URL changed
-        if ($existing->media_url !== ($new_data['MediaURL'] ?? null)) {
-            return true;
-        }
-
-        return false;
-    }
-
-    /**
-     * Download a media file
-     *
-     * @param int $media_id Media record ID
-     * @return bool Success
-     */
-    public function download_media($media_id) {
-        global $wpdb;
-
-        $media = $wpdb->get_row($wpdb->prepare(
-            "SELECT * FROM {$this->db->media_table()} WHERE id = %d",
-            $media_id
-        ));
-
-        if (!$media || empty($media->media_url)) {
-            return false;
-        }
-
-        // Increment attempt counter
-        $wpdb->update(
-            $this->db->media_table(),
-            array('download_attempts' => $media->download_attempts + 1),
-            array('id' => $media_id)
-        );
-
-        // Download with exponential backoff for rate limits
-        $max_retries = 5;
-        $response = null;
-        $status_code = 0;
-        $base_delay = 1; // Start with 1 second
-
-        for ($retry = 0; $retry < $max_retries; $retry++) {
-            // Exponential backoff: 1s, 2s, 4s, 8s, 16s
-            if ($retry > 0) {
-                $delay = $base_delay * pow(2, $retry - 1);
-                $this->logger->debug('Media download retry', array(
-                    'media_id' => $media_id,
-                    'retry' => $retry,
-                    'delay' => $delay,
-                ));
-                sleep($delay);
-            }
-
-
$response = wp_remote_get($media->media_url, array( - 'timeout' => 60, - 'stream' => false, - )); - - if (is_wp_error($response)) { - $error_msg = $response->get_error_message(); - $this->logger->warning('Media download failed', array( - 'media_id' => $media_id, - 'error' => $error_msg, - 'retry' => $retry, - )); - - if ($retry === $max_retries - 1) { - $wpdb->update( - $this->db->media_table(), - array('download_error' => $error_msg), - array('id' => $media_id) - ); - $this->log_missing_media($media, $error_msg); - return false; - } - continue; - } - - $status_code = wp_remote_retrieve_response_code($response); - - // Success - if ($status_code === 200) { - break; - } - - // Retryable errors: 429 (rate limit), 500, 502, 503, 504 (server errors) - $retryable = in_array($status_code, array(429, 500, 502, 503, 504)); - - if ($retryable && $retry < $max_retries - 1) { - $this->logger->debug('Media download retryable error', array( - 'media_id' => $media_id, - 'status_code' => $status_code, - 'retry' => $retry, - )); - continue; - } - - // Non-retryable or exhausted retries - record and fail - $error_msg = "HTTP {$status_code}"; - $wpdb->update( - $this->db->media_table(), - array('download_error' => $error_msg), - array('id' => $media_id) - ); - $this->log_missing_media($media, $error_msg); - return false; - } - - if ($status_code !== 200) { - $error_msg = "HTTP {$status_code}"; - $wpdb->update( - $this->db->media_table(), - array('download_error' => $error_msg), - array('id' => $media_id) - ); - $this->log_missing_media($media, $error_msg); - return false; - } - - $body = wp_remote_retrieve_body($response); - if (empty($body)) { - $wpdb->update( - $this->db->media_table(), - array('download_error' => 'Empty response'), - array('id' => $media_id) - ); - return false; - } - - // Determine file extension from content type or URL - $content_type = wp_remote_retrieve_header($response, 'content-type'); - $extension = $this->get_extension_from_content_type($content_type, 
$media->media_url); - - // Create directory - $listing_dir = $this->get_listing_dir($media->listing_key); - if (!file_exists($listing_dir)) { - wp_mkdir_p($listing_dir); - } - - // Save file - $filename = $media->media_order . '.' . $extension; - $file_path = $listing_dir . '/' . $filename; - - if (file_put_contents($file_path, $body) === false) { - $wpdb->update( - $this->db->media_table(), - array('download_error' => 'Failed to write file'), - array('id' => $media_id) - ); - return false; - } - - // Calculate relative path - $prefix = substr($media->listing_key, 0, 2); - $relative_path = $prefix . '/' . $media->listing_key . '/' . $filename; - $local_url = $this->get_upload_url() . '/' . $relative_path; - - // Update record - $wpdb->update( - $this->db->media_table(), - array( - 'local_path' => $relative_path, - 'local_url' => $local_url, - 'file_size' => strlen($body), - 'mime_type' => $content_type, - 'downloaded_at' => current_time('mysql'), - 'download_error' => null, - ), - array('id' => $media_id) - ); - - return true; - } - - /** - * Get file extension from content type - * - * @param string $content_type Content type header - * @param string $url Original URL as fallback - * @return string File extension - */ - private function get_extension_from_content_type($content_type, $url) { - // Extract main type from content-type header - $content_type = strtolower(explode(';', $content_type)[0]); - - $map = array( - 'image/jpeg' => 'jpg', - 'image/jpg' => 'jpg', - 'image/png' => 'png', - 'image/gif' => 'gif', - 'image/webp' => 'webp', - ); - - if (isset($map[$content_type])) { - return $map[$content_type]; - } - - // Fallback to URL extension - $path = parse_url($url, PHP_URL_PATH); - $ext = pathinfo($path, PATHINFO_EXTENSION); - - return $ext ?: 'jpg'; - } - /** * Delete all media for a property * @@ -558,44 +760,48 @@ class MLS_Media_Handler { } /** - * Download pending media (for batch processing) + * Reset failed downloads for retry * - * @param int $limit 
Max media to download
-     * @param callable|null $progress_callback Callback for progress updates
-     * @return array Stats
+     * @param string|null $listing_key Optional listing key; when given, only that listing's media is reset
+     * @return int Number of records reset (rows affected by the UPDATE)
      */
-    public function download_pending($limit = 100, $progress_callback = null) {
+    public function reset_failed_downloads($listing_key = null) {
         global $wpdb;
 
-        $pending = $wpdb->get_results($wpdb->prepare(
-            "SELECT id, media_key FROM {$this->db->media_table()}
-             WHERE local_path IS NULL AND media_url IS NOT NULL
-             AND download_attempts < 3
-             LIMIT %d",
-            $limit
-        ));
+        $where = "(download_status = 'failed' OR download_attempts >= " . self::MAX_ATTEMPTS . ")"; // grouped: AND binds tighter than OR, so the listing filter must apply to both conditions
+        $values = array();
 
-        $stats = array(
-            'total' => count($pending),
-            'success' => 0,
-            'failed' => 0,
-        );
-
-        foreach ($pending as $media) {
-            if ($this->download_media($media->id)) {
-                $stats['success']++;
-                if ($progress_callback) {
-                    call_user_func($progress_callback, 'media_downloaded', array('media_key' => $media->media_key));
-                }
-            } else {
-                $stats['failed']++;
-                if ($progress_callback) {
-                    call_user_func($progress_callback, 'media_error', array('media_key' => $media->media_key));
-                }
-            }
+        if ($listing_key) {
+            $where .= " AND listing_key = %s";
+            $values[] = $listing_key;
         }
 
-        return $stats;
+        if (!empty($values)) {
+            $sql = $wpdb->prepare(
+                "UPDATE {$this->db->media_table()}
+                 SET download_status = 'pending',
+                     download_attempts = 0,
+                     download_error = NULL,
+                     retry_after = NULL,
+                     queued_at = %s
+                 WHERE {$where}",
+                array_merge(array(current_time('mysql')), $values)
+            );
+        } else {
+            $sql = $wpdb->prepare(
+                "UPDATE {$this->db->media_table()}
+                 SET download_status = 'pending',
+                     download_attempts = 0,
+                     download_error = NULL,
+                     retry_after = NULL,
+                     queued_at = %s
+                 WHERE {$where}",
+                current_time('mysql')
+            );
+        }
+
+        $wpdb->query($sql);
+        return $wpdb->rows_affected;
     }
 
     /**
@@ -646,4 +852,78 @@ class MLS_Media_Handler {
 
         return $deleted;
     }
+
+    /**
+     * Get recent download logs
+
*
+     * @param int $limit Number of entries to return
+     * @param string|null $action Optional action filter (exact match on the action column)
+     * @return array Log entries, newest first
+     */
+    public function get_download_logs($limit = 100, $action = null) {
+        global $wpdb;
+
+        $where = '';
+        $values = array();
+
+        if ($action) {
+            $where = "WHERE action = %s";
+            $values[] = $action;
+        }
+
+        $values[] = $limit;
+
+        return $wpdb->get_results($wpdb->prepare(
+            "SELECT * FROM {$this->db->media_log_table()}
+             {$where}
+             ORDER BY created_at DESC
+             LIMIT %d",
+            $values
+        ));
+    }
+
+    /**
+     * Clear old download logs (cutoff uses the same site-local clock that created_at is written with)
+     *
+     * @param int $days_old Delete logs older than this many days
+     * @return int Number of entries deleted
+     */
+    public function clear_old_logs($days_old = 7) {
+        global $wpdb;
+
+        $cutoff = gmdate('Y-m-d H:i:s', strtotime("-{$days_old} days", current_time('timestamp')));
+
+        $wpdb->query($wpdb->prepare(
+            "DELETE FROM {$this->db->media_log_table()} WHERE created_at < %s",
+            $cutoff
+        ));
+
+        return $wpdb->rows_affected;
+    }
+
+    /**
+     * Legacy sync method - now queues media instead of downloading immediately
+     * Kept for backward compatibility
+     *
+     * @param string $listing_key Listing key
+     * @param array $media_array Media array from API
+     * @param bool $force Unused here (not forwarded to queue_property_media); kept so existing callers still work
+     * @param callable|null $progress_callback Callback for progress updates
+     */
+    public function sync_property_media($listing_key, $media_array, $force = false, $progress_callback = null) {
+        // Now just queues media - actual download happens via process_queue()
+        return $this->queue_property_media($listing_key, $media_array, $progress_callback);
+    }
+
+    /**
+     * Legacy download_pending method - now uses process_queue
+     * Kept for backward compatibility
+     *
+     * @param int $limit Max media to download
+     * @param callable|null $progress_callback Callback for progress updates
+     * @return array Stats
+     */
+    public function download_pending($limit = 100, $progress_callback = null) {
+        return $this->process_queue($limit, $progress_callback);
+    }
 }
diff
--git a/wp-content/plugins/mls-by-hansonxyz/mls-by-hansonxyz.php b/wp-content/plugins/mls-by-hansonxyz/mls-by-hansonxyz.php index 5e292006..ad4845ff 100644 --- a/wp-content/plugins/mls-by-hansonxyz/mls-by-hansonxyz.php +++ b/wp-content/plugins/mls-by-hansonxyz/mls-by-hansonxyz.php @@ -22,7 +22,7 @@ define('MLS_PLUGIN_FILE', __FILE__); define('MLS_PLUGIN_DIR', plugin_dir_path(__FILE__)); define('MLS_PLUGIN_URL', plugin_dir_url(__FILE__)); define('MLS_PLUGIN_BASENAME', plugin_basename(__FILE__)); -define('MLS_DB_VERSION', '1.0.0'); +define('MLS_DB_VERSION', '1.1.0'); // Database table names (without prefix) define('MLS_TABLE_PROPERTIES', 'mls_properties'); @@ -30,6 +30,7 @@ define('MLS_TABLE_MEDIA', 'mls_media'); define('MLS_TABLE_SYNC_STATE', 'mls_sync_state'); define('MLS_TABLE_RATE_LIMITS', 'mls_rate_limits'); define('MLS_TABLE_SYNC_LOG', 'mls_sync_log'); +define('MLS_TABLE_MEDIA_LOG', 'mls_media_log'); /** * Main plugin class