Add queue-based media download system with rate limiting

- Add download_status, retry_after, queued_at columns to mls_media table
- Add mls_media_log table for download attempt tracking
- Rewrite media handler to queue downloads instead of immediate download
- Add 700ms delay between downloads (25% buffer over 2/sec limit)
- Add 3-hour backoff for rate-limited (429) responses
- Add max 5 attempts before marking as permanently failed
- Add wp mls media command: status, process, reset, logs
- Deprecate wp mls sync media in favor of wp mls media process
- Update documentation with queue system details and cron examples

Media downloads are now separate from property sync:
1. wp mls sync full/incremental - syncs properties, queues media
2. wp mls media process - downloads queued media with rate limiting

Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Hanson.xyz Dev
2025-12-14 22:52:58 -06:00
parent b62867d834
commit 6eadf3d266
5 changed files with 930 additions and 334 deletions
@@ -37,6 +37,7 @@ class MLS_CLI {
WP_CLI::add_command('mls stats', array($instance, 'stats'));
WP_CLI::add_command('mls cache', array($instance, 'cache'));
WP_CLI::add_command('mls recovery', array($instance, 'recovery'));
WP_CLI::add_command('mls media', array($instance, 'media'));
}
/**
@@ -337,40 +338,14 @@ class MLS_CLI {
break;
case 'media':
WP_CLI::line('Downloading pending media...');
if (!$quiet) {
WP_CLI::line('Legend: P=downloaded p=skipped E=error');
echo "\n";
}
$media_handler = $this->plugin->get_media_handler();
$result = $media_handler->download_pending($limit ?: 100, $progress_callback);
if (!$quiet) {
echo "\n";
}
WP_CLI::line(sprintf(
'Media download complete: %d success, %d failed out of %d total',
$result['success'],
$result['failed'],
$result['total']
// Redirect to the new media command
WP_CLI::line('Note: "wp mls sync media" is deprecated. Use "wp mls media process" instead.');
WP_CLI::line('');
$this->media(array('process'), array(
'limit' => $limit ?: 100,
'verbose' => $verbose,
'quiet' => $quiet,
));
if ($result['failed'] === 0 && $result['total'] > 0) {
WP_CLI::success('All media downloaded successfully!');
} elseif ($result['total'] === 0) {
WP_CLI::success('No pending media to download.');
} else {
WP_CLI::warning('Some media failed to download.');
$missing_count = $media_handler->get_missing_count();
if ($missing_count > 0) {
WP_CLI::line(sprintf(
'Missing media log: %s (%d entries)',
$media_handler->get_missing_log_path(),
$missing_count
));
}
}
break;
case 'resume':
@@ -823,6 +798,267 @@ class MLS_CLI {
}
}
/**
 * Manage media download queue.
 *
 * Dispatches on the first positional argument:
 * - status / queue: print queue counters and a time estimate.
 * - process: download up to --limit ready items through the media handler.
 * - reset: put failed downloads back in the queue.
 * - logs: print recent download log rows, or purge old ones with --clear.
 *
 * ## OPTIONS
 *
 * [<action>]
 * : Action: status (default), queue, process, reset, logs
 *
 * [--limit=<n>]
 * : Limit number of items to process
 *
 * [--verbose]
 * : Show detailed output
 *
 * [--quiet]
 * : Suppress progress output
 *
 * [--days=<n>]
 * : Days of logs to keep (for logs --clear)
 *
 * [--clear]
 * : Clear logs older than --days
 *
 * ## EXAMPLES
 *
 * wp mls media status # Show queue statistics
 * wp mls media process # Process pending downloads (rate limited)
 * wp mls media process --limit=50 # Process up to 50 items
 * wp mls media reset # Reset failed downloads for retry
 * wp mls media logs # Show recent download logs
 * wp mls media logs --clear --days=7 # Clear logs older than 7 days
 *
 * @subcommand media
 *
 * @param array $args       Positional arguments; $args[0] is the action.
 * @param array $assoc_args Flags/options (limit, verbose, quiet, days, clear).
 */
public function media($args, $assoc_args) {
    $action = isset($args[0]) ? $args[0] : 'status';
    $limit  = isset($assoc_args['limit']) ? (int) $assoc_args['limit'] : 100;

    // Use !empty() rather than isset(): the deprecated "sync media" path calls
    // this method with explicit booleans, and isset() is true for `false`,
    // which would switch on verbose AND quiet regardless of the caller's intent.
    $verbose = !empty($assoc_args['verbose']);

    // --quiet is also a WP-CLI global flag; when given on the command line it
    // is kept in the runtime config, so consult that as well.
    $quiet = !empty($assoc_args['quiet']) || (bool) WP_CLI::get_config('quiet');

    // A zero/negative limit would otherwise print nonsense such as
    // "Processing -5 media items" and silently do nothing.
    if (in_array($action, array('process', 'logs'), true) && $limit < 1) {
        WP_CLI::error('--limit must be a positive integer.');
    }

    $media_handler = $this->plugin->get_media_handler();

    switch ($action) {
        case 'status':
        case 'queue':
            $stats = $media_handler->get_queue_stats();

            WP_CLI::line('');
            WP_CLI::line('=== Media Download Queue ===');
            WP_CLI::line('');
            WP_CLI::line(sprintf('Pending total: %d', $stats['pending']));
            WP_CLI::line(sprintf('Ready now: %d', $stats['ready']));
            WP_CLI::line(sprintf('In backoff: %d (retry scheduled)', $stats['in_backoff']));
            WP_CLI::line(sprintf('Failed: %d (max attempts reached)', $stats['failed']));
            WP_CLI::line(sprintf('Completed: %d', $stats['completed']));
            WP_CLI::line('');

            if ($stats['ready'] > 0) {
                WP_CLI::line(sprintf(
                    'Run "wp mls media process --limit=%d" to download pending media.',
                    min($stats['ready'], 100)
                ));
                // 0.7s per item mirrors the handler's 700ms inter-download delay.
                WP_CLI::line(sprintf(
                    'Estimated time: %d minutes (at 700ms per image)',
                    ceil($stats['ready'] * 0.7 / 60)
                ));
            }

            if ($stats['failed'] > 0) {
                WP_CLI::line('');
                WP_CLI::line('Run "wp mls media reset" to retry failed downloads.');
            }

            WP_CLI::line('');
            break;

        case 'process':
            $stats = $media_handler->get_queue_stats();

            if ($stats['ready'] === 0) {
                WP_CLI::success('No media ready to download.');
                break;
            }

            // Never announce (or request) more work than is actually ready.
            $process_count = min($limit, $stats['ready']);

            WP_CLI::line(sprintf(
                'Processing %d media items (rate limited: 1 per 700ms)...',
                $process_count
            ));
            WP_CLI::line(sprintf(
                'Estimated time: %d minutes',
                ceil($process_count * 0.7 / 60)
            ));

            if (!$quiet) {
                WP_CLI::line('Legend: P=downloaded B=backoff (retry later) E=error');
                echo "\n";
            }

            // Progress callback: one character per item in compact mode,
            // a full timestamped line per item in verbose mode.
            $progress_callback = null;
            if (!$quiet) {
                $progress_callback = function($event, $data = array()) use ($verbose) {
                    if ($verbose) {
                        $this->output_verbose_media_event($event, $data);
                    } else {
                        switch ($event) {
                            case 'media_downloaded':
                                echo 'P';
                                break;
                            case 'media_backoff':
                                echo 'B';
                                break;
                            case 'media_error':
                                echo 'E';
                                break;
                        }
                    }
                };
            }

            $result = $media_handler->process_queue($process_count, $progress_callback);

            if (!$quiet) {
                echo "\n\n";
            }

            WP_CLI::line(sprintf(
                'Results: %d success, %d backoff, %d failed out of %d processed',
                $result['success'],
                $result['skipped'],
                $result['failed'],
                $result['processed']
            ));

            // Show updated stats
            $new_stats = $media_handler->get_queue_stats();
            WP_CLI::line(sprintf('Queue remaining: %d ready, %d in backoff', $new_stats['ready'], $new_stats['in_backoff']));

            if ($result['failed'] > 0 || $result['skipped'] > 0) {
                WP_CLI::line('');
                WP_CLI::line('Items in backoff will be retried after 3 hours.');
                WP_CLI::line('Run "wp mls media logs" to see download history.');
            }

            if ($result['success'] > 0) {
                WP_CLI::success('Media processing complete.');
            }
            break;

        case 'reset':
            WP_CLI::line('Resetting failed downloads for retry...');
            $reset_count = $media_handler->reset_failed_downloads();

            if ($reset_count > 0) {
                WP_CLI::success(sprintf('Reset %d failed downloads. They will be retried on next process.', $reset_count));
            } else {
                WP_CLI::success('No failed downloads to reset.');
            }
            break;

        case 'logs':
            if (isset($assoc_args['clear'])) {
                $days = isset($assoc_args['days']) ? (int) $assoc_args['days'] : 7;
                $deleted = $media_handler->clear_old_logs($days);
                WP_CLI::success(sprintf('Deleted %d log entries older than %d days.', $deleted, $days));
                break;
            }

            $logs = $media_handler->get_download_logs($limit);

            if (empty($logs)) {
                WP_CLI::success('No download logs found.');
                break;
            }

            WP_CLI::line('');
            WP_CLI::line('=== Recent Download Logs ===');
            WP_CLI::line('');

            foreach ($logs as $log) {
                // Short tag per log action; unknown actions are shown verbatim.
                $status_indicator = '';
                switch ($log->action) {
                    case 'success':
                        $status_indicator = '[OK]';
                        break;
                    case 'rate_limited':
                        $status_indicator = '[429]';
                        break;
                    case 'permanent_error':
                        $status_indicator = '[ERR]';
                        break;
                    case 'error':
                        $status_indicator = '[FAIL]';
                        break;
                    default:
                        $status_indicator = "[{$log->action}]";
                }

                $line = sprintf(
                    '%s %s %s %s %dms',
                    $log->created_at,
                    $status_indicator,
                    $log->listing_key,
                    $log->media_key,
                    $log->response_time_ms
                );

                if ($log->status_code) {
                    $line .= " HTTP:{$log->status_code}";
                }
                if ($log->error_message) {
                    $line .= " - {$log->error_message}";
                }

                WP_CLI::line($line);
            }

            WP_CLI::line('');
            WP_CLI::line(sprintf('Showing %d most recent entries. Use --limit=N to see more.', count($logs)));
            WP_CLI::line('');
            break;

        default:
            WP_CLI::error("Unknown action: {$action}. Use 'status', 'queue', 'process', 'reset', or 'logs'.");
    }
}
/**
 * Print one timestamped line describing a media queue event.
 *
 * Handles 'media_downloaded', 'media_backoff' and 'media_error'; any other
 * event name produces no output.
 *
 * @param string $event Event name
 * @param array $data Event data (listing_key, media_key and, for errors, error)
 */
private function output_verbose_media_event($event, $data) {
    $clock = date('H:i:s');

    // Missing identifiers are rendered as "unknown" rather than left blank.
    $listing_label = isset($data['listing_key']) ? $data['listing_key'] : 'unknown';
    $media_label = isset($data['media_key']) ? $data['media_key'] : 'unknown';

    // Loose comparison on purpose: it mirrors how a switch would match.
    if ($event == 'media_downloaded') {
        WP_CLI::line("[{$clock}] DOWNLOADED: {$listing_label} / {$media_label}");
        return;
    }

    if ($event == 'media_backoff') {
        WP_CLI::warning("[{$clock}] BACKOFF: {$listing_label} / {$media_label} - will retry in 3 hours");
        return;
    }

    if ($event == 'media_error') {
        $reason = isset($data['error']) ? $data['error'] : 'Unknown error';
        // Second argument false: report the error without terminating the command.
        WP_CLI::error("[{$clock}] ERROR: {$listing_label} / {$media_label} - {$reason}", false);
    }
}
/**
* Recursively delete a directory
*/
@@ -18,7 +18,8 @@ All tables use `{$wpdb->prefix}mls_` prefix:
| Table | Purpose |
|-------|---------|
| `mls_properties` | Listing data |
| `mls_media` | Media files |
| `mls_media` | Media files with download queue |
| `mls_media_log` | Media download attempt history |
| `mls_sync_state` | Sync progress tracking |
| `mls_rate_limits` | API usage tracking |
| `mls_sync_log` | Debug logging |
@@ -34,18 +35,20 @@ define('MLSGRID_ACCESS_TOKEN', 'your-token-here');
### MLS Grid API Rate Limits
MUST comply with these limits:
- 2 requests/second
- 2 requests/second (500ms minimum between requests)
- 7,200 requests/hour
- 40,000 requests/day
- 4GB data/hour
Media downloads use a 700ms delay between requests (a 40% margin over the 500ms minimum).
### Key Files
| File | Purpose |
|------|---------|
| `includes/class-mls-api-client.php` | API communication, auth, gzip |
| `includes/class-mls-sync-engine.php` | Sync orchestration |
| `includes/class-mls-media-handler.php` | Media download/storage |
| `includes/class-mls-media-handler.php` | Media queue and download |
| `includes/class-mls-query.php` | Public query API |
| `includes/class-mls-rate-limiter.php` | Rate limit compliance |
| `cli/class-mls-cli.php` | WP-CLI commands |
@@ -61,12 +64,19 @@ wp mls test auth
wp mls status
wp mls status rate-limits
# Run sync (use --verbose for detailed output)
# Run property sync (queues media, does not download)
wp mls sync full [--dry-run] [--limit=N] [--verbose]
wp mls sync incremental [--dry-run] [--verbose]
wp mls sync media [--limit=N] [--verbose]
wp mls sync resume --id=<sync_id>
# Media download queue (separate from property sync)
wp mls media status # Show queue stats
wp mls media process # Download queued media (rate limited)
wp mls media process --limit=50 --verbose
wp mls media reset # Reset failed downloads for retry
wp mls media logs # View download history
wp mls media logs --clear --days=7
# Statistics
wp mls stats
@@ -83,31 +93,54 @@ wp mls recovery auto # Auto-resume most recent failed sync
wp mls recovery cleanup # Mark stale (>1hr) syncs as failed
```
### Media Queue System
Media downloads are now queue-based and separate from property sync:
1. **Property sync** (`wp mls sync full/incremental`) queues media records
2. **Media process** (`wp mls media process`) downloads queued media with rate limiting
3. Downloads are rate-limited to 700ms between requests (under 2/sec limit)
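4. Rate-limited (429) and server-error (5xx) responses get a 3-hour backoff before retry; other failures record the error and stay queued without a backoff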
5. After 5 attempts, items are marked failed and logged
**Queue states:**
- `pending` - Ready for download
- `completed` - Successfully downloaded
- `failed` - Max attempts reached
**Media table columns:**
- `download_status` - pending/completed/failed
- `retry_after` - Next retry time (3hr backoff on rate limit)
- `queued_at` - When item was queued
- `download_attempts` - Attempt count (max 5)
### Progress Output
Without --verbose (compact mode):
Property sync (compact mode):
- `.` = new property created
- `#` = property updated
- `x` = property deleted
- `-` = skipped (dry-run)
- `P` = photo downloaded
- `p` = photo skipped (already exists)
- `E` = photo error
- `q` = media queued
- `p` = media skipped (already downloaded)
- `|` = page complete
With --verbose: Full timestamped output showing API requests, responses, and individual item status.
Media process (compact mode):
- `P` = downloaded
- `B` = backoff (retry later)
- `E` = error
With --verbose: Full timestamped output.
### Missing Media Log
Failed media downloads are logged to: `wp-content/uploads/mls-missing-media.log`
Permanently failed media downloads logged to: `wp-content/uploads/mls-missing-media.log`
Format: `[timestamp] listing_key | media_key | error | url`
Media downloads use exponential backoff (1s, 2s, 4s, 8s, 16s) for rate limit (429) and server errors (5xx).
### Sync Recovery
The sync engine saves progress after each page, allowing interrupted syncs to resume:
The sync engine saves progress after each page:
1. **Automatic state tracking**: `last_next_link` saved after each API page
2. **Stale sync detection**: Syncs running >1 hour marked as failed
@@ -116,9 +149,17 @@ The sync engine saves progress after each page, allowing interrupted syncs to re
- `wp mls recovery auto` - Auto-resume most recent failed sync
- `wp mls recovery list` - View all resumable syncs
For cron jobs, consider adding recovery at the start:
### Recommended Cron Setup
```bash
wp mls recovery auto --quiet && wp mls sync incremental
# Property sync every 30 minutes
*/30 * * * * cd /var/www/html && wp mls recovery auto --quiet --allow-root && wp mls sync incremental --allow-root >> /var/log/mls-sync.log 2>&1
# Media downloads every 5 minutes (processes up to 50 items per run)
*/5 * * * * cd /var/www/html && wp mls media process --limit=50 --quiet --allow-root >> /var/log/mls-media.log 2>&1
# Full sync weekly (Sunday 3am)
0 3 * * 0 cd /var/www/html && wp mls sync full --allow-root >> /var/log/mls-sync.log 2>&1
```
### Public API Functions
@@ -150,10 +191,10 @@ if (mls_is_available()) { ... }
### Sync Strategy
1. **Initial Import**: Full sync downloads all viewable properties
2. **Incremental**: Uses ModificationTimestamp to fetch only changes
1. **Property Sync**: Full/incremental sync downloads property data and queues media
2. **Media Queue**: Separate process downloads media with rate limiting
3. **Delete Handling**: MlgCanView=false triggers local deletion
4. **Media**: Downloads to wp-content/uploads/mls-listings/
4. **Media Storage**: Downloads to wp-content/uploads/mls-listings/
5. **Recovery**: Stores last_next_link for resume on failure
### Testing After Changes
@@ -162,6 +203,7 @@ if (mls_is_available()) { ... }
wp mls test connection
wp mls test auth
wp mls sync full --dry-run --limit=10
wp mls media status
wp mls stats
```
@@ -55,6 +55,13 @@ class MLS_DB {
return $this->get_table_name(MLS_TABLE_SYNC_LOG);
}
/**
 * Get media log table name
 *
 * Resolves the MLS_TABLE_MEDIA_LOG constant through get_table_name().
 *
 * @return string Table name as returned by get_table_name()
 *                (presumably prefixed with $wpdb->prefix - confirm against get_table_name()).
 */
public function media_log_table() {
return $this->get_table_name(MLS_TABLE_MEDIA_LOG);
}
/**
* Create all database tables
*/
@@ -163,6 +170,9 @@ class MLS_DB {
downloaded_at DATETIME DEFAULT NULL,
download_attempts INT(3) DEFAULT 0,
download_error TEXT DEFAULT NULL,
retry_after DATETIME DEFAULT NULL,
queued_at DATETIME DEFAULT NULL,
download_status VARCHAR(20) DEFAULT 'pending',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
@@ -170,7 +180,10 @@ class MLS_DB {
PRIMARY KEY (id),
UNIQUE KEY listing_media (listing_key, media_key),
KEY listing_key (listing_key),
KEY media_order (media_order)
KEY media_order (media_order),
KEY download_status (download_status),
KEY retry_after (retry_after),
KEY queued_at (queued_at)
) {$charset_collate};";
dbDelta($sql_media);
@@ -239,6 +252,29 @@ class MLS_DB {
) {$charset_collate};";
dbDelta($sql_sync_log);
// Media download log table
$table_media_log = $wpdb->prefix . MLS_TABLE_MEDIA_LOG;
$sql_media_log = "CREATE TABLE {$table_media_log} (
id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT,
media_id BIGINT(20) UNSIGNED NOT NULL,
listing_key VARCHAR(50) NOT NULL,
media_key VARCHAR(100) NOT NULL,
action VARCHAR(30) NOT NULL,
status_code INT(5) DEFAULT NULL,
response_time_ms INT(11) DEFAULT NULL,
error_message TEXT DEFAULT NULL,
url VARCHAR(1000) DEFAULT NULL,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id),
KEY media_id (media_id),
KEY listing_key (listing_key),
KEY action (action),
KEY created_at (created_at)
) {$charset_collate};";
dbDelta($sql_media_log);
}
/**
@@ -253,6 +289,7 @@ class MLS_DB {
MLS_TABLE_SYNC_STATE,
MLS_TABLE_RATE_LIMITS,
MLS_TABLE_SYNC_LOG,
MLS_TABLE_MEDIA_LOG,
);
foreach ($tables as $table) {
@@ -3,6 +3,10 @@
* MLS Media Handler
*
* Handles downloading and managing media files from MLS listings
* Uses a queue-based system with rate limiting to comply with API limits
*
* Rate limits: 2 requests/second (500ms minimum between requests)
* We use 700ms between requests (a 40% margin over the 500ms minimum)
*/
if (!defined('ABSPATH')) {
@@ -16,6 +20,21 @@ class MLS_Media_Handler {
*/
const UPLOAD_SUBDIR = 'mls-listings';
/**
* Minimum delay between media downloads in milliseconds (700ms = 40% margin over the 500ms minimum interval)
*/
const DOWNLOAD_DELAY_MS = 700;
/**
* Retry backoff time in hours for failed downloads
*/
const RETRY_BACKOFF_HOURS = 3;
/**
* Maximum download attempts before permanent failure
*/
const MAX_ATTEMPTS = 5;
/**
* Database instance
*/
@@ -67,14 +86,13 @@ class MLS_Media_Handler {
}
/**
* Sync media for a property
* Queue media for a property (does NOT download immediately)
*
* @param string $listing_key Listing key
* @param array $media_array Media array from API
* @param bool $force Force re-download all media
* @param callable|null $progress_callback Callback for progress updates
*/
public function sync_property_media($listing_key, $media_array, $force = false, $progress_callback = null) {
public function queue_property_media($listing_key, $media_array, $progress_callback = null) {
global $wpdb;
if (empty($media_array)) {
@@ -82,6 +100,8 @@ class MLS_Media_Handler {
}
$received_keys = array();
$queued_count = 0;
$skipped_count = 0;
foreach ($media_array as $media) {
$media_key = $media['MediaKey'] ?? null;
@@ -121,37 +141,28 @@ class MLS_Media_Handler {
array('id' => $existing->id)
);
// Check if we need to re-download
if ($force || $this->needs_download($existing, $media)) {
$result = $this->download_media($existing->id);
// Check if we need to re-download (queue it)
if ($this->needs_download($existing, $media)) {
$this->mark_for_download($existing->id);
$queued_count++;
if ($progress_callback) {
if ($result) {
call_user_func($progress_callback, 'media_downloaded', array('media_key' => $media_key));
} else {
$error = $this->get_last_download_error($existing->id);
call_user_func($progress_callback, 'media_error', array('media_key' => $media_key, 'error' => $error));
}
call_user_func($progress_callback, 'media_queued', array('media_key' => $media_key));
}
} else {
$skipped_count++;
if ($progress_callback) {
call_user_func($progress_callback, 'media_skipped', array('media_key' => $media_key));
}
}
} else {
// Insert new record
// Insert new record - queued for download
$data['created_at'] = current_time('mysql');
$data['queued_at'] = current_time('mysql');
$data['download_status'] = 'pending';
$wpdb->insert($this->db->media_table(), $data);
$new_id = $wpdb->insert_id;
// Queue download
$result = $this->download_media($new_id);
$queued_count++;
if ($progress_callback) {
if ($result) {
call_user_func($progress_callback, 'media_downloaded', array('media_key' => $media_key));
} else {
$error = $this->get_last_download_error($new_id);
call_user_func($progress_callback, 'media_error', array('media_key' => $media_key, 'error' => $error));
}
call_user_func($progress_callback, 'media_queued', array('media_key' => $media_key));
}
}
}
@@ -180,20 +191,432 @@ class MLS_Media_Handler {
$wpdb->delete($this->db->media_table(), array('id' => $record->id));
}
}
return array(
'queued' => $queued_count,
'skipped' => $skipped_count,
);
}
/**
* Get the last download error for a media record
* Mark a media record for download
*
* @param int $media_id Media ID
* @return string|null Error message
*/
private function get_last_download_error($media_id) {
private function mark_for_download($media_id) {
global $wpdb;
return $wpdb->get_var($wpdb->prepare(
"SELECT download_error FROM {$this->db->media_table()} WHERE id = %d",
$wpdb->update(
$this->db->media_table(),
array(
'download_status' => 'pending',
'queued_at' => current_time('mysql'),
'local_path' => null,
'local_url' => null,
'downloaded_at' => null,
'download_error' => null,
),
array('id' => $media_id)
);
}
/**
 * Decide whether a stored media record has to be (re-)downloaded.
 *
 * A download is needed when the record has no local path, when the file it
 * points to is missing on disk, or when the API now reports a different URL.
 *
 * @param object $existing Existing media record
 * @param array $new_data New media data from API
 * @return bool True when the media should be queued for download
 */
private function needs_download($existing, $new_data) {
    // Never downloaded (or previously reset): nothing to compare against.
    if (empty($existing->local_path)) {
        return true;
    }

    $absolute_path = $this->get_upload_dir() . '/' . $existing->local_path;
    $incoming_url = isset($new_data['MediaURL']) ? $new_data['MediaURL'] : null;

    // Missing file on disk, or the source URL changed since the last sync.
    return !file_exists($absolute_path) || $existing->media_url !== $incoming_url;
}
/**
 * Fetch the oldest queued media record that is eligible for download now.
 *
 * Eligible means: status 'pending', a non-NULL URL, fewer than MAX_ATTEMPTS
 * attempts, and no retry_after in the future.
 *
 * @return object|null Media record or null if queue is empty
 */
public function get_next_queued() {
    global $wpdb;

    $current_mysql_time = current_time('mysql');

    $sql = "SELECT * FROM {$this->db->media_table()}
        WHERE download_status = 'pending'
        AND media_url IS NOT NULL
        AND download_attempts < %d
        AND (retry_after IS NULL OR retry_after <= %s)
        ORDER BY queued_at ASC
        LIMIT 1";

    $query = $wpdb->prepare($sql, self::MAX_ATTEMPTS, $current_mysql_time);

    return $wpdb->get_row($query);
}
/**
 * Get queue statistics
 *
 * Runs one COUNT(*) query per counter against the media table:
 * - pending:    rows with status 'pending'
 * - ready:      pending rows that get_next_queued() would pick up right now
 * - in_backoff: pending rows whose retry_after is still in the future
 * - failed:     rows marked 'failed', plus pending rows that have already
 *               used up MAX_ATTEMPTS
 * - completed:  rows with status 'completed'
 *
 * @return array Queue stats (integer counts keyed as listed above)
 */
public function get_queue_stats() {
    global $wpdb;

    $table = $this->db->media_table();
    $now = current_time('mysql');

    return array(
        'pending' => (int) $wpdb->get_var(
            "SELECT COUNT(*) FROM {$table}
            WHERE download_status = 'pending'"
        ),
        'ready' => (int) $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*) FROM {$table}
            WHERE download_status = 'pending'
            AND media_url IS NOT NULL
            AND download_attempts < %d
            AND (retry_after IS NULL OR retry_after <= %s)",
            self::MAX_ATTEMPTS,
            $now
        )),
        'in_backoff' => (int) $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*) FROM {$table}
            WHERE download_status = 'pending'
            AND retry_after > %s",
            $now
        )),
        // The attempt-count clause is restricted to pending rows: a download
        // that succeeded on its final allowed attempt keeps its attempt count
        // and must not be reported as both completed and failed.
        'failed' => (int) $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*) FROM {$table}
            WHERE download_status = 'failed'
            OR (download_status = 'pending' AND download_attempts >= %d)",
            self::MAX_ATTEMPTS
        )),
        'completed' => (int) $wpdb->get_var(
            "SELECT COUNT(*) FROM {$table}
            WHERE download_status = 'completed'"
        ),
    );
}
/**
 * Work through the download queue, pausing between requests.
 *
 * Pulls one eligible record at a time via get_next_queued(), waits until at
 * least DOWNLOAD_DELAY_MS has passed since the previous download finished,
 * downloads it, and reports the outcome through the optional callback.
 * Stops early when the queue has no eligible record left.
 *
 * @param int $limit Max items to process
 * @param callable|null $progress_callback Callback for progress updates;
 *        called as ($event, $data) with event 'media_downloaded',
 *        'media_backoff' or 'media_error'
 * @return array Processing stats: processed, success, failed, skipped
 */
public function process_queue($limit = 100, $progress_callback = null) {
    $totals = array(
        'processed' => 0,
        'success' => 0,
        'failed' => 0,
        'skipped' => 0,
    );

    // Millisecond timestamp of when the previous download returned (0 = none yet).
    $previous_finish_ms = 0;

    for ($handled = 0; $handled < $limit; $handled++) {
        $item = $this->get_next_queued();
        if (!$item) {
            // Nothing eligible is left.
            break;
        }

        // Throttle: sleep off whatever is left of the minimum gap.
        $since_previous_ms = microtime(true) * 1000 - $previous_finish_ms;
        if ($previous_finish_ms > 0 && $since_previous_ms < self::DOWNLOAD_DELAY_MS) {
            $pause_ms = (int) (self::DOWNLOAD_DELAY_MS - $since_previous_ms);
            usleep($pause_ms * 1000);
        }

        $outcome = $this->download_media($item->id);
        $previous_finish_ms = microtime(true) * 1000;
        $totals['processed']++;

        // Translate the download result into a counter and an event name.
        if ($outcome === true) {
            $counter = 'success';
            $event = 'media_downloaded';
        } elseif ($outcome === 'backoff') {
            $counter = 'skipped';
            $event = 'media_backoff';
        } else {
            $counter = 'failed';
            $event = 'media_error';
        }
        $totals[$counter]++;

        if ($progress_callback) {
            $payload = array(
                'media_key' => $item->media_key,
                'listing_key' => $item->listing_key,
            );
            if ($event === 'media_error') {
                // For errors the download result is the error message itself.
                $payload['error'] = $outcome;
            }
            call_user_func($progress_callback, $event, $payload);
        }
    }

    return $totals;
}
/**
 * Download a media file
 *
 * Makes a single HTTP request for the record's URL (no in-process retries),
 * increments the record's attempt counter, writes a row to the media log for
 * each outcome, and on HTTP 200 stores the body via save_media_file().
 *
 * Outcomes:
 * - HTTP 200 with a body that saves correctly: returns true.
 * - HTTP 429/500/502/503/504: a retry backoff is recorded, returns 'backoff'.
 * - Transport error, empty body, save failure or any other status: the error
 *   is recorded on the record and returned as a string.
 * - Record exists but has no URL: the record is marked 'failed' so the queue
 *   does not keep returning it, and 'No media URL' is returned.
 *
 * @param int $media_id Media record ID
 * @return bool|string True on success, 'backoff' if set for retry, error message on failure
 */
public function download_media($media_id) {
    global $wpdb;

    $media = $wpdb->get_row($wpdb->prepare(
        "SELECT * FROM {$this->db->media_table()} WHERE id = %d",
        $media_id
    ));

    if (!$media) {
        return 'No media URL';
    }

    if (empty($media->media_url)) {
        // An empty-string URL still satisfies the queue's "IS NOT NULL" filter.
        // Without a state change here the same record would be handed back by
        // get_next_queued() on every iteration and block everything behind it.
        $wpdb->update(
            $this->db->media_table(),
            array(
                'download_status' => 'failed',
                'download_error' => 'No media URL',
            ),
            array('id' => $media_id)
        );
        return 'No media URL';
    }

    // Increment attempt counter
    $wpdb->update(
        $this->db->media_table(),
        array('download_attempts' => $media->download_attempts + 1),
        array('id' => $media_id)
    );

    // Make the request
    $start_time = microtime(true);
    $response = wp_remote_get($media->media_url, array(
        'timeout' => 60,
        'stream' => false,
    ));
    $response_time_ms = (int) ((microtime(true) - $start_time) * 1000);

    // Transport-level failure (DNS, timeout, ...): no HTTP status available.
    if (is_wp_error($response)) {
        $error_msg = $response->get_error_message();
        $this->log_download($media, 'error', null, $response_time_ms, $error_msg);
        $this->handle_download_failure($media_id, $error_msg, false);
        return $error_msg;
    }

    // Normalise to int so the strict comparisons below also hold when the
    // code arrives as a numeric string.
    $status_code = (int) wp_remote_retrieve_response_code($response);
    $this->log_download($media, 'attempt', $status_code, $response_time_ms, null);

    // Success
    if ($status_code === 200) {
        $body = wp_remote_retrieve_body($response);

        if (empty($body)) {
            $error_msg = 'Empty response body';
            $this->log_download($media, 'error', $status_code, $response_time_ms, $error_msg);
            $this->handle_download_failure($media_id, $error_msg, false);
            return $error_msg;
        }

        // Save the file
        $save_result = $this->save_media_file($media, $body, $response);

        if ($save_result !== true) {
            $this->log_download($media, 'error', $status_code, $response_time_ms, $save_result);
            $this->handle_download_failure($media_id, $save_result, false);
            return $save_result;
        }

        $this->log_download($media, 'success', $status_code, $response_time_ms, null);
        return true;
    }

    // Rate limited (429) or server error (5xx) - set backoff
    $retryable = in_array($status_code, array(429, 500, 502, 503, 504), true);
    $error_msg = "HTTP {$status_code}";

    if ($retryable) {
        $this->log_download($media, 'rate_limited', $status_code, $response_time_ms, $error_msg);
        $this->handle_download_failure($media_id, $error_msg, true);
        return 'backoff';
    }

    // Permanent failure (404, 403, etc.)
    $this->log_download($media, 'permanent_error', $status_code, $response_time_ms, $error_msg);
    $this->handle_download_failure($media_id, $error_msg, false);
    return $error_msg;
}
/**
 * Handle download failure
 *
 * Stores the error on the media record, optionally schedules a retry
 * RETRY_BACKOFF_HOURS from now, and - once the record has used MAX_ATTEMPTS -
 * marks it 'failed' and writes it to the missing-media log.
 *
 * @param int $media_id Media ID
 * @param string $error Error message
 * @param bool $set_backoff Whether to set retry backoff
 */
private function handle_download_failure($media_id, $error, $set_backoff) {
    global $wpdb;

    // Re-read the record so the attempt count reflects the increment that
    // download_media() wrote before making the request.
    $media = $wpdb->get_row($wpdb->prepare(
        "SELECT * FROM {$this->db->media_table()} WHERE id = %d",
        $media_id
    ));

    $update_data = array(
        'download_error' => $error,
    );

    if ($set_backoff) {
        // retry_after is compared against current_time('mysql') (site-local
        // time) when the queue is read, so it must be derived from that same
        // clock. Plain date()/strtotime() use PHP's default timezone, which
        // WordPress sets to UTC; mixing the two skews the backoff by the
        // site's UTC offset (and removes it entirely for offsets >= +3h).
        $retry_timestamp = strtotime(current_time('mysql')) + self::RETRY_BACKOFF_HOURS * HOUR_IN_SECONDS;
        $update_data['retry_after'] = date('Y-m-d H:i:s', $retry_timestamp);
    }

    // Check if max attempts reached
    if ($media && $media->download_attempts >= self::MAX_ATTEMPTS) {
        $update_data['download_status'] = 'failed';
        $this->log_missing_media($media, $error);
    }

    $wpdb->update(
        $this->db->media_table(),
        $update_data,
        array('id' => $media_id)
    );
}
/**
 * Write a downloaded body to the listing's directory and mark the record done.
 *
 * The file is named "<media_order>.<extension>"; the extension comes from the
 * response's content-type header with the media URL as fallback. On success
 * the record gets its local path/URL, size, MIME type and download time, its
 * error and retry_after are cleared, and its status becomes 'completed'.
 *
 * @param object $media Media record
 * @param string $body File contents
 * @param array $response HTTP response
 * @return bool|string True on success, error message on failure
 */
private function save_media_file($media, $body, $response) {
    global $wpdb;

    // Work out the extension first: it is part of the target file name.
    $mime_header = wp_remote_retrieve_header($response, 'content-type');
    $suffix = $this->get_extension_from_content_type($mime_header, $media->media_url);

    // Make sure the per-listing directory exists.
    $target_dir = $this->get_listing_dir($media->listing_key);
    if (!file_exists($target_dir)) {
        wp_mkdir_p($target_dir);
    }

    $basename = $media->media_order . '.' . $suffix;
    $written = file_put_contents($target_dir . '/' . $basename, $body);
    if ($written === false) {
        return 'Failed to write file';
    }

    // Path relative to the upload root: <first two chars of key>/<key>/<file>.
    $relative = implode('/', array(
        substr($media->listing_key, 0, 2),
        $media->listing_key,
        $basename,
    ));

    $record_changes = array(
        'local_path' => $relative,
        'local_url' => $this->get_upload_url() . '/' . $relative,
        'file_size' => strlen($body),
        'mime_type' => $mime_header,
        'downloaded_at' => current_time('mysql'),
        'download_error' => null,
        'download_status' => 'completed',
        'retry_after' => null,
    );

    $wpdb->update($this->db->media_table(), $record_changes, array('id' => $media->id));

    return true;
}
/**
 * Insert one row describing a download attempt into the media log table.
 *
 * @param object $media Media record
 * @param string $action Action type (attempt, success, error, rate_limited, permanent_error)
 * @param int|null $status_code HTTP status code
 * @param int $response_time_ms Response time in milliseconds
 * @param string|null $error Error message
 */
private function log_download($media, $action, $status_code, $response_time_ms, $error) {
    global $wpdb;

    // Identify which media record the entry belongs to.
    $entry = array(
        'media_id' => $media->id,
        'listing_key' => $media->listing_key,
        'media_key' => $media->media_key,
    );

    // Describe what happened on this attempt.
    $entry['action'] = $action;
    $entry['status_code'] = $status_code;
    $entry['response_time_ms'] = $response_time_ms;
    $entry['error_message'] = $error;
    $entry['url'] = $media->media_url;
    $entry['created_at'] = current_time('mysql');

    $wpdb->insert($this->db->media_log_table(), $entry);
}
/**
 * Get file extension from content type
 *
 * Looks the MIME type up in a fixed image map first. If it is not listed,
 * falls back to the extension found in the URL path - but only when that
 * extension is on a short allowlist of media types. The result is used as the
 * suffix of a file written under the uploads directory, so an arbitrary
 * URL-supplied extension (e.g. "php") must never be passed through.
 * Anything else yields 'jpg'.
 *
 * @param string|array $content_type Content type header (an array is
 *        tolerated; its first value is used)
 * @param string $url Original URL as fallback
 * @return string File extension (lowercase, without dot)
 */
private function get_extension_from_content_type($content_type, $url) {
    // A header lookup can yield an array when the header is repeated.
    if (is_array($content_type)) {
        $content_type = reset($content_type);
    }

    // Extract main type from content-type header ("image/jpeg; charset=...")
    $mime = strtolower(trim(explode(';', (string) $content_type)[0]));

    $map = array(
        'image/jpeg' => 'jpg',
        'image/jpg' => 'jpg',
        'image/png' => 'png',
        'image/gif' => 'gif',
        'image/webp' => 'webp',
    );

    if (isset($map[$mime])) {
        return $map[$mime];
    }

    // Fallback to URL extension; parse_url() returns null/false when the URL
    // has no path or cannot be parsed.
    $path = parse_url((string) $url, PHP_URL_PATH);
    $ext = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';

    $allowed = array('jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tif', 'tiff', 'avif', 'heic', 'pdf');

    return in_array($ext, $allowed, true) ? $ext : 'jpg';
}
/**
@@ -250,227 +673,6 @@ class MLS_Media_Handler {
return substr_count($content, "\n");
}
/**
 * Report whether the media behind a stored record must be fetched again.
 *
 * True when no local path is recorded, when the recorded file is absent from
 * the upload directory, or when the API's MediaURL differs from the stored one.
 *
 * @param object $existing Existing media record
 * @param array $new_data New media data from API
 * @return bool
 */
private function needs_download($existing, $new_data) {
    $stored_path = $existing->local_path;

    // Nothing on record yet.
    if (empty($stored_path)) {
        return true;
    }

    // The record points at a file that is no longer there.
    $on_disk = file_exists($this->get_upload_dir() . '/' . $stored_path);
    if (!$on_disk) {
        return true;
    }

    // Otherwise only a changed source URL forces a new download.
    $api_url = isset($new_data['MediaURL']) ? $new_data['MediaURL'] : null;

    return $existing->media_url !== $api_url;
}
/**
 * Download a media file
 *
 * Loads the media row, increments its download_attempts counter, then
 * fetches media_url with up to five tries; tries after the first wait
 * 1s, 2s, 4s and 8s respectively. Transport errors and HTTP
 * 429/500/502/503/504 are retried, any other non-200 status fails at once.
 * On success the body is written to "<listing dir>/<media_order>.<ext>" and
 * the row is updated with the local path/URL, size, MIME type and download
 * time. On failure the reason is stored in download_error.
 *
 * @param int $media_id Media record ID
 * @return bool Success
 */
public function download_media($media_id) {
    global $wpdb;

    $table = $this->db->media_table();

    $media = $wpdb->get_row($wpdb->prepare(
        "SELECT * FROM {$table} WHERE id = %d",
        $media_id
    ));
    if (!$media || empty($media->media_url)) {
        return false;
    }

    // Increment attempt counter
    $wpdb->update(
        $table,
        array('download_attempts' => $media->download_attempts + 1),
        array('id' => $media_id)
    );

    // Single failure path: store the reason on the row, optionally add it to
    // the missing-media log, and yield false so callers can "return $fail(...)"
    $fail = function ($error_msg, $log_missing) use ($wpdb, $table, $media, $media_id) {
        $wpdb->update(
            $table,
            array('download_error' => $error_msg),
            array('id' => $media_id)
        );
        if ($log_missing) {
            $this->log_missing_media($media, $error_msg);
        }
        return false;
    };

    // Download with exponential backoff for rate limits
    $max_retries = 5;
    $base_delay = 1; // Start with 1 second
    // Retryable errors: 429 (rate limit), 500, 502, 503, 504 (server errors)
    $retryable_codes = array(429, 500, 502, 503, 504);
    $response = null;

    for ($retry = 0; $retry < $max_retries; $retry++) {
        $is_last_try = ($retry === $max_retries - 1);

        // Exponential backoff before each retry: 1s, 2s, 4s, 8s
        if ($retry > 0) {
            $delay = $base_delay * pow(2, $retry - 1);
            $this->logger->debug('Media download retry', array(
                'media_id' => $media_id,
                'retry' => $retry,
                'delay' => $delay,
            ));
            sleep($delay);
        }

        $response = wp_remote_get($media->media_url, array(
            'timeout' => 60,
            'stream' => false,
        ));

        if (is_wp_error($response)) {
            $error_msg = $response->get_error_message();
            $this->logger->warning('Media download failed', array(
                'media_id' => $media_id,
                'error' => $error_msg,
                'retry' => $retry,
            ));
            if ($is_last_try) {
                return $fail($error_msg, true);
            }
            continue;
        }

        $status_code = (int) wp_remote_retrieve_response_code($response);

        // Success
        if ($status_code === 200) {
            break;
        }

        if (!$is_last_try && in_array($status_code, $retryable_codes, true)) {
            $this->logger->debug('Media download retryable error', array(
                'media_id' => $media_id,
                'status_code' => $status_code,
                'retry' => $retry,
            ));
            continue;
        }

        // Non-retryable or exhausted retries - record and fail
        return $fail("HTTP {$status_code}", true);
    }

    // Every non-200 outcome has returned inside the loop, so $response is the
    // successful response from here on
    $body = wp_remote_retrieve_body($response);
    if (empty($body)) {
        return $fail('Empty response', false);
    }

    // Determine file extension from content type or URL. A header that was
    // sent more than once comes back as an array; use its first value so the
    // extension lookup and the stored mime_type both receive a string
    $content_type = wp_remote_retrieve_header($response, 'content-type');
    if (is_array($content_type)) {
        $content_type = (string) reset($content_type);
    }
    $extension = $this->get_extension_from_content_type($content_type, $media->media_url);

    // Create directory
    $listing_dir = $this->get_listing_dir($media->listing_key);
    if (!file_exists($listing_dir)) {
        wp_mkdir_p($listing_dir);
    }

    // Save file
    $filename = $media->media_order . '.' . $extension;
    $file_path = $listing_dir . '/' . $filename;
    if (file_put_contents($file_path, $body) === false) {
        return $fail('Failed to write file', false);
    }

    // Calculate relative path
    $prefix = substr($media->listing_key, 0, 2);
    $relative_path = $prefix . '/' . $media->listing_key . '/' . $filename;
    $local_url = $this->get_upload_url() . '/' . $relative_path;

    // Update record
    $wpdb->update(
        $table,
        array(
            'local_path' => $relative_path,
            'local_url' => $local_url,
            'file_size' => strlen($body),
            'mime_type' => $content_type,
            'downloaded_at' => current_time('mysql'),
            'download_error' => null,
        ),
        array('id' => $media_id)
    );

    return true;
}
/**
 * Get file extension from content type
 *
 * The Content-Type header is tried first. When it is missing or not one of
 * the mapped image types, the extension of the URL path is used instead,
 * but only if it is itself one of the known image extensions. Anything
 * else resolves to 'jpg', so a remote URL can never dictate an arbitrary
 * (for example executable) extension for the file saved locally.
 *
 * @param string|array $content_type Content type header (an array when the header was sent more than once)
 * @param string $url Original URL as fallback
 * @return string File extension
 */
private function get_extension_from_content_type($content_type, $url) {
    $map = array(
        'image/jpeg' => 'jpg',
        'image/jpg' => 'jpg',
        'image/png' => 'png',
        'image/gif' => 'gif',
        'image/webp' => 'webp',
    );

    // A repeated header is delivered as an array; the first value is used
    if (is_array($content_type)) {
        $content_type = reset($content_type);
    }

    // Keep only the media type: drop parameters ("; charset=...") and padding
    $mime = strtolower(trim(explode(';', (string) $content_type)[0]));
    if (isset($map[$mime])) {
        return $map[$mime];
    }

    // Fallback to URL extension (parse_url yields null/false when there is no usable path)
    $path = parse_url((string) $url, PHP_URL_PATH);
    $ext = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';
    if ($ext === 'jpeg') {
        $ext = 'jpg';
    }

    // Only extensions the map itself can produce are accepted from the URL
    return in_array($ext, $map, true) ? $ext : 'jpg';
}
/**
* Delete all media for a property
*
@@ -558,44 +760,48 @@ class MLS_Media_Handler {
}
/**
 * Reset failed downloads for retry
 *
 * Puts every media row that is marked 'failed' or has used up
 * MAX_ATTEMPTS back into the 'pending' state: the attempt counter, error
 * text and retry_after are cleared and queued_at is set to the current time.
 * When a listing key is given, only that listing's rows are touched.
 *
 * @param string|null $listing_key Optional listing key to filter
 * @return int Number of records reset
 */
public function reset_failed_downloads($listing_key = null) {
    global $wpdb;

    // The failed-or-exhausted test is parenthesised because AND binds tighter
    // than OR: without the parentheses a listing filter would only constrain
    // the attempts half and every 'failed' row in the table would be reset
    $where = "(download_status = 'failed' OR download_attempts >= %d)";
    $values = array(current_time('mysql'), self::MAX_ATTEMPTS);

    if ($listing_key) {
        $where .= " AND listing_key = %s";
        $values[] = $listing_key;
    }

    // Placeholder order: queued_at, attempt threshold, then the optional listing key
    $sql = $wpdb->prepare(
        "UPDATE {$this->db->media_table()}
        SET download_status = 'pending',
            download_attempts = 0,
            download_error = NULL,
            retry_after = NULL,
            queued_at = %s
        WHERE {$where}",
        $values
    );

    $wpdb->query($sql);

    return $wpdb->rows_affected;
}
/**
@@ -646,4 +852,78 @@ class MLS_Media_Handler {
return $deleted;
}
/**
 * Fetch the newest entries from the media download log
 *
 * Rows are ordered by created_at, newest first, and capped at $limit.
 * Passing an action restricts the result to rows whose action column
 * equals it.
 *
 * @param int $limit Number of entries to return
 * @param string|null $action Optional action filter
 * @return array Log entries
 */
public function get_download_logs($limit = 100, $action = null) {
    global $wpdb;

    $log_table = $this->db->media_log_table();

    // The filter clause and its bound value are only present when an action was given
    $filter_sql = $action ? "WHERE action = %s" : '';
    $params = $action ? array($action, $limit) : array($limit);

    $query = $wpdb->prepare(
        "SELECT * FROM {$log_table}
        {$filter_sql}
        ORDER BY created_at DESC
        LIMIT %d",
        $params
    );

    return $wpdb->get_results($query);
}
/**
 * Purge download log rows older than a number of days
 *
 * @param int $days_old Delete logs older than this many days
 * @return int Number of entries deleted
 */
public function clear_old_logs($days_old = 7) {
    global $wpdb;

    // NOTE(review): date() follows PHP's default timezone, while other
    // timestamps in this class come from current_time('mysql') - confirm
    // created_at is written on the same clock
    $threshold_ts = strtotime("-{$days_old} days");
    $threshold = date('Y-m-d H:i:s', $threshold_ts);

    $delete_sql = $wpdb->prepare(
        "DELETE FROM {$this->db->media_log_table()} WHERE created_at < %s",
        $threshold
    );
    $wpdb->query($delete_sql);

    return $wpdb->rows_affected;
}
/**
 * Backward-compatible entry point for per-property media sync
 *
 * Nothing is downloaded here any more: the call is handed to
 * queue_property_media() and the actual download happens later via
 * process_queue(). $force is still accepted so existing callers keep
 * working, but it is not forwarded to the queue.
 *
 * @param string $listing_key Listing key
 * @param array $media_array Media array from API
 * @param bool $force Accepted for compatibility; not used
 * @param callable|null $progress_callback Callback for progress updates
 * @return mixed Whatever queue_property_media() returns
 */
public function sync_property_media($listing_key, $media_array, $force = false, $progress_callback = null) {
    $queue_result = $this->queue_property_media($listing_key, $media_array, $progress_callback);

    return $queue_result;
}
/**
 * Backward-compatible alias for process_queue()
 *
 * Existing callers of download_pending() keep working; both arguments are
 * passed straight through and the queue processor's result is returned.
 *
 * @param int $limit Max media to download
 * @param callable|null $progress_callback Callback for progress updates
 * @return array Stats
 */
public function download_pending($limit = 100, $progress_callback = null) {
    $queue_stats = $this->process_queue($limit, $progress_callback);

    return $queue_stats;
}
}
@@ -22,7 +22,7 @@ define('MLS_PLUGIN_FILE', __FILE__);
define('MLS_PLUGIN_DIR', plugin_dir_path(__FILE__));
define('MLS_PLUGIN_URL', plugin_dir_url(__FILE__));
define('MLS_PLUGIN_BASENAME', plugin_basename(__FILE__));
// Database schema version. Must be defined exactly once: define() keeps the
// first value it is given, so an earlier definition would pin the old version.
define('MLS_DB_VERSION', '1.1.0');
// Database table names (without prefix)
define('MLS_TABLE_PROPERTIES', 'mls_properties');
@@ -30,6 +30,7 @@ define('MLS_TABLE_MEDIA', 'mls_media');
// Remaining table names, also without the database prefix
define('MLS_TABLE_SYNC_STATE', 'mls_sync_state');
define('MLS_TABLE_RATE_LIMITS', 'mls_rate_limits');
define('MLS_TABLE_SYNC_LOG', 'mls_sync_log');
// Log table for media download attempts
define('MLS_TABLE_MEDIA_LOG', 'mls_media_log');
/**
* Main plugin class