Refactor MLS sync to Active/Pending only with on-demand media

Major changes to sync strategy following MLS Grid best practices:

- Initial sync now fetches only Active/Pending properties (~30K vs 1.3M)
- Replication (incremental sync) fetches all changes and deletes listings that are no longer Active/Pending
- On-demand media fetching replaces background queue (avoids rate limits)
- Media is downloaded and cached when first viewed, not during sync
- Updated CLI commands: wp mls media status/fetch/clear
- Comprehensive documentation with troubleshooting guide

This fixes the "Value out of range" API error caused by high $skip values.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Hanson.xyz Dev
2025-12-15 08:25:37 -06:00
parent 6eadf3d266
commit b9cddd2f64
6 changed files with 538 additions and 874 deletions
@@ -338,14 +338,12 @@ class MLS_CLI {
break;
case 'media':
// Redirect to the new media command
WP_CLI::line('Note: "wp mls sync media" is deprecated. Use "wp mls media process" instead.');
// Media is now on-demand, this sync type is deprecated
WP_CLI::line('Note: "wp mls sync media" is deprecated.');
WP_CLI::line('Media is now fetched on-demand when properties are viewed on the website.');
WP_CLI::line('');
$this->media(array('process'), array(
'limit' => $limit ?: 100,
'verbose' => $verbose,
'quiet' => $quiet,
));
WP_CLI::line('Use "wp mls media status" to see cache statistics.');
WP_CLI::line('Use "wp mls media fetch --listing=<key>" to pre-cache a specific listing.');
break;
case 'resume':
@@ -799,263 +797,119 @@ class MLS_CLI {
}
/**
* Manage media download queue.
* Show media cache status and manage cached files.
*
* Media is now fetched on-demand when properties are viewed on the website.
* This command shows cache statistics and allows management of cached files.
*
* ## OPTIONS
*
* <action>
* : Action: queue, process, status, reset, logs
* [<action>]
* : Action: status (default), fetch, clear
*
* [--listing=<key>]
* : Listing key for fetch or clear actions
*
* [--limit=<n>]
* : Limit number of items to process
*
* [--verbose]
* : Show detailed output
*
* [--quiet]
* : Suppress progress output
*
* [--days=<n>]
* : Days of logs to keep (for logs --clear)
*
* [--clear]
* : Clear logs older than --days
* : For fetch action, max images to fetch (default: 1)
*
* ## EXAMPLES
*
* wp mls media status # Show queue statistics
* wp mls media process # Process pending downloads (rate limited)
* wp mls media process --limit=50 # Process up to 50 items
* wp mls media reset # Reset failed downloads for retry
* wp mls media logs # Show recent download logs
* wp mls media logs --clear --days=7 # Clear logs older than 7 days
* wp mls media status # Show cache statistics
* wp mls media fetch --listing=NST123456 # Fetch images for a listing
* wp mls media fetch --listing=NST123456 --limit=10 # Fetch up to 10 images
* wp mls media clear --listing=NST123456 # Clear cached images for a listing
*
* @subcommand media
*/
public function media($args, $assoc_args) {
$action = isset($args[0]) ? $args[0] : 'status';
$limit = isset($assoc_args['limit']) ? (int) $assoc_args['limit'] : 100;
$verbose = isset($assoc_args['verbose']);
$quiet = isset($assoc_args['quiet']);
$media_handler = $this->plugin->get_media_handler();
switch ($action) {
case 'status':
case 'queue':
$stats = $media_handler->get_queue_stats();
$stats = $media_handler->get_cache_stats();
WP_CLI::line('');
WP_CLI::line('=== Media Download Queue ===');
WP_CLI::line('=== Media Cache Status ===');
WP_CLI::line('');
WP_CLI::line(sprintf('Pending total: %d', $stats['pending']));
WP_CLI::line(sprintf('Ready now: %d', $stats['ready']));
WP_CLI::line(sprintf('In backoff: %d (retry scheduled)', $stats['in_backoff']));
WP_CLI::line(sprintf('Failed: %d (max attempts reached)', $stats['failed']));
WP_CLI::line(sprintf('Completed: %d', $stats['completed']));
WP_CLI::line(sprintf('Total media records: %d', $stats['total_media']));
WP_CLI::line(sprintf('Cached locally: %d', $stats['cached']));
WP_CLI::line(sprintf('Not yet cached: %d', $stats['uncached']));
WP_CLI::line('');
if ($stats['ready'] > 0) {
WP_CLI::line(sprintf(
'Run "wp mls media process --limit=%d" to download pending media.',
min($stats['ready'], 100)
));
WP_CLI::line(sprintf(
'Estimated time: %d minutes (at 700ms per image)',
ceil($stats['ready'] * 0.7 / 60)
));
}
if ($stats['failed'] > 0) {
WP_CLI::line('');
WP_CLI::line('Run "wp mls media reset" to retry failed downloads.');
}
$cache_percent = $stats['total_media'] > 0
? round(($stats['cached'] / $stats['total_media']) * 100, 1)
: 0;
WP_CLI::line(sprintf('Cache rate: %.1f%%', $cache_percent));
WP_CLI::line('');
WP_CLI::line('Images are fetched on-demand when properties are viewed.');
WP_CLI::line('Use "wp mls media fetch --listing=<key>" to pre-cache specific listings.');
WP_CLI::line('');
break;
case 'process':
$stats = $media_handler->get_queue_stats();
if ($stats['ready'] === 0) {
WP_CLI::success('No media ready to download.');
break;
case 'fetch':
$listing_key = isset($assoc_args['listing']) ? $assoc_args['listing'] : null;
if (!$listing_key) {
WP_CLI::error('Please specify --listing=<key>');
}
$process_count = min($limit, $stats['ready']);
$limit = isset($assoc_args['limit']) ? (int) $assoc_args['limit'] : 1;
WP_CLI::line(sprintf(
'Processing %d media items (rate limited: 1 per 700ms)...',
$process_count
));
WP_CLI::line(sprintf(
'Estimated time: %d minutes',
ceil($process_count * 0.7 / 60)
));
WP_CLI::line(sprintf('Fetching up to %d images for listing %s...', $limit, $listing_key));
if (!$quiet) {
WP_CLI::line('Legend: P=downloaded B=backoff (retry later) E=error');
echo "\n";
}
$images = $media_handler->get_listing_images($listing_key, $limit);
// Progress callback
$progress_callback = null;
if (!$quiet) {
$progress_callback = function($event, $data = array()) use ($verbose) {
if ($verbose) {
$this->output_verbose_media_event($event, $data);
} else {
switch ($event) {
case 'media_downloaded':
echo 'P';
break;
case 'media_backoff':
echo 'B';
break;
case 'media_error':
echo 'E';
break;
}
}
};
}
$result = $media_handler->process_queue($process_count, $progress_callback);
if (!$quiet) {
echo "\n\n";
$cached_count = 0;
foreach ($images as $img) {
if ($img->local_url) {
$cached_count++;
}
}
WP_CLI::line(sprintf(
'Results: %d success, %d backoff, %d failed out of %d processed',
$result['success'],
$result['skipped'],
$result['failed'],
$result['processed']
'Result: %d/%d images now cached for this listing.',
$cached_count,
count($images)
));
// Show updated stats
$new_stats = $media_handler->get_queue_stats();
WP_CLI::line(sprintf('Queue remaining: %d ready, %d in backoff', $new_stats['ready'], $new_stats['in_backoff']));
if ($result['failed'] > 0 || $result['skipped'] > 0) {
WP_CLI::line('');
WP_CLI::line('Items in backoff will be retried after 3 hours.');
WP_CLI::line('Run "wp mls media logs" to see download history.');
}
if ($result['success'] > 0) {
WP_CLI::success('Media processing complete.');
}
break;
case 'reset':
WP_CLI::line('Resetting failed downloads for retry...');
$reset_count = $media_handler->reset_failed_downloads();
if ($reset_count > 0) {
WP_CLI::success(sprintf('Reset %d failed downloads. They will be retried on next process.', $reset_count));
if ($cached_count > 0) {
WP_CLI::success('Images fetched successfully.');
} elseif (count($images) === 0) {
WP_CLI::warning('No media records found for this listing.');
} else {
WP_CLI::success('No failed downloads to reset.');
WP_CLI::warning('Failed to fetch images. Check logs for details.');
}
break;
case 'logs':
if (isset($assoc_args['clear'])) {
$days = isset($assoc_args['days']) ? (int) $assoc_args['days'] : 7;
$deleted = $media_handler->clear_old_logs($days);
WP_CLI::success(sprintf('Deleted %d log entries older than %d days.', $deleted, $days));
break;
case 'clear':
$listing_key = isset($assoc_args['listing']) ? $assoc_args['listing'] : null;
if (!$listing_key) {
WP_CLI::error('Please specify --listing=<key>. To clear all media, use "wp mls cache clear --confirm".');
}
$logs = $media_handler->get_download_logs($limit);
// Just clear the local files, keep metadata
global $wpdb;
$listing_dir = $media_handler->get_listing_dir($listing_key);
if (empty($logs)) {
WP_CLI::success('No download logs found.');
break;
if (is_dir($listing_dir)) {
$this->recursive_delete($listing_dir);
}
WP_CLI::line('');
WP_CLI::line('=== Recent Download Logs ===');
WP_CLI::line('');
// Clear local_path and local_url but keep the records
$wpdb->query($wpdb->prepare(
"UPDATE {$this->plugin->get_db()->media_table()}
SET local_path = NULL, local_url = NULL, downloaded_at = NULL
WHERE listing_key = %s",
$listing_key
));
foreach ($logs as $log) {
$status_indicator = '';
switch ($log->action) {
case 'success':
$status_indicator = '[OK]';
break;
case 'rate_limited':
$status_indicator = '[429]';
break;
case 'permanent_error':
$status_indicator = '[ERR]';
break;
case 'error':
$status_indicator = '[FAIL]';
break;
default:
$status_indicator = "[{$log->action}]";
}
$line = sprintf(
'%s %s %s %s %dms',
$log->created_at,
$status_indicator,
$log->listing_key,
$log->media_key,
$log->response_time_ms
);
if ($log->status_code) {
$line .= " HTTP:{$log->status_code}";
}
if ($log->error_message) {
$line .= " - {$log->error_message}";
}
WP_CLI::line($line);
}
WP_CLI::line('');
WP_CLI::line(sprintf('Showing %d most recent entries. Use --limit=N to see more.', count($logs)));
WP_CLI::line('');
WP_CLI::success(sprintf('Cleared cached images for listing %s. They will be re-fetched on demand.', $listing_key));
break;
default:
WP_CLI::error("Unknown action: {$action}. Use 'status', 'process', 'reset', or 'logs'.");
}
}
/**
* Output verbose media event information
*
* @param string $event Event name
* @param array $data Event data
*/
private function output_verbose_media_event($event, $data) {
$timestamp = date('H:i:s');
switch ($event) {
case 'media_downloaded':
$listing = $data['listing_key'] ?? 'unknown';
$key = $data['media_key'] ?? 'unknown';
WP_CLI::line("[{$timestamp}] DOWNLOADED: {$listing} / {$key}");
break;
case 'media_backoff':
$listing = $data['listing_key'] ?? 'unknown';
$key = $data['media_key'] ?? 'unknown';
WP_CLI::warning("[{$timestamp}] BACKOFF: {$listing} / {$key} - will retry in 3 hours");
break;
case 'media_error':
$listing = $data['listing_key'] ?? 'unknown';
$key = $data['media_key'] ?? 'unknown';
$error = $data['error'] ?? 'Unknown error';
WP_CLI::error("[{$timestamp}] ERROR: {$listing} / {$key} - {$error}", false);
break;
WP_CLI::error("Unknown action: {$action}. Use 'status', 'fetch', or 'clear'.");
}
}