Refactor MLS sync to Active/Pending only with on-demand media

Major changes to sync strategy following MLS Grid best practices:

- Initial sync now fetches only Active/Pending properties (~30K vs 1.3M)
- Replication (incremental) sync fetches all changes and deletes records that are no longer Active/Pending
- On-demand media fetching replaces background queue (avoids rate limits)
- Media downloaded and cached when first viewed, not during sync
- Updated CLI commands: wp mls media status/fetch/clear
- Comprehensive documentation with troubleshooting guide

This fixes the "Value out of range" API error caused by high $skip values.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Hanson.xyz Dev
2025-12-15 08:25:37 -06:00
parent 6eadf3d266
commit b9cddd2f64
6 changed files with 538 additions and 874 deletions
@@ -338,14 +338,12 @@ class MLS_CLI {
break; break;
case 'media': case 'media':
// Redirect to the new media command // Media is now on-demand, this sync type is deprecated
WP_CLI::line('Note: "wp mls sync media" is deprecated. Use "wp mls media process" instead.'); WP_CLI::line('Note: "wp mls sync media" is deprecated.');
WP_CLI::line('Media is now fetched on-demand when properties are viewed on the website.');
WP_CLI::line(''); WP_CLI::line('');
$this->media(array('process'), array( WP_CLI::line('Use "wp mls media status" to see cache statistics.');
'limit' => $limit ?: 100, WP_CLI::line('Use "wp mls media fetch --listing=<key>" to pre-cache a specific listing.');
'verbose' => $verbose,
'quiet' => $quiet,
));
break; break;
case 'resume': case 'resume':
@@ -799,263 +797,119 @@ class MLS_CLI {
} }
/** /**
* Manage media download queue. * Show media cache status and manage cached files.
*
* Media is now fetched on-demand when properties are viewed on the website.
* This command shows cache statistics and allows management of cached files.
* *
* ## OPTIONS * ## OPTIONS
* *
* <action> * [<action>]
* : Action: queue, process, status, reset, logs * : Action: status (default), fetch, clear
*
* [--listing=<key>]
* : Listing key for fetch or clear actions
* *
* [--limit=<n>] * [--limit=<n>]
* : Limit number of items to process * : For fetch action, max images to fetch (default: 1)
*
* [--verbose]
* : Show detailed output
*
* [--quiet]
* : Suppress progress output
*
* [--days=<n>]
* : Days of logs to keep (for logs --clear)
*
* [--clear]
* : Clear logs older than --days
* *
* ## EXAMPLES * ## EXAMPLES
* *
* wp mls media status # Show queue statistics * wp mls media status # Show cache statistics
* wp mls media process # Process pending downloads (rate limited) * wp mls media fetch --listing=NST123456 # Fetch images for a listing
* wp mls media process --limit=50 # Process up to 50 items * wp mls media fetch --listing=NST123456 --limit=10 # Fetch up to 10 images
* wp mls media reset # Reset failed downloads for retry * wp mls media clear --listing=NST123456 # Clear cached images for a listing
* wp mls media logs # Show recent download logs
* wp mls media logs --clear --days=7 # Clear logs older than 7 days
* *
* @subcommand media * @subcommand media
*/ */
public function media($args, $assoc_args) { public function media($args, $assoc_args) {
$action = isset($args[0]) ? $args[0] : 'status'; $action = isset($args[0]) ? $args[0] : 'status';
$limit = isset($assoc_args['limit']) ? (int) $assoc_args['limit'] : 100;
$verbose = isset($assoc_args['verbose']);
$quiet = isset($assoc_args['quiet']);
$media_handler = $this->plugin->get_media_handler(); $media_handler = $this->plugin->get_media_handler();
switch ($action) { switch ($action) {
case 'status': case 'status':
case 'queue': $stats = $media_handler->get_cache_stats();
$stats = $media_handler->get_queue_stats();
WP_CLI::line(''); WP_CLI::line('');
WP_CLI::line('=== Media Download Queue ==='); WP_CLI::line('=== Media Cache Status ===');
WP_CLI::line(''); WP_CLI::line('');
WP_CLI::line(sprintf('Pending total: %d', $stats['pending'])); WP_CLI::line(sprintf('Total media records: %d', $stats['total_media']));
WP_CLI::line(sprintf('Ready now: %d', $stats['ready'])); WP_CLI::line(sprintf('Cached locally: %d', $stats['cached']));
WP_CLI::line(sprintf('In backoff: %d (retry scheduled)', $stats['in_backoff'])); WP_CLI::line(sprintf('Not yet cached: %d', $stats['uncached']));
WP_CLI::line(sprintf('Failed: %d (max attempts reached)', $stats['failed']));
WP_CLI::line(sprintf('Completed: %d', $stats['completed']));
WP_CLI::line(''); WP_CLI::line('');
if ($stats['ready'] > 0) { $cache_percent = $stats['total_media'] > 0
WP_CLI::line(sprintf( ? round(($stats['cached'] / $stats['total_media']) * 100, 1)
'Run "wp mls media process --limit=%d" to download pending media.', : 0;
min($stats['ready'], 100) WP_CLI::line(sprintf('Cache rate: %.1f%%', $cache_percent));
)); WP_CLI::line('');
WP_CLI::line(sprintf( WP_CLI::line('Images are fetched on-demand when properties are viewed.');
'Estimated time: %d minutes (at 700ms per image)', WP_CLI::line('Use "wp mls media fetch --listing=<key>" to pre-cache specific listings.');
ceil($stats['ready'] * 0.7 / 60)
));
}
if ($stats['failed'] > 0) {
WP_CLI::line('');
WP_CLI::line('Run "wp mls media reset" to retry failed downloads.');
}
WP_CLI::line(''); WP_CLI::line('');
break; break;
case 'process': case 'fetch':
$stats = $media_handler->get_queue_stats(); $listing_key = isset($assoc_args['listing']) ? $assoc_args['listing'] : null;
if (!$listing_key) {
if ($stats['ready'] === 0) { WP_CLI::error('Please specify --listing=<key>');
WP_CLI::success('No media ready to download.');
break;
} }
$process_count = min($limit, $stats['ready']); $limit = isset($assoc_args['limit']) ? (int) $assoc_args['limit'] : 1;
WP_CLI::line(sprintf( WP_CLI::line(sprintf('Fetching up to %d images for listing %s...', $limit, $listing_key));
'Processing %d media items (rate limited: 1 per 700ms)...',
$process_count
));
WP_CLI::line(sprintf(
'Estimated time: %d minutes',
ceil($process_count * 0.7 / 60)
));
if (!$quiet) { $images = $media_handler->get_listing_images($listing_key, $limit);
WP_CLI::line('Legend: P=downloaded B=backoff (retry later) E=error');
echo "\n";
}
// Progress callback $cached_count = 0;
$progress_callback = null; foreach ($images as $img) {
if (!$quiet) { if ($img->local_url) {
$progress_callback = function($event, $data = array()) use ($verbose) { $cached_count++;
if ($verbose) { }
$this->output_verbose_media_event($event, $data);
} else {
switch ($event) {
case 'media_downloaded':
echo 'P';
break;
case 'media_backoff':
echo 'B';
break;
case 'media_error':
echo 'E';
break;
}
}
};
}
$result = $media_handler->process_queue($process_count, $progress_callback);
if (!$quiet) {
echo "\n\n";
} }
WP_CLI::line(sprintf( WP_CLI::line(sprintf(
'Results: %d success, %d backoff, %d failed out of %d processed', 'Result: %d/%d images now cached for this listing.',
$result['success'], $cached_count,
$result['skipped'], count($images)
$result['failed'],
$result['processed']
)); ));
// Show updated stats if ($cached_count > 0) {
$new_stats = $media_handler->get_queue_stats(); WP_CLI::success('Images fetched successfully.');
WP_CLI::line(sprintf('Queue remaining: %d ready, %d in backoff', $new_stats['ready'], $new_stats['in_backoff'])); } elseif (count($images) === 0) {
WP_CLI::warning('No media records found for this listing.');
if ($result['failed'] > 0 || $result['skipped'] > 0) {
WP_CLI::line('');
WP_CLI::line('Items in backoff will be retried after 3 hours.');
WP_CLI::line('Run "wp mls media logs" to see download history.');
}
if ($result['success'] > 0) {
WP_CLI::success('Media processing complete.');
}
break;
case 'reset':
WP_CLI::line('Resetting failed downloads for retry...');
$reset_count = $media_handler->reset_failed_downloads();
if ($reset_count > 0) {
WP_CLI::success(sprintf('Reset %d failed downloads. They will be retried on next process.', $reset_count));
} else { } else {
WP_CLI::success('No failed downloads to reset.'); WP_CLI::warning('Failed to fetch images. Check logs for details.');
} }
break; break;
case 'logs': case 'clear':
if (isset($assoc_args['clear'])) { $listing_key = isset($assoc_args['listing']) ? $assoc_args['listing'] : null;
$days = isset($assoc_args['days']) ? (int) $assoc_args['days'] : 7; if (!$listing_key) {
$deleted = $media_handler->clear_old_logs($days); WP_CLI::error('Please specify --listing=<key>. To clear all media, use "wp mls cache clear --confirm".');
WP_CLI::success(sprintf('Deleted %d log entries older than %d days.', $deleted, $days));
break;
} }
$logs = $media_handler->get_download_logs($limit); // Just clear the local files, keep metadata
global $wpdb;
$listing_dir = $media_handler->get_listing_dir($listing_key);
if (empty($logs)) { if (is_dir($listing_dir)) {
WP_CLI::success('No download logs found.'); $this->recursive_delete($listing_dir);
break;
} }
WP_CLI::line(''); // Clear local_path and local_url but keep the records
WP_CLI::line('=== Recent Download Logs ==='); $wpdb->query($wpdb->prepare(
WP_CLI::line(''); "UPDATE {$this->plugin->get_db()->media_table()}
SET local_path = NULL, local_url = NULL, downloaded_at = NULL
WHERE listing_key = %s",
$listing_key
));
foreach ($logs as $log) { WP_CLI::success(sprintf('Cleared cached images for listing %s. They will be re-fetched on demand.', $listing_key));
$status_indicator = '';
switch ($log->action) {
case 'success':
$status_indicator = '[OK]';
break;
case 'rate_limited':
$status_indicator = '[429]';
break;
case 'permanent_error':
$status_indicator = '[ERR]';
break;
case 'error':
$status_indicator = '[FAIL]';
break;
default:
$status_indicator = "[{$log->action}]";
}
$line = sprintf(
'%s %s %s %s %dms',
$log->created_at,
$status_indicator,
$log->listing_key,
$log->media_key,
$log->response_time_ms
);
if ($log->status_code) {
$line .= " HTTP:{$log->status_code}";
}
if ($log->error_message) {
$line .= " - {$log->error_message}";
}
WP_CLI::line($line);
}
WP_CLI::line('');
WP_CLI::line(sprintf('Showing %d most recent entries. Use --limit=N to see more.', count($logs)));
WP_CLI::line('');
break; break;
default: default:
WP_CLI::error("Unknown action: {$action}. Use 'status', 'process', 'reset', or 'logs'."); WP_CLI::error("Unknown action: {$action}. Use 'status', 'fetch', or 'clear'.");
}
}
/**
* Output verbose media event information
*
* @param string $event Event name
* @param array $data Event data
*/
private function output_verbose_media_event($event, $data) {
$timestamp = date('H:i:s');
switch ($event) {
case 'media_downloaded':
$listing = $data['listing_key'] ?? 'unknown';
$key = $data['media_key'] ?? 'unknown';
WP_CLI::line("[{$timestamp}] DOWNLOADED: {$listing} / {$key}");
break;
case 'media_backoff':
$listing = $data['listing_key'] ?? 'unknown';
$key = $data['media_key'] ?? 'unknown';
WP_CLI::warning("[{$timestamp}] BACKOFF: {$listing} / {$key} - will retry in 3 hours");
break;
case 'media_error':
$listing = $data['listing_key'] ?? 'unknown';
$key = $data['media_key'] ?? 'unknown';
$error = $data['error'] ?? 'Unknown error';
WP_CLI::error("[{$timestamp}] ERROR: {$listing} / {$key} - {$error}", false);
break;
} }
} }
@@ -17,9 +17,8 @@ All tables use `{$wpdb->prefix}mls_` prefix:
| Table | Purpose | | Table | Purpose |
|-------|---------| |-------|---------|
| `mls_properties` | Listing data | | `mls_properties` | Listing data (Active/Pending only) |
| `mls_media` | Media files with download queue | | `mls_media` | Media metadata and cache status |
| `mls_media_log` | Media download attempt history |
| `mls_sync_state` | Sync progress tracking | | `mls_sync_state` | Sync progress tracking |
| `mls_rate_limits` | API usage tracking | | `mls_rate_limits` | API usage tracking |
| `mls_sync_log` | Debug logging | | `mls_sync_log` | Debug logging |
@@ -40,7 +39,7 @@ MUST comply with these limits:
- 40,000 requests/day - 40,000 requests/day
- 4GB data/hour - 4GB data/hour
Media downloads use 700ms delay (25% buffer) between requests. **Important**: The API rejects `$skip` values over ~80,000. Always use `@odata.nextLink` for pagination, never manual `$skip`.
### Key Files ### Key Files
@@ -48,7 +47,7 @@ Media downloads use 700ms delay (25% buffer) between requests.
|------|---------| |------|---------|
| `includes/class-mls-api-client.php` | API communication, auth, gzip | | `includes/class-mls-api-client.php` | API communication, auth, gzip |
| `includes/class-mls-sync-engine.php` | Sync orchestration | | `includes/class-mls-sync-engine.php` | Sync orchestration |
| `includes/class-mls-media-handler.php` | Media queue and download | | `includes/class-mls-media-handler.php` | On-demand media fetch and cache |
| `includes/class-mls-query.php` | Public query API | | `includes/class-mls-query.php` | Public query API |
| `includes/class-mls-rate-limiter.php` | Rate limit compliance | | `includes/class-mls-rate-limiter.php` | Rate limit compliance |
| `cli/class-mls-cli.php` | WP-CLI commands | | `cli/class-mls-cli.php` | WP-CLI commands |
@@ -64,18 +63,16 @@ wp mls test auth
wp mls status wp mls status
wp mls status rate-limits wp mls status rate-limits
# Run property sync (queues media, does not download) # Run property sync
wp mls sync full [--dry-run] [--limit=N] [--verbose] wp mls sync full [--dry-run] [--limit=N] [--verbose] # Initial: Active/Pending only
wp mls sync incremental [--dry-run] [--verbose] wp mls sync incremental [--dry-run] [--verbose] # Replication: all changes
wp mls sync resume --id=<sync_id> wp mls sync resume --id=<sync_id>
# Media download queue (separate from property sync) # Media cache (images fetched on-demand when viewed)
wp mls media status # Show queue stats wp mls media status # Show cache statistics
wp mls media process # Download queued media (rate limited) wp mls media fetch --listing=<key> # Pre-cache images for a listing
wp mls media process --limit=50 --verbose wp mls media fetch --listing=<key> --limit=10 # Fetch up to 10 images
wp mls media reset # Reset failed downloads for retry wp mls media clear --listing=<key> # Clear cached images for re-fetch
wp mls media logs # View download history
wp mls media logs --clear --days=7
# Statistics # Statistics
wp mls stats wp mls stats
@@ -83,9 +80,6 @@ wp mls stats
# Cache management # Cache management
wp mls cache clear --confirm wp mls cache clear --confirm
wp mls cache cleanup wp mls cache cleanup
wp mls cache missing # View failed media downloads
wp mls cache missing --limit=20 # View first 20 entries
wp mls cache missing --clear # Clear the log
# Recovery commands # Recovery commands
wp mls recovery list # Show resumable syncs wp mls recovery list # Show resumable syncs
@@ -93,26 +87,64 @@ wp mls recovery auto # Auto-resume most recent failed sync
wp mls recovery cleanup # Mark stale (>1hr) syncs as failed wp mls recovery cleanup # Mark stale (>1hr) syncs as failed
``` ```
### Media Queue System ## Sync Strategy (IMPORTANT)
Media downloads are now queue-based and separate from property sync: The sync follows MLS Grid best practices for replication:
1. **Property sync** (`wp mls sync full/incremental`) queues media records ### Initial Import (`wp mls sync full`)
2. **Media process** (`wp mls media process`) downloads queued media with rate limiting
3. Downloads are rate-limited to 700ms between requests (under 2/sec limit)
4. Failed downloads get 3-hour backoff before retry
5. After 5 attempts, items are marked failed and logged
**Queue states:** - Fetches ONLY `Active` and `Pending` properties
- `pending` - Ready for download - Filter: `MlgCanView eq true and (StandardStatus eq 'Active' or StandardStatus eq 'Pending')`
- `completed` - Successfully downloaded - Uses `@odata.nextLink` for pagination (NOT `$skip`)
- `failed` - Max attempts reached - Stores media metadata but does NOT download images
- ~30,000 records for NorthStar MLS (vs 1.3M total including Closed)
**Media table columns:** ### Replication (`wp mls sync incremental`)
- `download_status` - pending/completed/failed
- `retry_after` - Next retry time (3hr backoff on rate limit) - Fetches ALL properties modified since last sync
- `queued_at` - When item was queued - NO filter on `MlgCanView` or `StandardStatus` - we need to see changes
- `download_attempts` - Attempt count (max 5) - For each record received:
- If `MlgCanView = false` -> DELETE from local DB
- If `StandardStatus` not in (Active, Pending) -> DELETE from local DB
- Otherwise -> INSERT or UPDATE
- This handles: new listings, price changes, status changes (e.g. Active->Closed), and removals
### Why This Approach?
1. **MLS Grid API limits `$skip` to ~80,000** - bulk scanning all 1.3M records fails
2. **We only care about available properties** - no need to store Closed/Sold
3. **Replication is efficient** - only fetches changed records
4. **Proper deletion handling** - when a property sells, we remove it
### Data Flow
```
Initial Import:
API (Active/Pending + MlgCanView=true) -> Local DB
Replication (every 15 min):
API (ModificationTimestamp > last_sync) -> Check each record:
- MlgCanView=false OR Status!=Active/Pending -> DELETE locally
- Otherwise -> UPSERT locally
```
## Media System (On-Demand Fetching)
Per MLS Grid rules, media URLs must NOT be used directly on websites. Images must be downloaded and served from our own server.
**How it works:**
1. **Property sync** stores media metadata (URLs, keys, order) but does NOT download images
2. **On-demand fetch**: When `mls_get_property_image()` is called, the image is fetched and cached locally
3. **Subsequent requests** serve from local cache
4. **Pre-caching**: Use `wp mls media fetch --listing=<key>` to pre-cache specific listings
**Benefits:**
- No rate limit issues from bulk downloading
- Images cached only when needed (saves bandwidth/storage)
- Automatic re-fetch if cache is cleared
- Works with MLS Grid's image URL expiration
**Cache location:** `wp-content/uploads/mls-listings/{prefix}/{listing_key}/`
### Progress Output ### Progress Output
@@ -121,23 +153,10 @@ Property sync (compact mode):
- `#` = property updated - `#` = property updated
- `x` = property deleted - `x` = property deleted
- `-` = skipped (dry-run) - `-` = skipped (dry-run)
- `q` = media queued
- `p` = media skipped (already downloaded)
- `|` = page complete - `|` = page complete
Media process (compact mode):
- `P` = downloaded
- `B` = backoff (retry later)
- `E` = error
With --verbose: Full timestamped output. With --verbose: Full timestamped output.
### Missing Media Log
Permanently failed media downloads logged to: `wp-content/uploads/mls-missing-media.log`
Format: `[timestamp] listing_key | media_key | error | url`
### Sync Recovery ### Sync Recovery
The sync engine saves progress after each page: The sync engine saves progress after each page:
@@ -152,16 +171,15 @@ The sync engine saves progress after each page:
### Recommended Cron Setup ### Recommended Cron Setup
```bash ```bash
# Property sync every 30 minutes # Replication sync every 15 minutes (MLS Grid recommended)
*/30 * * * * cd /var/www/html && wp mls recovery auto --quiet && wp mls sync incremental --allow-root >> /var/log/mls-sync.log 2>&1 */15 * * * * cd /var/www/html && wp mls sync incremental --allow-root >> /var/log/mls-sync.log 2>&1
# Media downloads every 5 minutes (processes up to 50 items per run) # Full re-sync weekly (Sunday 3am) - rebuilds from scratch
*/5 * * * * cd /var/www/html && wp mls media process --limit=50 --quiet --allow-root >> /var/log/mls-media.log 2>&1 0 3 * * 0 cd /var/www/html && wp mls cache clear --confirm --allow-root && wp mls sync full --allow-root >> /var/log/mls-sync.log 2>&1
# Full sync weekly (Sunday 3am)
0 3 * * 0 cd /var/www/html && wp mls sync full --allow-root >> /var/log/mls-sync.log 2>&1
``` ```
Note: No separate media cron needed - images are fetched on-demand when properties are viewed.
### Public API Functions ### Public API Functions
Available for themes/plugins: Available for themes/plugins:
@@ -178,9 +196,19 @@ $properties = mls_get_properties([
// Get single property // Get single property
$property = mls_get_property('NST123456'); $property = mls_get_property('NST123456');
// Get media // Get media (on-demand fetching)
$image_url = mls_get_property_image('NST123456'); // Fetches if not cached
$image_url = mls_get_property_image('NST123456', false); // Return null if not cached
// Get all images (fetches first N on demand)
$images = mls_get_property_images('NST123456'); // Fetches only the first image if uncached
$images = mls_get_property_images('NST123456', 5); // Fetches first 5 if uncached
// Get media metadata (no fetch)
$media = mls_get_property_media('NST123456'); $media = mls_get_property_media('NST123456');
$image_url = mls_get_property_image('NST123456');
// Get cache statistics
$stats = mls_get_cache_stats(); // Returns total_media, cached, uncached counts
// Get distinct values // Get distinct values
$cities = mls_get_cities('Active'); $cities = mls_get_cities('Active');
@@ -189,20 +217,12 @@ $cities = mls_get_cities('Active');
if (mls_is_available()) { ... } if (mls_is_available()) { ... }
``` ```
### Sync Strategy
1. **Property Sync**: Full/incremental sync downloads property data and queues media
2. **Media Queue**: Separate process downloads media with rate limiting
3. **Delete Handling**: MlgCanView=false triggers local deletion
4. **Media Storage**: Downloads to wp-content/uploads/mls-listings/
5. **Recovery**: Stores last_next_link for resume on failure
### Testing After Changes ### Testing After Changes
```bash ```bash
wp mls test connection wp mls test connection
wp mls test auth wp mls test auth
wp mls sync full --dry-run --limit=10 wp mls sync full --dry-run --limit=10 --verbose
wp mls media status wp mls media status
wp mls stats wp mls stats
``` ```
@@ -226,3 +246,28 @@ Key fields from API to database:
| MlgCanView | mlg_can_view | | MlgCanView | mlg_can_view |
Full API response stored in `raw_data` column as JSON. Full API response stored in `raw_data` column as JSON.
## Troubleshooting
### "Value out of range" error
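The API rejects `$skip` values over ~80,000, so this error means a sync paged through too many records (for example, a full sync that was not limited to Active/Pending). Clear the data and re-run the initial sync: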
```bash
wp mls cache clear --confirm --allow-root
wp mls sync full --allow-root
```
### All properties showing as "Sold"
The initial sync was run without the Active/Pending filter. Clear and re-sync:
```bash
wp mls cache clear --confirm --allow-root
wp mls sync full --allow-root
```
### Media not loading
Images are fetched on-demand. Check:
1. `wp mls media status` - see cache stats
2. `wp mls media fetch --listing=<key>` - manually fetch for a listing
3. Check `wp-content/uploads/mls-listings/` directory permissions
### Sync taking too long
Initial sync of ~30K Active/Pending properties takes about 30-45 minutes. Use `--verbose` to see progress.
@@ -318,26 +318,53 @@ class MLS_API_Client {
} }
/** /**
* Get properties including those marked for deletion (for sync) * Get properties for initial sync (Active/Pending only)
* *
* @param string|null $timestamp Optional modification timestamp filter
* @param string|null $expand Expand parameter * @param string|null $expand Expand parameter
* @param int|null $top Number of records * @param int|null $top Number of records
* @return array|WP_Error Response data or error * @return array|WP_Error Response data or error
*/ */
public function get_properties_for_sync($timestamp = null, $expand = null, $top = null) { public function get_properties_for_initial_sync($expand = null, $top = null) {
// Don't filter by MlgCanView for sync - we need to see deleted records
$params = array(); $params = array();
$system = $this->options->get_originating_system(); $system = $this->options->get_originating_system();
if ($timestamp) { // Initial sync: only Active/Pending with MlgCanView=true
$params['$filter'] = "OriginatingSystemName eq '{$system}' and ModificationTimestamp gt {$timestamp}"; $params['$filter'] = "OriginatingSystemName eq '{$system}' and MlgCanView eq true and (StandardStatus eq 'Active' or StandardStatus eq 'Pending')";
} else {
// Initial sync - only get viewable records if ($expand) {
$params['$filter'] = "OriginatingSystemName eq '{$system}' and MlgCanView eq true"; $params['$expand'] = $expand;
} }
if ($top) {
$params['$top'] = min($top, $expand ? self::MAX_TOP_WITH_EXPAND : self::MAX_TOP_NO_EXPAND);
} else {
$params['$top'] = $expand ? self::MAX_TOP_WITH_EXPAND : self::DEFAULT_TOP;
}
return $this->request('Property', $params);
}
/**
* Get properties modified since timestamp (for replication)
*
* Does NOT filter by MlgCanView or StandardStatus so we can detect:
* - Records that became unavailable (MlgCanView=false)
* - Records that changed status (Active -> Sold)
*
* @param string $timestamp ISO 8601 modification timestamp
* @param string|null $expand Expand parameter
* @param int|null $top Number of records
* @return array|WP_Error Response data or error
*/
public function get_properties_for_replication($timestamp, $expand = null, $top = null) {
$params = array();
$system = $this->options->get_originating_system();
// Replication: get ALL changes since timestamp (no MlgCanView or Status filter)
$params['$filter'] = "OriginatingSystemName eq '{$system}' and ModificationTimestamp gt {$timestamp}";
if ($expand) { if ($expand) {
$params['$expand'] = $expand; $params['$expand'] = $expand;
} }
@@ -2,11 +2,12 @@
/** /**
* MLS Media Handler * MLS Media Handler
* *
* Handles downloading and managing media files from MLS listings * Handles on-demand fetching and caching of media files from MLS listings.
* Uses a queue-based system with rate limiting to comply with API limits * Images are downloaded when first requested and cached locally.
* *
* Rate limits: 2 requests/second (500ms minimum between requests) * Per MLS Grid rules:
* We use 700ms between requests (25% buffer) * - MediaURLs must NOT be used directly on websites
* - Images must be downloaded and served from our own server
*/ */
if (!defined('ABSPATH')) { if (!defined('ABSPATH')) {
@@ -20,21 +21,6 @@ class MLS_Media_Handler {
*/ */
const UPLOAD_SUBDIR = 'mls-listings'; const UPLOAD_SUBDIR = 'mls-listings';
/**
* Minimum delay between media downloads in milliseconds (700ms = 25% buffer over 500ms limit)
*/
const DOWNLOAD_DELAY_MS = 700;
/**
* Retry backoff time in hours for failed downloads
*/
const RETRY_BACKOFF_HOURS = 3;
/**
* Maximum download attempts before permanent failure
*/
const MAX_ATTEMPTS = 5;
/** /**
* Database instance * Database instance
*/ */
@@ -80,28 +66,28 @@ class MLS_Media_Handler {
* @return string Absolute path * @return string Absolute path
*/ */
public function get_listing_dir($listing_key) { public function get_listing_dir($listing_key) {
// Use first 2 characters as subdirectory to prevent too many files in one folder
$prefix = substr($listing_key, 0, 2); $prefix = substr($listing_key, 0, 2);
return $this->get_upload_dir() . '/' . $prefix . '/' . $listing_key; return $this->get_upload_dir() . '/' . $prefix . '/' . $listing_key;
} }
/** /**
* Queue media for a property (does NOT download immediately) * Store media metadata from API sync (no download)
* *
* @param string $listing_key Listing key * @param string $listing_key Listing key
* @param array $media_array Media array from API * @param array $media_array Media array from API
* @param callable|null $progress_callback Callback for progress updates * @param callable|null $progress_callback Callback for progress updates
* @return array Stats
*/ */
public function queue_property_media($listing_key, $media_array, $progress_callback = null) { public function sync_property_media($listing_key, $media_array, $force = false, $progress_callback = null) {
global $wpdb; global $wpdb;
if (empty($media_array)) { if (empty($media_array)) {
return; return array('stored' => 0, 'skipped' => 0);
} }
$received_keys = array(); $received_keys = array();
$queued_count = 0; $stored = 0;
$skipped_count = 0; $skipped = 0;
foreach ($media_array as $media) { foreach ($media_array as $media) {
$media_key = $media['MediaKey'] ?? null; $media_key = $media['MediaKey'] ?? null;
@@ -134,40 +120,35 @@ class MLS_Media_Handler {
); );
if ($existing) { if ($existing) {
// Update existing record // Check if URL changed - if so, clear cached file
if ($existing->media_url !== ($media['MediaURL'] ?? null) && $existing->local_path) {
$file_path = $this->get_upload_dir() . '/' . $existing->local_path;
if (file_exists($file_path)) {
unlink($file_path);
}
$data['local_path'] = null;
$data['local_url'] = null;
$data['downloaded_at'] = null;
}
$wpdb->update( $wpdb->update(
$this->db->media_table(), $this->db->media_table(),
$data, $data,
array('id' => $existing->id) array('id' => $existing->id)
); );
$skipped++;
// Check if we need to re-download (queue it)
if ($this->needs_download($existing, $media)) {
$this->mark_for_download($existing->id);
$queued_count++;
if ($progress_callback) {
call_user_func($progress_callback, 'media_queued', array('media_key' => $media_key));
}
} else {
$skipped_count++;
if ($progress_callback) {
call_user_func($progress_callback, 'media_skipped', array('media_key' => $media_key));
}
}
} else { } else {
// Insert new record - queued for download
$data['created_at'] = current_time('mysql'); $data['created_at'] = current_time('mysql');
$data['queued_at'] = current_time('mysql');
$data['download_status'] = 'pending';
$wpdb->insert($this->db->media_table(), $data); $wpdb->insert($this->db->media_table(), $data);
$queued_count++; $stored++;
if ($progress_callback) { }
call_user_func($progress_callback, 'media_queued', array('media_key' => $media_key));
} if ($progress_callback) {
call_user_func($progress_callback, 'media_stored', array('media_key' => $media_key));
} }
} }
// Delete media that no longer exists // Delete orphaned media records
if (!empty($received_keys)) { if (!empty($received_keys)) {
$placeholders = implode(',', array_fill(0, count($received_keys), '%s')); $placeholders = implode(',', array_fill(0, count($received_keys), '%s'));
$values = array_merge(array($listing_key), $received_keys); $values = array_merge(array($listing_key), $received_keys);
@@ -179,347 +160,203 @@ class MLS_Media_Handler {
)); ));
foreach ($orphaned as $record) { foreach ($orphaned as $record) {
// Delete file if exists
if ($record->local_path) { if ($record->local_path) {
$file_path = $this->get_upload_dir() . '/' . $record->local_path; $file_path = $this->get_upload_dir() . '/' . $record->local_path;
if (file_exists($file_path)) { if (file_exists($file_path)) {
unlink($file_path); unlink($file_path);
} }
} }
// Delete record
$wpdb->delete($this->db->media_table(), array('id' => $record->id)); $wpdb->delete($this->db->media_table(), array('id' => $record->id));
} }
} }
return array( return array('stored' => $stored, 'skipped' => $skipped);
'queued' => $queued_count,
'skipped' => $skipped_count,
);
} }
/** /**
* Mark a media record for download * Get image URL for a media record, fetching on-demand if needed
* *
* @param int $media_id Media ID * @param int|object $media Media ID or media record object
* @param bool $fetch_if_missing Whether to fetch if not cached
* @return string|null Local URL or null
*/ */
private function mark_for_download($media_id) { public function get_image_url($media, $fetch_if_missing = true) {
global $wpdb; global $wpdb;
$wpdb->update( // Get media record if ID passed
$this->db->media_table(), if (is_numeric($media)) {
array( $media = $wpdb->get_row($wpdb->prepare(
'download_status' => 'pending', "SELECT * FROM {$this->db->media_table()} WHERE id = %d",
'queued_at' => current_time('mysql'), $media
'local_path' => null, ));
'local_url' => null, }
'downloaded_at' => null,
'download_error' => null, if (!$media) {
), return null;
array('id' => $media_id) }
);
// Already cached
if ($media->local_url && $media->local_path) {
$file_path = $this->get_upload_dir() . '/' . $media->local_path;
if (file_exists($file_path)) {
return $media->local_url;
}
}
// Fetch on demand
if ($fetch_if_missing && $media->media_url) {
$result = $this->fetch_and_cache($media);
if ($result) {
return $result;
}
}
return null;
} }
/** /**
* Check if media needs to be downloaded * Get primary image URL for a listing (on-demand)
* *
* @param object $existing Existing media record * @param string $listing_key Listing key
* @param array $new_data New media data from API * @param bool $fetch_if_missing Whether to fetch if not cached
* @return bool * @return string|null Image URL
*/ */
private function needs_download($existing, $new_data) { public function get_primary_image($listing_key, $fetch_if_missing = true) {
// No local file
if (empty($existing->local_path)) {
return true;
}
// File doesn't exist
$file_path = $this->get_upload_dir() . '/' . $existing->local_path;
if (!file_exists($file_path)) {
return true;
}
// Media URL changed
if ($existing->media_url !== ($new_data['MediaURL'] ?? null)) {
return true;
}
return false;
}
/**
* Get the next media item to download from the queue
*
* @return object|null Media record or null if queue is empty
*/
public function get_next_queued() {
global $wpdb; global $wpdb;
$now = current_time('mysql'); // First check for already-cached image
$cached = $wpdb->get_row($wpdb->prepare(
// Get next pending item that's not in retry backoff
return $wpdb->get_row($wpdb->prepare(
"SELECT * FROM {$this->db->media_table()} "SELECT * FROM {$this->db->media_table()}
WHERE download_status = 'pending' WHERE listing_key = %s AND local_url IS NOT NULL AND local_path IS NOT NULL
AND media_url IS NOT NULL ORDER BY media_order ASC
AND download_attempts < %d
AND (retry_after IS NULL OR retry_after <= %s)
ORDER BY queued_at ASC
LIMIT 1", LIMIT 1",
self::MAX_ATTEMPTS, $listing_key
$now
)); ));
}
/** if ($cached) {
* Get queue statistics $file_path = $this->get_upload_dir() . '/' . $cached->local_path;
* if (file_exists($file_path)) {
* @return array Queue stats return $cached->local_url;
*/
public function get_queue_stats() {
    global $wpdb;

    // Single reference time so "ready" and "in_backoff" use the same cutoff.
    $now = current_time('mysql');

    $stats = array();

    // Everything still waiting, regardless of backoff or attempt count.
    $pending_sql = "SELECT COUNT(*) FROM {$this->db->media_table()}
        WHERE download_status = 'pending'";
    $stats['pending'] = (int) $wpdb->get_var($pending_sql);

    // Pending rows that could be downloaded right now.
    $ready_sql = $wpdb->prepare(
        "SELECT COUNT(*) FROM {$this->db->media_table()}
        WHERE download_status = 'pending'
        AND media_url IS NOT NULL
        AND download_attempts < %d
        AND (retry_after IS NULL OR retry_after <= %s)",
        self::MAX_ATTEMPTS,
        $now
    );
    $stats['ready'] = (int) $wpdb->get_var($ready_sql);

    // Pending rows whose retry time has not arrived yet.
    $backoff_sql = $wpdb->prepare(
        "SELECT COUNT(*) FROM {$this->db->media_table()}
        WHERE download_status = 'pending'
        AND retry_after > %s",
        $now
    );
    $stats['in_backoff'] = (int) $wpdb->get_var($backoff_sql);

    // Explicitly failed rows, plus rows that used up their attempts.
    $failed_sql = $wpdb->prepare(
        "SELECT COUNT(*) FROM {$this->db->media_table()}
        WHERE download_status = 'failed'
        OR download_attempts >= %d",
        self::MAX_ATTEMPTS
    );
    $stats['failed'] = (int) $wpdb->get_var($failed_sql);

    $completed_sql = "SELECT COUNT(*) FROM {$this->db->media_table()}
        WHERE download_status = 'completed'";
    $stats['completed'] = (int) $wpdb->get_var($completed_sql);

    return $stats;
}
/**
* Process media queue with rate limiting
*
* @param int $limit Max items to process
* @param callable|null $progress_callback Callback for progress updates
* @return array Processing stats
*/
public function process_queue($limit = 100, $progress_callback = null) {
$stats = array(
'processed' => 0,
'success' => 0,
'failed' => 0,
'skipped' => 0,
);
$last_download_time = 0;
for ($i = 0; $i < $limit; $i++) {
$media = $this->get_next_queued();
if (!$media) {
// Queue empty
break;
}
// Rate limiting: ensure minimum delay between downloads
$now_ms = microtime(true) * 1000;
$elapsed = $now_ms - $last_download_time;
if ($elapsed < self::DOWNLOAD_DELAY_MS && $last_download_time > 0) {
$wait_ms = (int) (self::DOWNLOAD_DELAY_MS - $elapsed);
usleep($wait_ms * 1000);
}
// Download the media
$result = $this->download_media($media->id);
$last_download_time = microtime(true) * 1000;
$stats['processed']++;
if ($result === true) {
$stats['success']++;
if ($progress_callback) {
call_user_func($progress_callback, 'media_downloaded', array(
'media_key' => $media->media_key,
'listing_key' => $media->listing_key,
));
}
} elseif ($result === 'backoff') {
$stats['skipped']++;
if ($progress_callback) {
call_user_func($progress_callback, 'media_backoff', array(
'media_key' => $media->media_key,
'listing_key' => $media->listing_key,
));
}
} else {
$stats['failed']++;
if ($progress_callback) {
call_user_func($progress_callback, 'media_error', array(
'media_key' => $media->media_key,
'listing_key' => $media->listing_key,
'error' => $result,
));
}
} }
} }
return $stats; // Get first media record (may not be cached)
}
/**
* Download a media file
*
* @param int $media_id Media record ID
* @return bool|string True on success, 'backoff' if set for retry, error message on failure
*/
public function download_media($media_id) {
global $wpdb;
$media = $wpdb->get_row($wpdb->prepare( $media = $wpdb->get_row($wpdb->prepare(
"SELECT * FROM {$this->db->media_table()} WHERE id = %d", "SELECT * FROM {$this->db->media_table()}
$media_id WHERE listing_key = %s AND media_url IS NOT NULL
ORDER BY media_order ASC
LIMIT 1",
$listing_key
)); ));
if (!$media || empty($media->media_url)) { if (!$media) {
return 'No media URL'; return null;
} }
// Increment attempt counter // If already cached and file exists, return it
$wpdb->update( if ($media->local_url && $media->local_path) {
$this->db->media_table(), $file_path = $this->get_upload_dir() . '/' . $media->local_path;
array('download_attempts' => $media->download_attempts + 1), if (file_exists($file_path)) {
array('id' => $media_id) return $media->local_url;
); }
}
// Make the request // Fetch on demand
$start_time = microtime(true); if ($fetch_if_missing) {
return $this->fetch_and_cache($media);
}
$response = wp_remote_get($media->media_url, array( return null;
'timeout' => 60, }
'stream' => false,
/**
* Get all images for a listing (on-demand for first N)
*
* @param string $listing_key Listing key
* @param int $fetch_limit Max images to fetch on-demand (0 = none)
* @return array Media records with local_url populated where available
*/
public function get_listing_images($listing_key, $fetch_limit = 1) {
global $wpdb;
$media = $wpdb->get_results($wpdb->prepare(
"SELECT * FROM {$this->db->media_table()}
WHERE listing_key = %s
ORDER BY media_order ASC",
$listing_key
)); ));
$response_time_ms = (int) ((microtime(true) - $start_time) * 1000); if (empty($media)) {
$status_code = 0; return array();
$error_msg = null; }
$fetched = 0;
foreach ($media as &$item) {
// Check if cached and file exists
if ($item->local_url && $item->local_path) {
$file_path = $this->get_upload_dir() . '/' . $item->local_path;
if (file_exists($file_path)) {
continue;
}
}
// Fetch on demand up to limit
if ($fetched < $fetch_limit && $item->media_url) {
$url = $this->fetch_and_cache($item);
if ($url) {
$item->local_url = $url;
$fetched++;
}
}
}
return $media;
}
/**
* Fetch image from MLS Grid and cache locally
*
* @param object $media Media record
* @return string|null Local URL on success, null on failure
*/
private function fetch_and_cache($media) {
global $wpdb;
if (empty($media->media_url)) {
return null;
}
// Download the image
$response = wp_remote_get($media->media_url, array(
'timeout' => 30,
));
if (is_wp_error($response)) { if (is_wp_error($response)) {
$error_msg = $response->get_error_message(); $this->logger->warning('Media fetch failed', array(
$this->log_download($media, 'error', null, $response_time_ms, $error_msg); 'listing_key' => $media->listing_key,
$this->handle_download_failure($media_id, $error_msg, false); 'media_key' => $media->media_key,
return $error_msg; 'error' => $response->get_error_message(),
));
return null;
} }
$status_code = wp_remote_retrieve_response_code($response); $status_code = wp_remote_retrieve_response_code($response);
$this->log_download($media, 'attempt', $status_code, $response_time_ms, null);
// Success if ($status_code !== 200) {
if ($status_code === 200) { $this->logger->warning('Media fetch HTTP error', array(
$body = wp_remote_retrieve_body($response); 'listing_key' => $media->listing_key,
if (empty($body)) { 'media_key' => $media->media_key,
$error_msg = 'Empty response body'; 'status' => $status_code,
$this->log_download($media, 'error', $status_code, $response_time_ms, $error_msg); ));
$this->handle_download_failure($media_id, $error_msg, false); return null;
return $error_msg;
}
// Save the file
$save_result = $this->save_media_file($media, $body, $response);
if ($save_result !== true) {
$this->log_download($media, 'error', $status_code, $response_time_ms, $save_result);
$this->handle_download_failure($media_id, $save_result, false);
return $save_result;
}
$this->log_download($media, 'success', $status_code, $response_time_ms, null);
return true;
} }
// Rate limited (429) or server error (5xx) - set backoff $body = wp_remote_retrieve_body($response);
$retryable = in_array($status_code, array(429, 500, 502, 503, 504)); if (empty($body)) {
$error_msg = "HTTP {$status_code}"; return null;
if ($retryable) {
$this->log_download($media, 'rate_limited', $status_code, $response_time_ms, $error_msg);
$this->handle_download_failure($media_id, $error_msg, true);
return 'backoff';
} }
// Permanent failure (404, 403, etc.) // Determine extension
$this->log_download($media, 'permanent_error', $status_code, $response_time_ms, $error_msg);
$this->handle_download_failure($media_id, $error_msg, false);
return $error_msg;
}
/**
 * Record a failed download attempt on a media row.
 *
 * Always stores the error message. Optionally schedules a retry by setting
 * retry_after, and flips the row to 'failed' (and writes it to the missing
 * media log) once download_attempts has reached MAX_ATTEMPTS.
 *
 * @param int    $media_id    Media ID
 * @param string $error       Error message
 * @param bool   $set_backoff Whether to set retry backoff
 */
private function handle_download_failure($media_id, $error, $set_backoff) {
    global $wpdb;

    $media = $wpdb->get_row($wpdb->prepare(
        "SELECT * FROM {$this->db->media_table()} WHERE id = %d",
        $media_id
    ));

    $update_data = array(
        'download_error' => $error,
    );

    if ($set_backoff) {
        // retry_after is compared against current_time('mysql') by the queue
        // queries, so it must be derived from that same clock. Using
        // date()/strtotime('+N hours') on their own follows PHP's default
        // timezone instead, which shifts the backoff window by the site's
        // GMT offset whenever the two differ.
        $base_ts = strtotime(current_time('mysql'));
        $update_data['retry_after'] = date(
            'Y-m-d H:i:s',
            (int) ($base_ts + self::RETRY_BACKOFF_HOURS * 3600)
        );
    }

    // Give up permanently once the attempt budget is spent.
    if ($media && $media->download_attempts >= self::MAX_ATTEMPTS) {
        $update_data['download_status'] = 'failed';
        $this->log_missing_media($media, $error);
    }

    $wpdb->update(
        $this->db->media_table(),
        $update_data,
        array('id' => $media_id)
    );
}
/**
* Save downloaded media file to disk
*
* @param object $media Media record
* @param string $body File contents
* @param array $response HTTP response
* @return bool|string True on success, error message on failure
*/
private function save_media_file($media, $body, $response) {
global $wpdb;
// Determine file extension from content type or URL
$content_type = wp_remote_retrieve_header($response, 'content-type'); $content_type = wp_remote_retrieve_header($response, 'content-type');
$extension = $this->get_extension_from_content_type($content_type, $media->media_url); $extension = $this->get_extension_from_content_type($content_type, $media->media_url);
@@ -534,15 +371,17 @@ class MLS_Media_Handler {
$file_path = $listing_dir . '/' . $filename; $file_path = $listing_dir . '/' . $filename;
if (file_put_contents($file_path, $body) === false) { if (file_put_contents($file_path, $body) === false) {
return 'Failed to write file'; $this->logger->error('Failed to write media file', array(
'path' => $file_path,
));
return null;
} }
// Calculate relative path // Update database
$prefix = substr($media->listing_key, 0, 2); $prefix = substr($media->listing_key, 0, 2);
$relative_path = $prefix . '/' . $media->listing_key . '/' . $filename; $relative_path = $prefix . '/' . $media->listing_key . '/' . $filename;
$local_url = $this->get_upload_url() . '/' . $relative_path; $local_url = $this->get_upload_url() . '/' . $relative_path;
// Update record
$wpdb->update( $wpdb->update(
$this->db->media_table(), $this->db->media_table(),
array( array(
@@ -551,42 +390,17 @@ class MLS_Media_Handler {
'file_size' => strlen($body), 'file_size' => strlen($body),
'mime_type' => $content_type, 'mime_type' => $content_type,
'downloaded_at' => current_time('mysql'), 'downloaded_at' => current_time('mysql'),
'download_error' => null,
'download_status' => 'completed',
'retry_after' => null,
), ),
array('id' => $media->id) array('id' => $media->id)
); );
return true; $this->logger->debug('Media fetched and cached', array(
} 'listing_key' => $media->listing_key,
'media_key' => $media->media_key,
'size' => strlen($body),
));
/** return $local_url;
* Log a download attempt to the media log table
*
* @param object $media Media record
* @param string $action Action type (attempt, success, error, rate_limited, permanent_error)
* @param int|null $status_code HTTP status code
* @param int $response_time_ms Response time in milliseconds
* @param string|null $error Error message
*/
private function log_download($media, $action, $status_code, $response_time_ms, $error) {
global $wpdb;
$wpdb->insert(
$this->db->media_log_table(),
array(
'media_id' => $media->id,
'listing_key' => $media->listing_key,
'media_key' => $media->media_key,
'action' => $action,
'status_code' => $status_code,
'response_time_ms' => $response_time_ms,
'error_message' => $error,
'url' => $media->media_url,
'created_at' => current_time('mysql'),
)
);
} }
/** /**
@@ -597,7 +411,6 @@ class MLS_Media_Handler {
* @return string File extension * @return string File extension
*/ */
private function get_extension_from_content_type($content_type, $url) { private function get_extension_from_content_type($content_type, $url) {
// Extract main type from content-type header
$content_type = strtolower(explode(';', $content_type)[0]); $content_type = strtolower(explode(';', $content_type)[0]);
$map = array( $map = array(
@@ -619,60 +432,6 @@ class MLS_Media_Handler {
return $ext ?: 'jpg'; return $ext ?: 'jpg';
} }
/**
 * Build the absolute path of the missing media log file.
 *
 * The log lives directly in the WordPress uploads base directory.
 *
 * @return string File path
 */
public function get_missing_log_path() {
    $uploads  = wp_upload_dir();
    $base_dir = $uploads['basedir'];

    return $base_dir . '/mls-missing-media.log';
}
/**
 * Append one line describing a failed media download to the missing media log.
 *
 * Line format: "[timestamp] listing_key | media_key | error | media_url".
 *
 * @param object $media Media record
 * @param string $error Error message
 */
private function log_missing_media($media, $error) {
    $fields = array(
        $media->listing_key,
        $media->media_key,
        $error,
        $media->media_url,
    );

    $entry = '[' . date('Y-m-d H:i:s') . '] ' . implode(' | ', $fields) . "\n";

    // Append under an exclusive lock so concurrent writers do not interleave.
    file_put_contents($this->get_missing_log_path(), $entry, FILE_APPEND | LOCK_EX);
}
/**
 * Remove the missing media log file, if there is one.
 */
public function clear_missing_log() {
    $path = $this->get_missing_log_path();

    // Nothing to clear.
    if (!file_exists($path)) {
        return;
    }

    unlink($path);
}
/**
 * Count the entries in the missing media log.
 *
 * Each entry is one newline-terminated line, so the count is the number
 * of newline characters in the file. A missing file counts as zero.
 *
 * @return int Number of missing media entries
 */
public function get_missing_count() {
    $path = $this->get_missing_log_path();

    return file_exists($path)
        ? substr_count(file_get_contents($path), "\n")
        : 0;
}
/** /**
* Delete all media for a property * Delete all media for a property
* *
@@ -719,7 +478,7 @@ class MLS_Media_Handler {
} }
/** /**
* Get media for a listing * Get media for a listing (legacy compatibility)
* *
* @param string $listing_key Listing key * @param string $listing_key Listing key
* @return array Media records * @return array Media records
@@ -736,80 +495,13 @@ class MLS_Media_Handler {
} }
/** /**
* Get primary image URL for a listing * Clean up orphaned media files (files without database records)
* *
* @param string $listing_key Listing key * @return int Number of directories deleted
* @return string|null Image URL
*/
public function get_primary_image($listing_key) {
    global $wpdb;

    // Lowest media_order row that already has a local file recorded.
    $sql = $wpdb->prepare(
        "SELECT local_url, media_url FROM {$this->db->media_table()}
        WHERE listing_key = %s AND local_path IS NOT NULL
        ORDER BY media_order ASC
        LIMIT 1",
        $listing_key
    );
    $row = $wpdb->get_row($sql);

    // No row, or a row without a usable local URL.
    if (!$row || !$row->local_url) {
        return null;
    }

    return $row->local_url;
}
/**
 * Reset failed downloads for retry
 *
 * Puts every row that is marked 'failed', or that has used up its attempts,
 * back into the 'pending' state with a clean attempt counter, no error,
 * no backoff and a fresh queued_at timestamp.
 *
 * @param string|null $listing_key Optional listing key; when given, only that
 *                                 listing's media is reset
 * @return int Number of records reset
 */
public function reset_failed_downloads($listing_key = null) {
    global $wpdb;

    // The failure condition must be parenthesised: AND binds tighter than OR,
    // so without the parentheses the listing filter would only apply to the
    // attempts check and every 'failed' row in the table would be reset.
    $where  = "(download_status = 'failed' OR download_attempts >= %d)";
    $values = array(current_time('mysql'), self::MAX_ATTEMPTS);

    if ($listing_key) {
        $where   .= ' AND listing_key = %s';
        $values[] = $listing_key;
    }

    // Placeholders are bound in order: queued_at, max attempts, [listing_key].
    $sql = $wpdb->prepare(
        "UPDATE {$this->db->media_table()}
        SET download_status = 'pending',
        download_attempts = 0,
        download_error = NULL,
        retry_after = NULL,
        queued_at = %s
        WHERE {$where}",
        $values
    );

    $wpdb->query($sql);

    return $wpdb->rows_affected;
}
/**
* Clean up orphaned media (files without database records)
*
* @return int Number of files deleted
*/ */
public function cleanup_orphaned_files() { public function cleanup_orphaned_files() {
global $wpdb;
$deleted = 0; $deleted = 0;
$base_dir = $this->get_upload_dir(); $base_dir = $this->get_upload_dir();
@@ -817,7 +509,6 @@ class MLS_Media_Handler {
return 0; return 0;
} }
// Iterate through prefix directories
foreach (scandir($base_dir) as $prefix) { foreach (scandir($base_dir) as $prefix) {
if ($prefix === '.' || $prefix === '..' || !is_dir($base_dir . '/' . $prefix)) { if ($prefix === '.' || $prefix === '..' || !is_dir($base_dir . '/' . $prefix)) {
continue; continue;
@@ -825,7 +516,6 @@ class MLS_Media_Handler {
$prefix_dir = $base_dir . '/' . $prefix; $prefix_dir = $base_dir . '/' . $prefix;
// Iterate through listing directories
foreach (scandir($prefix_dir) as $listing_key) { foreach (scandir($prefix_dir) as $listing_key) {
if ($listing_key === '.' || $listing_key === '..') { if ($listing_key === '.' || $listing_key === '..') {
continue; continue;
@@ -836,8 +526,6 @@ class MLS_Media_Handler {
continue; continue;
} }
// Check if listing exists in database
global $wpdb;
$exists = $wpdb->get_var($wpdb->prepare( $exists = $wpdb->get_var($wpdb->prepare(
"SELECT COUNT(*) FROM {$this->db->properties_table()} WHERE listing_key = %s", "SELECT COUNT(*) FROM {$this->db->properties_table()} WHERE listing_key = %s",
$listing_key $listing_key
@@ -854,76 +542,57 @@ class MLS_Media_Handler {
} }
/** /**
* Get recent download logs * Get cache statistics
* *
* @param int $limit Number of entries to return * @return array Cache stats
* @param string|null $action Optional action filter
* @return array Log entries
*/ */
public function get_download_logs($limit = 100, $action = null) { public function get_cache_stats() {
global $wpdb; global $wpdb;
$where = ''; return array(
$values = array(); 'total_media' => (int) $wpdb->get_var(
"SELECT COUNT(*) FROM {$this->db->media_table()}"
),
'cached' => (int) $wpdb->get_var(
"SELECT COUNT(*) FROM {$this->db->media_table()} WHERE local_url IS NOT NULL"
),
'uncached' => (int) $wpdb->get_var(
"SELECT COUNT(*) FROM {$this->db->media_table()} WHERE local_url IS NULL"
),
);
}
if ($action) { /**
$where = "WHERE action = %s"; * Get path to missing media log file (legacy compatibility)
$values[] = $action; *
* @return string File path
*/
public function get_missing_log_path() {
    // The log sits directly in the uploads base directory.
    return wp_upload_dir()['basedir'] . '/mls-missing-media.log';
}
/**
* Get missing media count (legacy compatibility)
*
* @return int
*/
public function get_missing_count() {
$log_file = $this->get_missing_log_path();
if (!file_exists($log_file)) {
return 0;
} }
$content = file_get_contents($log_file);
$values[] = $limit; return substr_count($content, "\n");
return $wpdb->get_results($wpdb->prepare(
"SELECT * FROM {$this->db->media_log_table()}
{$where}
ORDER BY created_at DESC
LIMIT %d",
$values
));
} }
/** /**
* Clear old download logs * Clear missing log (legacy compatibility)
*
* @param int $days_old Delete logs older than this many days
* @return int Number of entries deleted
*/ */
public function clear_old_logs($days_old = 7) { public function clear_missing_log() {
global $wpdb; $log_file = $this->get_missing_log_path();
if (file_exists($log_file)) {
$cutoff = date('Y-m-d H:i:s', strtotime("-{$days_old} days")); unlink($log_file);
}
$wpdb->query($wpdb->prepare(
"DELETE FROM {$this->db->media_log_table()} WHERE created_at < %s",
$cutoff
));
return $wpdb->rows_affected;
}
/**
 * Backward-compatible entry point for syncing a listing's media.
 *
 * Delegates straight to queue_property_media(); nothing is downloaded here.
 * The $force argument is accepted for signature compatibility and is not used.
 *
 * @param string        $listing_key       Listing key
 * @param array         $media_array       Media array from API
 * @param bool          $force             Unused; kept for old callers
 * @param callable|null $progress_callback Callback for progress updates
 * @return mixed Whatever queue_property_media() returns
 */
public function sync_property_media($listing_key, $media_array, $force = false, $progress_callback = null) {
    $queue_result = $this->queue_property_media($listing_key, $media_array, $progress_callback);

    return $queue_result;
}
/**
* Legacy download_pending method - now uses process_queue
* Kept for backward compatibility
*
* @param int $limit Max media to download
* @param callable|null $progress_callback Callback for progress updates
* @return array Stats
*/
public function download_pending($limit = 100, $progress_callback = null) {
return $this->process_queue($limit, $progress_callback);
} }
} }
@@ -78,7 +78,10 @@ class MLS_Sync_Engine {
} }
/** /**
* Run full sync * Run full sync (Active/Pending properties only)
*
* Initial import fetches only Active and Pending properties.
* Use incremental sync (replication) for ongoing updates.
* *
* @param bool $dry_run If true, don't make changes * @param bool $dry_run If true, don't make changes
* @param int|null $limit Max records to process * @param int|null $limit Max records to process
@@ -86,7 +89,7 @@ class MLS_Sync_Engine {
* @return array Sync results * @return array Sync results
*/ */
public function run_full_sync($dry_run = false, $limit = null, $progress_callback = null) { public function run_full_sync($dry_run = false, $limit = null, $progress_callback = null) {
$this->logger->info('Starting full sync', array('dry_run' => $dry_run, 'limit' => $limit)); $this->logger->info('Starting full sync (Active/Pending only)', array('dry_run' => $dry_run, 'limit' => $limit));
// Store progress callback for use in process_property // Store progress callback for use in process_property
$this->progress_callback = $progress_callback; $this->progress_callback = $progress_callback;
@@ -106,15 +109,15 @@ class MLS_Sync_Engine {
); );
try { try {
// Get first page of properties with media // Get first page of Active/Pending properties with media
$start_time = microtime(true); $start_time = microtime(true);
$this->emit_progress('api_request', array( $this->emit_progress('api_request', array(
'method' => 'GET', 'method' => 'GET',
'url' => 'Property', 'url' => 'Property',
'params' => array('type' => 'full_sync', 'limit' => $limit), 'params' => array('type' => 'initial_sync', 'filter' => 'Active/Pending', 'limit' => $limit),
)); ));
$response = $this->api_client->get_properties_for_sync(null, 'Media', $limit ? min($limit, 1000) : null); $response = $this->api_client->get_properties_for_initial_sync('Media', $limit ? min($limit, 1000) : null);
$elapsed = round((microtime(true) - $start_time) * 1000); $elapsed = round((microtime(true) - $start_time) * 1000);
if (is_wp_error($response)) { if (is_wp_error($response)) {
@@ -239,7 +242,13 @@ class MLS_Sync_Engine {
} }
/** /**
* Run incremental sync * Run incremental sync (replication)
*
* Fetches all properties modified since last sync, including those that:
* - Became unavailable (MlgCanView=false)
* - Changed status (Active -> Sold)
*
* Properties are deleted from local DB if MlgCanView=false or status not Active/Pending.
* *
* @param bool $dry_run If true, don't make changes * @param bool $dry_run If true, don't make changes
* @param callable|null $progress_callback Callback for progress updates * @param callable|null $progress_callback Callback for progress updates
@@ -254,7 +263,7 @@ class MLS_Sync_Engine {
return $this->run_full_sync($dry_run, null, $progress_callback); return $this->run_full_sync($dry_run, null, $progress_callback);
} }
$this->logger->info('Starting incremental sync', array( $this->logger->info('Starting replication sync', array(
'since' => $last_timestamp, 'since' => $last_timestamp,
'dry_run' => $dry_run, 'dry_run' => $dry_run,
)); ));
@@ -276,15 +285,15 @@ class MLS_Sync_Engine {
); );
try { try {
// Get modified properties (including those marked for deletion) // Get ALL modified properties (no MlgCanView or status filter for replication)
$start_time = microtime(true); $start_time = microtime(true);
$this->emit_progress('api_request', array( $this->emit_progress('api_request', array(
'method' => 'GET', 'method' => 'GET',
'url' => 'Property', 'url' => 'Property',
'params' => array('type' => 'incremental', 'since' => $last_timestamp), 'params' => array('type' => 'replication', 'since' => $last_timestamp),
)); ));
$response = $this->api_client->get_properties_since($last_timestamp, 'Media'); $response = $this->api_client->get_properties_for_replication($last_timestamp, 'Media');
$elapsed = round((microtime(true) - $start_time) * 1000); $elapsed = round((microtime(true) - $start_time) * 1000);
if (is_wp_error($response)) { if (is_wp_error($response)) {
@@ -525,9 +534,18 @@ class MLS_Sync_Engine {
*/ */
private $progress_callback = null; private $progress_callback = null;
/**
 * StandardStatus values that may be kept in the local database.
 *
 * The property-processing code in this class treats any StandardStatus
 * outside this list as a reason to delete the local record.
 */
const ALLOWED_STATUSES = array('Active', 'Pending');
/** /**
* Process a single property record * Process a single property record
* *
* During replication, properties are deleted if:
* - MlgCanView = false (removed from feed)
* - StandardStatus not in (Active, Pending)
*
* @param array $property Property data from API * @param array $property Property data from API
* @param bool $dry_run If true, don't make changes * @param bool $dry_run If true, don't make changes
*/ */
@@ -543,15 +561,31 @@ class MLS_Sync_Engine {
return; return;
} }
// Check MlgCanView - if false, delete the record // Check MlgCanView and StandardStatus
$can_view = $property['MlgCanView'] ?? true; $can_view = $property['MlgCanView'] ?? true;
$status = $property['StandardStatus'] ?? null;
if (!$can_view) { // Delete if: not viewable OR status is not Active/Pending
if (!$dry_run) { $should_delete = !$can_view || !in_array($status, self::ALLOWED_STATUSES);
$this->delete_property($listing_key);
if ($should_delete) {
// Check if we have this record locally before attempting delete
$exists_locally = $wpdb->get_var($wpdb->prepare(
"SELECT id FROM {$this->db->properties_table()} WHERE listing_key = %s",
$listing_key
));
if ($exists_locally) {
if (!$dry_run) {
$this->delete_property($listing_key);
}
$this->stats['deleted']++;
$this->emit_progress('property_deleted', array(
'listing_key' => $listing_key,
'reason' => !$can_view ? 'MlgCanView=false' : "Status={$status}",
));
} }
$this->stats['deleted']++; // If not in our DB, just skip silently (e.g., Sold property we never had)
$this->emit_progress('property_deleted', array('listing_key' => $listing_key));
return; return;
} }
@@ -271,17 +271,21 @@ function mls_get_property_media($listing_key) {
} }
/** /**
* Get primary image URL for a listing * Get primary image URL for a listing (on-demand fetching)
*
* Images are fetched from MLS Grid and cached locally on first request.
* Per MLS Grid rules, images must be served from our own server.
* *
* @param string $listing_key The listing key * @param string $listing_key The listing key
* @param bool $fetch_if_missing Whether to fetch from MLS Grid if not cached (default: true)
* @return string|null Image URL or null * @return string|null Image URL or null
*/ */
function mls_get_property_image($listing_key) { function mls_get_property_image($listing_key, $fetch_if_missing = true) {
$plugin = mls_plugin(); $plugin = mls_plugin();
if (!$plugin->get_query()) { if (!$plugin->get_media_handler()) {
return null; return null;
} }
return $plugin->get_query()->get_primary_image($listing_key); return $plugin->get_media_handler()->get_primary_image($listing_key, $fetch_if_missing);
} }
/** /**
@@ -324,3 +328,34 @@ function mls_get_property_count($args = array()) {
} }
return $plugin->get_query()->get_count($args); return $plugin->get_query()->get_count($args);
} }
/**
 * Fetch the media records for a listing, caching images on demand.
 *
 * Thin wrapper around the media handler's get_listing_images(). When the
 * plugin has no media handler, an empty array is returned.
 *
 * @param string $listing_key The listing key
 * @param int $fetch_limit Max images to fetch on-demand (default: 1, 0 = none)
 * @return array Array of media objects
 */
function mls_get_property_images($listing_key, $fetch_limit = 1) {
    $mls = mls_plugin();

    return $mls->get_media_handler()
        ? $mls->get_media_handler()->get_listing_images($listing_key, $fetch_limit)
        : array();
}
/**
 * Report media cache statistics.
 *
 * Thin wrapper around the media handler's get_cache_stats(). When the
 * plugin has no media handler, all counters are reported as zero.
 *
 * @return array Stats with total_media, cached, uncached counts
 */
function mls_get_cache_stats() {
    $mls = mls_plugin();

    if ($mls->get_media_handler()) {
        return $mls->get_media_handler()->get_cache_stats();
    }

    // No handler available: report an empty cache.
    return array('total_media' => 0, 'cached' => 0, 'uncached' => 0);
}