Refactor MLS sync to Active/Pending only with on-demand media

Major changes to sync strategy following MLS Grid best practices:

- Initial sync now fetches only Active/Pending properties (~30K vs 1.3M)
- Replication (incremental) sync fetches all changes and deletes local records that are no longer Active/Pending
- On-demand media fetching replaces background queue (avoids rate limits)
- Media is downloaded and cached when first viewed, not during sync
- Updated CLI commands: wp mls media status/fetch/clear
- Comprehensive documentation with troubleshooting guide

This fixes the "Value out of range" API error caused by high $skip values.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Hanson.xyz Dev
2025-12-15 08:25:37 -06:00
parent 6eadf3d266
commit b9cddd2f64
6 changed files with 538 additions and 874 deletions
@@ -78,7 +78,10 @@ class MLS_Sync_Engine {
}
/**
* Run full sync
* Run full sync (Active/Pending properties only)
*
* Initial import fetches only Active and Pending properties.
* Use incremental sync (replication) for ongoing updates.
*
* @param bool $dry_run If true, don't make changes
* @param int|null $limit Max records to process
@@ -86,7 +89,7 @@ class MLS_Sync_Engine {
* @return array Sync results
*/
public function run_full_sync($dry_run = false, $limit = null, $progress_callback = null) {
$this->logger->info('Starting full sync', array('dry_run' => $dry_run, 'limit' => $limit));
$this->logger->info('Starting full sync (Active/Pending only)', array('dry_run' => $dry_run, 'limit' => $limit));
// Store progress callback for use in process_property
$this->progress_callback = $progress_callback;
@@ -106,15 +109,15 @@ class MLS_Sync_Engine {
);
try {
// Get first page of properties with media
// Get first page of Active/Pending properties with media
$start_time = microtime(true);
$this->emit_progress('api_request', array(
'method' => 'GET',
'url' => 'Property',
'params' => array('type' => 'full_sync', 'limit' => $limit),
'params' => array('type' => 'initial_sync', 'filter' => 'Active/Pending', 'limit' => $limit),
));
$response = $this->api_client->get_properties_for_sync(null, 'Media', $limit ? min($limit, 1000) : null);
$response = $this->api_client->get_properties_for_initial_sync('Media', $limit ? min($limit, 1000) : null);
$elapsed = round((microtime(true) - $start_time) * 1000);
if (is_wp_error($response)) {
@@ -239,7 +242,13 @@ class MLS_Sync_Engine {
}
/**
* Run incremental sync
* Run incremental sync (replication)
*
* Fetches all properties modified since last sync, including those that:
* - Became unavailable (MlgCanView=false)
* - Changed status (Active -> Sold)
*
* Properties are deleted from local DB if MlgCanView=false or status not Active/Pending.
*
* @param bool $dry_run If true, don't make changes
* @param callable|null $progress_callback Callback for progress updates
@@ -254,7 +263,7 @@ class MLS_Sync_Engine {
return $this->run_full_sync($dry_run, null, $progress_callback);
}
$this->logger->info('Starting incremental sync', array(
$this->logger->info('Starting replication sync', array(
'since' => $last_timestamp,
'dry_run' => $dry_run,
));
@@ -276,15 +285,15 @@ class MLS_Sync_Engine {
);
try {
// Get modified properties (including those marked for deletion)
// Get ALL modified properties (no MlgCanView or status filter for replication)
$start_time = microtime(true);
$this->emit_progress('api_request', array(
'method' => 'GET',
'url' => 'Property',
'params' => array('type' => 'incremental', 'since' => $last_timestamp),
'params' => array('type' => 'replication', 'since' => $last_timestamp),
));
$response = $this->api_client->get_properties_since($last_timestamp, 'Media');
$response = $this->api_client->get_properties_for_replication($last_timestamp, 'Media');
$elapsed = round((microtime(true) - $start_time) * 1000);
if (is_wp_error($response)) {
@@ -525,9 +534,18 @@ class MLS_Sync_Engine {
*/
private $progress_callback = null;
/**
* Allowed statuses for our database (Active/Pending only)
*/
const ALLOWED_STATUSES = array('Active', 'Pending');
/**
* Process a single property record
*
* During replication, properties are deleted if:
* - MlgCanView = false (removed from feed)
* - StandardStatus not in (Active, Pending)
*
* @param array $property Property data from API
* @param bool $dry_run If true, don't make changes
*/
@@ -543,15 +561,31 @@ class MLS_Sync_Engine {
return;
}
// Check MlgCanView - if false, delete the record
// Check MlgCanView and StandardStatus
$can_view = $property['MlgCanView'] ?? true;
$status = $property['StandardStatus'] ?? null;
if (!$can_view) {
if (!$dry_run) {
$this->delete_property($listing_key);
// Delete if: not viewable OR status is not Active/Pending
$should_delete = !$can_view || !in_array($status, self::ALLOWED_STATUSES);
if ($should_delete) {
// Check if we have this record locally before attempting delete
$exists_locally = $wpdb->get_var($wpdb->prepare(
"SELECT id FROM {$this->db->properties_table()} WHERE listing_key = %s",
$listing_key
));
if ($exists_locally) {
if (!$dry_run) {
$this->delete_property($listing_key);
}
$this->stats['deleted']++;
$this->emit_progress('property_deleted', array(
'listing_key' => $listing_key,
'reason' => !$can_view ? 'MlgCanView=false' : "Status={$status}",
));
}
$this->stats['deleted']++;
$this->emit_progress('property_deleted', array('listing_key' => $listing_key));
// If not in our DB, just skip silently (e.g., Sold property we never had)
return;
}