b6df4dbb92
MLS plugin fixes from this session: - Fix silent insert failures: location column NOT NULL was rejecting wpdb->insert calls, causing ~18k new properties since Dec 2025 to be lost. Inserts now build raw SQL with ST_PointFromText so the spatial column is populated atomically. - Auto-refresh expired media URLs in MLS_Media_Handler::fetch_and_cache(), guarded by a property-level GET_LOCK so concurrent fetches share one API refresh. - Normalize WP_Error to null in mls_get_property_image() so callers can rely on the documented string|null contract. - Support comma-separated property_type filters in MLS_Query and MLS_Cluster so the homepage "View All Commercial" link (?property_type=Commercial+Sale,Land,Farm) actually filters correctly. - Incremental sync now looks back 10 minutes past the latest modification timestamp as a safety margin against missed records. - Smart sync exits silently (info-level, not warning) when a full sync is in progress. Operational: - New cron: weekly full sync Sundays at 3 AM (/usr/local/bin/mls-full-sync). - New cron: hourly 2GB cap on mls-thumbnails/ and cache/transformed-images/ (/usr/local/bin/mls-image-cache-cap). - Logrotate config for wp-content/debug.log (2-day retention, daily rotation, delaycompress). Repo policy: - CLAUDE.md updated with explicit "commit everything except build artifacts" policy. - .gitignore: untrack runtime image caches and debug.log rotations. Other modifications in this snapshot are pre-existing in-flight theme/plugin/db_content_updates work. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
590 lines
19 KiB
PHP
Executable File
590 lines
19 KiB
PHP
Executable File
<?php
|
|
/**
|
|
* Rate limiter class for MLS Grid API compliance
|
|
*
|
|
* MLS Grid Rate Limits (warning thresholds):
|
|
* - 4 requests per second (suspension at 6)
|
|
* - 7,200 requests per hour (suspension at 18,000)
|
|
* - 40,000 requests per 24 hours (suspension at 60,000)
|
|
* - 3GB data per hour / 40GB per 24 hours (suspension at 4GB/60GB)
|
|
*
|
|
* Our strategy: Throttle sync operations to use max 50% of daily quota
|
|
* even if running continuously for 24 hours. This leaves 50% budget
|
|
* for on-demand image fetches and other operations.
|
|
*/
|
|
|
|
// Refuse to run when the file is loaded outside WordPress (ABSPATH undefined).
defined('ABSPATH') || exit;
|
|
|
|
class MLS_Rate_Limiter {
|
|
|
|
/**
 * MLS Grid warning thresholds, as listed in the file header (the
 * suspension thresholds given there are higher). These are the single
 * source of truth; the LIMIT_* tracking constants below alias them.
 */
const MLSGRID_LIMIT_PER_SECOND = 4;
const MLSGRID_LIMIT_PER_HOUR   = 7200;
const MLSGRID_LIMIT_PER_DAY    = 40000;
const MLSGRID_BYTES_PER_HOUR   = 3221225472;  // 3GB
const MLSGRID_BYTES_PER_DAY    = 42949672960; // 40GB

/**
 * Sync operation budget
 *
 * Derived from a 5-second spacing between sync requests:
 * 86400s / 5s = 17,280 requests/day (43% of the 40,000/day threshold).
 *
 * SYNC_MIN_INTERVAL_MS is legacy: the spacing enforced by
 * acquire_general_api_slot() is GENERAL_API_INTERVAL_MS.
 */
const SYNC_REQUESTS_PER_DAY = 17280;       // 86400s / 5s = 17,280 max
const SYNC_BYTES_PER_DAY    = 21474836480; // 20GB (50% of 40GB)
const SYNC_MIN_INTERVAL_MS  = 5000;        // 5 seconds between requests (legacy)

/**
 * Global rate limit intervals (cross-process coordination via MySQL advisory locks)
 *
 * Enforced across ALL processes. Each channel issues at most one request
 * per interval, which stays far below MLSGRID_LIMIT_PER_SECOND.
 */
const GENERAL_API_INTERVAL_MS = 4000; // 4 seconds between general API requests
const IMAGE_API_INTERVAL_MS   = 2000; // 2 seconds between image API requests

/**
 * Advisory lock names for cross-process coordination
 */
const LOCK_GENERAL_API = 'mls_api_general';
const LOCK_IMAGE_API   = 'mls_api_image';

/**
 * Option keys holding the microtime() of the last request per channel
 */
const OPTION_LAST_GENERAL_REQUEST = 'mls_last_general_api_request';
const OPTION_LAST_IMAGE_REQUEST   = 'mls_last_image_api_request';

/**
 * Limits the usage counters are checked against. Aliases of the MLS Grid
 * thresholds so the two sets of constants cannot drift apart.
 */
const LIMIT_PER_HOUR       = self::MLSGRID_LIMIT_PER_HOUR;
const LIMIT_PER_DAY        = self::MLSGRID_LIMIT_PER_DAY;
const LIMIT_BYTES_PER_HOUR = self::MLSGRID_BYTES_PER_HOUR; // 3GB
const LIMIT_BYTES_PER_DAY  = self::MLSGRID_BYTES_PER_DAY;  // 40GB

/**
 * Window types (values stored in the window_type column)
 */
const WINDOW_HOUR = 'hour';
const WINDOW_DAY  = 'day';

/**
 * Database helper; supplies the rate-limits table name.
 */
private $db;
|
|
|
|
/**
 * Create a rate limiter bound to the plugin's database helper.
 *
 * @param MLS_DB $db Database helper used to resolve the rate-limits table
 */
public function __construct(MLS_DB $db) {
    $this->db = $db;
}
|
|
|
|
/**
 * Acquire the next request slot on the general API channel.
 *
 * Delegates to acquire_api_slot() with the general-channel advisory lock,
 * timestamp option and GENERAL_API_INTERVAL_MS spacing.
 *
 * @param int $timeout_seconds Timeout passed through to acquire_api_slot()
 * @return bool True if a slot was acquired, false otherwise
 */
public function acquire_general_api_slot($timeout_seconds = 30) {
    $acquired = $this->acquire_api_slot(
        self::LOCK_GENERAL_API,
        self::OPTION_LAST_GENERAL_REQUEST,
        self::GENERAL_API_INTERVAL_MS,
        $timeout_seconds
    );

    return $acquired;
}
|
|
|
|
/**
 * Acquire the next request slot on the image API channel.
 *
 * Delegates to acquire_api_slot() with the image-channel advisory lock,
 * timestamp option and IMAGE_API_INTERVAL_MS spacing.
 *
 * @param int $timeout_seconds Timeout passed through to acquire_api_slot()
 * @return bool True if a slot was acquired, false otherwise
 */
public function acquire_image_api_slot($timeout_seconds = 30) {
    $acquired = $this->acquire_api_slot(
        self::LOCK_IMAGE_API,
        self::OPTION_LAST_IMAGE_REQUEST,
        self::IMAGE_API_INTERVAL_MS,
        $timeout_seconds
    );

    return $acquired;
}
|
|
|
|
/**
 * Acquire an API slot, serialised across processes by a MySQL advisory lock.
 *
 * While holding the lock the method reads the channel's last-request
 * timestamp, waits out whatever remains of the minimum interval, stamps
 * the current time and releases the lock.
 *
 * A timeout of 0 (or less) is non-blocking, as the public wrappers
 * document: the lock is tried once without waiting and the call returns
 * false immediately if the lock is busy or the interval has not elapsed.
 * Previously a timeout of 0 disabled the timeout check and the call could
 * wait indefinitely.
 *
 * @param string $lock_name       Advisory lock name
 * @param string $option_key      Option key for last request timestamp
 * @param int    $interval_ms     Minimum interval between requests in milliseconds
 * @param int    $timeout_seconds Max seconds to wait for the lock (0 = non-blocking)
 * @return bool True if slot acquired
 */
private function acquire_api_slot($lock_name, $option_key, $interval_ms, $timeout_seconds) {
    global $wpdb;

    $non_blocking = $timeout_seconds <= 0;
    $deadline     = microtime(true) + $timeout_seconds;
    $interval_sec = $interval_ms / 1000.0;

    while (true) {
        // Give up once the caller's wait budget is spent.
        if (!$non_blocking && microtime(true) >= $deadline) {
            return false;
        }

        // Blocking callers wait up to 1 second per attempt; non-blocking
        // callers pass 0 so GET_LOCK returns immediately.
        $lock_acquired = $wpdb->get_var($wpdb->prepare(
            "SELECT GET_LOCK(%s, %d)",
            $lock_name,
            $non_blocking ? 0 : 1
        ));

        if ($lock_acquired !== '1') {
            if ($non_blocking) {
                return false;
            }

            // Lock held by another process (or the query failed): back off and retry.
            usleep(100000); // 100ms
            continue;
        }

        try {
            // get_option() can answer from this process's object cache, which
            // would hide timestamps written by other processes. Drop the
            // cached copy so the value is re-read from the database.
            wp_cache_delete($option_key, 'options');

            $last_request = (float) get_option($option_key, 0);

            // Clamp to one interval so a stored timestamp in the future
            // (e.g. after a clock change) cannot stall us while holding the lock.
            $remaining = min($interval_sec, $interval_sec - (microtime(true) - $last_request));

            if ($remaining > 0) {
                if ($non_blocking) {
                    return false;
                }

                usleep((int) ($remaining * 1000000)); // seconds -> microseconds
            }

            // Stamp the slot while still holding the lock.
            update_option($option_key, microtime(true), false); // false = don't autoload

            return true;
        } finally {
            // Runs on every exit path above, so the advisory lock never leaks.
            $wpdb->query($wpdb->prepare("SELECT RELEASE_LOCK(%s)", $lock_name));
        }
    }
}
|
|
|
|
/**
 * Rate limit channels accepted by check_and_wait()
 */
const CHANNEL_GENERAL = 'general'; // spaced by GENERAL_API_INTERVAL_MS
const CHANNEL_IMAGE   = 'image';   // spaced by IMAGE_API_INTERVAL_MS
|
|
|
|
/**
 * Check whether a request may proceed, optionally waiting until it can.
 *
 * The hourly and daily request quotas are checked first. If a quota is
 * exhausted and $wait is true, the method sleeps via wait_for_window()
 * (at most 60 seconds) and checks again; if the quota is still exhausted
 * it returns false rather than letting the request through. Only after
 * both quotas pass is the channel's API slot acquired, so a denied call
 * never consumes a slot:
 * - general: 4-second interval
 * - image:   2-second interval
 *
 * @param bool   $wait    Whether to wait if rate limited
 * @param string $channel Rate limit channel ('general' or 'image')
 * @return bool True if request can proceed
 */
public function check_and_wait($wait = true, $channel = self::CHANNEL_GENERAL) {
    $quotas = array(
        self::WINDOW_HOUR => self::LIMIT_PER_HOUR,
        self::WINDOW_DAY  => self::LIMIT_PER_DAY,
    );

    foreach ($quotas as $window_type => $limit) {
        if ($this->check_limit($window_type, $limit)) {
            continue;
        }

        if (!$wait) {
            return false;
        }

        // wait_for_window() sleeps at most 60 seconds, which may be shorter
        // than the time left in the window, so the quota must be re-checked.
        $this->wait_for_window($window_type);

        if (!$this->check_limit($window_type, $limit)) {
            return false;
        }
    }

    // Slot acquisition goes last so the spacing is measured right before the request.
    $timeout = $wait ? 60 : 0;

    if ($channel === self::CHANNEL_IMAGE) {
        return $this->acquire_image_api_slot($timeout);
    }

    return $this->acquire_general_api_slot($timeout);
}
|
|
|
|
/**
 * Tell whether the current window's request count is still below a limit.
 *
 * @param string $window_type Window type
 * @param int    $limit       Limit for this window
 * @return bool True while the recorded count is strictly below $limit
 */
private function check_limit($window_type, $limit) {
    return $this->get_window_count($window_type) < $limit;
}
|
|
|
|
/**
 * Read the request count recorded for the current window.
 *
 * @param string $window_type Window type
 * @return int Recorded request count, 0 when no row exists
 */
private function get_window_count($window_type) {
    global $wpdb;

    $window_start = $this->get_window_start($window_type);
    $table        = $this->db->rate_limits_table();

    $sql = $wpdb->prepare(
        "SELECT request_count FROM {$table}
         WHERE window_type = %s AND window_start = %s",
        $window_type,
        $window_start
    );
    $stored = $wpdb->get_var($sql);

    // A missing row or a zero value both count as no requests.
    if (!$stored) {
        return 0;
    }

    return (int) $stored;
}
|
|
|
|
/**
 * Compute the start of the current window as a MySQL datetime string.
 *
 * The timestamp comes from current_time('timestamp') and is truncated to
 * the hour or the day; unknown window types get the untruncated time.
 *
 * @param string $window_type Window type
 * @return string MySQL datetime
 */
private function get_window_start($window_type) {
    $timestamp = current_time('timestamp');

    // Loose comparison on purpose: it mirrors switch semantics.
    if ($window_type == self::WINDOW_HOUR) {
        $format = 'Y-m-d H:00:00';
    } elseif ($window_type == self::WINDOW_DAY) {
        $format = 'Y-m-d 00:00:00';
    } else {
        $format = 'Y-m-d H:i:s';
    }

    return gmdate($format, $timestamp);
}
|
|
|
|
/**
 * Count one API request against the hourly and daily windows.
 *
 * Also prunes expired counter rows after recording.
 *
 * @param int $bytes_transferred Optional bytes transferred
 */
public function record_request($bytes_transferred = 0) {
    // Hourly first, then daily.
    foreach (array(self::WINDOW_HOUR, self::WINDOW_DAY) as $window_type) {
        $this->increment_window($window_type, $bytes_transferred);
    }

    $this->cleanup_old_records();
}
|
|
|
|
/**
 * Add one request (and its bytes) to the current window's counter row.
 *
 * NOTE(review): the UPDATE-then-INSERT sequence is not atomic; two
 * processes could both see zero updated rows and both insert. Confirm
 * how the table's keys handle that case.
 *
 * @param string $window_type       Window type
 * @param int    $bytes_transferred Bytes transferred
 */
private function increment_window($window_type, $bytes_transferred = 0) {
    global $wpdb;

    $window_start = $this->get_window_start($window_type);
    $table        = $this->db->rate_limits_table();

    // Bump the existing row, if there is one.
    $rows_affected = $wpdb->query($wpdb->prepare(
        "UPDATE {$table}
         SET request_count = request_count + 1,
         bytes_transferred = bytes_transferred + %d
         WHERE window_type = %s AND window_start = %s",
        $bytes_transferred,
        $window_type,
        $window_start
    ));

    // Anything other than exactly 0 rows (including a false error result) ends here.
    if (0 !== $rows_affected) {
        return;
    }

    // First request in this window: create the row.
    $row = array(
        'window_type'       => $window_type,
        'window_start'      => $window_start,
        'request_count'     => 1,
        'bytes_transferred' => $bytes_transferred,
    );

    $wpdb->insert($this->db->rate_limits_table(), $row, array('%s', '%s', '%d', '%d'));
}
|
|
|
|
/**
 * Sleep towards the end of the current rate limit window.
 *
 * A single call sleeps at most 60 seconds, even when the window has
 * longer left to run.
 *
 * NOTE(review): the gmdate()/strtotime() round trip assumes PHP's default
 * timezone is UTC — confirm.
 *
 * @param string $window_type Window type
 */
private function wait_for_window($window_type) {
    $timestamp = current_time('timestamp');

    // Loose comparison on purpose: it mirrors switch semantics.
    if ($window_type == self::WINDOW_HOUR) {
        // Seconds until the top of the next hour.
        $hour_start   = strtotime(gmdate('Y-m-d H:00:00', $timestamp));
        $wait_seconds = strtotime('+1 hour', $hour_start) - $timestamp;
    } elseif ($window_type == self::WINDOW_DAY) {
        // Seconds until the next midnight.
        $day_start    = strtotime(gmdate('Y-m-d 00:00:00', $timestamp));
        $wait_seconds = strtotime('+1 day', $day_start) - $timestamp;
    } else {
        $wait_seconds = 1;
    }

    if ($wait_seconds <= 0) {
        return;
    }

    sleep(min($wait_seconds, 60)); // Max 60 second wait per call
}
|
|
|
|
/**
 * Delete rate limit rows whose window started more than 48 hours ago.
 *
 * The cutoff is taken from current_time('timestamp'), the same clock
 * get_window_start() uses to stamp window_start, so the 48-hour retention
 * is not skewed by the site's UTC offset.
 */
private function cleanup_old_records() {
    global $wpdb;

    $cutoff = gmdate('Y-m-d H:i:s', current_time('timestamp') - 48 * 3600);

    $wpdb->query($wpdb->prepare(
        "DELETE FROM {$this->db->rate_limits_table()} WHERE window_start < %s",
        $cutoff
    ));
}
|
|
|
|
/**
 * Get current rate limit status.
 *
 * Each counter is read once, so the 'used' and 'remaining' figures of a
 * section always agree with each other.
 *
 * @return array Sections 'hourly', 'daily', 'data_hourly' and 'data_daily',
 *               each with 'used', 'limit' and 'remaining', plus the legacy
 *               keys 'bytes_this_hour' and 'bytes_limit'
 */
public function get_status() {
    $requests_hour = $this->get_window_count(self::WINDOW_HOUR);
    $requests_day  = $this->get_window_count(self::WINDOW_DAY);
    $bytes_hour    = $this->get_bytes_this_hour();
    $bytes_day     = $this->get_bytes_today();

    return array(
        'hourly' => array(
            'used'      => $requests_hour,
            'limit'     => self::LIMIT_PER_HOUR,
            'remaining' => max(0, self::LIMIT_PER_HOUR - $requests_hour),
        ),
        'daily' => array(
            'used'      => $requests_day,
            'limit'     => self::LIMIT_PER_DAY,
            'remaining' => max(0, self::LIMIT_PER_DAY - $requests_day),
        ),
        'data_hourly' => array(
            'used'      => $bytes_hour,
            'limit'     => self::LIMIT_BYTES_PER_HOUR,
            'remaining' => max(0, self::LIMIT_BYTES_PER_HOUR - $bytes_hour),
        ),
        'data_daily' => array(
            'used'      => $bytes_day,
            'limit'     => self::LIMIT_BYTES_PER_DAY,
            'remaining' => max(0, self::LIMIT_BYTES_PER_DAY - $bytes_day),
        ),
        // Legacy fields for backward compatibility
        'bytes_this_hour' => $bytes_hour,
        'bytes_limit'     => self::LIMIT_BYTES_PER_HOUR,
    );
}
|
|
|
|
/**
 * Read the bytes recorded for the current hourly window.
 *
 * @return int Bytes, 0 when no row exists
 */
public function get_bytes_this_hour() {
    global $wpdb;

    $hour_start = $this->get_window_start(self::WINDOW_HOUR);
    $table      = $this->db->rate_limits_table();

    $stored = $wpdb->get_var($wpdb->prepare(
        "SELECT bytes_transferred FROM {$table}
         WHERE window_type = %s AND window_start = %s",
        self::WINDOW_HOUR,
        $hour_start
    ));

    if (!$stored) {
        return 0;
    }

    return (int) $stored;
}
|
|
|
|
/**
 * Read the bytes recorded for the current daily window.
 *
 * @return int Bytes, 0 when no row exists
 */
public function get_bytes_today() {
    global $wpdb;

    $day_start = $this->get_window_start(self::WINDOW_DAY);
    $table     = $this->db->rate_limits_table();

    $stored = $wpdb->get_var($wpdb->prepare(
        "SELECT bytes_transferred FROM {$table}
         WHERE window_type = %s AND window_start = %s",
        self::WINDOW_DAY,
        $day_start
    ));

    if (!$stored) {
        return 0;
    }

    return (int) $stored;
}
|
|
|
|
/**
 * Compute how much of the daily data cap is still unused.
 *
 * @return int Remaining bytes, never negative
 */
public function get_daily_data_remaining() {
    $unused = self::LIMIT_BYTES_PER_DAY - $this->get_bytes_today();

    return max(0, $unused);
}
|
|
|
|
/**
 * Decide whether the remaining daily data budget covers one more image.
 *
 * @param int $estimated_bytes Estimated size of image (default 400KB)
 * @return bool True when the remaining budget is strictly larger than the estimate
 */
public function can_fetch_image($estimated_bytes = 409600) {
    $budget = $this->get_daily_data_remaining();

    return $budget > $estimated_bytes;
}
|
|
|
|
/**
 * Add transferred bytes to the hourly and daily windows without counting a request.
 *
 * Meant for image downloads, which use the data budget but are not
 * counted as API requests. Non-positive byte counts are ignored.
 *
 * @param int $bytes Bytes transferred
 */
public function record_data_transfer($bytes) {
    if ($bytes <= 0) {
        return;
    }

    // Hourly first, then daily; the request counters stay untouched.
    foreach (array(self::WINDOW_HOUR, self::WINDOW_DAY) as $window_type) {
        $this->increment_data_only($window_type, $bytes);
    }
}
|
|
|
|
/**
 * Add bytes to the current window's row, leaving its request count alone.
 *
 * NOTE(review): the UPDATE-then-INSERT sequence is not atomic; two
 * processes could both see zero updated rows and both insert. Confirm
 * how the table's keys handle that case.
 *
 * @param string $window_type Window type
 * @param int    $bytes       Bytes transferred
 */
private function increment_data_only($window_type, $bytes) {
    global $wpdb;

    $window_start = $this->get_window_start($window_type);
    $table        = $this->db->rate_limits_table();

    // Add to the existing row, if there is one.
    $rows_affected = $wpdb->query($wpdb->prepare(
        "UPDATE {$table}
         SET bytes_transferred = bytes_transferred + %d
         WHERE window_type = %s AND window_start = %s",
        $bytes,
        $window_type,
        $window_start
    ));

    // Anything other than exactly 0 rows (including a false error result) ends here.
    if (0 !== $rows_affected) {
        return;
    }

    // No row yet for this window: create it with request_count = 0 (data only).
    $row = array(
        'window_type'       => $window_type,
        'window_start'      => $window_start,
        'request_count'     => 0,
        'bytes_transferred' => $bytes,
    );

    $wpdb->insert($this->db->rate_limits_table(), $row, array('%s', '%s', '%d', '%d'));
}
|
|
|
|
/**
 * Check if any tracked limit is at or above a usage threshold.
 *
 * Covers all four sections reported by get_status(): hourly and daily
 * request counts, and hourly and daily data volume. The hourly data cap
 * was previously left out of this check.
 *
 * @param float $threshold Percentage threshold (0.0 - 1.0)
 * @return bool True if approaching limits
 */
public function is_approaching_limit($threshold = 0.9) {
    $status = $this->get_status();

    foreach (array('hourly', 'daily', 'data_hourly', 'data_daily') as $section) {
        $used_fraction = $status[$section]['used'] / $status[$section]['limit'];

        if ($used_fraction >= $threshold) {
            return true;
        }
    }

    return false;
}
|
|
|
|
/**
 * Summarise current usage as percentages for logging/display.
 *
 * @return array Percentages (one decimal) for hourly/daily requests and
 *               data, plus the remaining daily data budget in GB (two decimals)
 */
public function get_usage_summary() {
    $status = $this->get_status();

    $requests_hour = $status['hourly'];
    $requests_day  = $status['daily'];
    $data_hour     = $status['data_hourly'];
    $data_day      = $status['data_daily'];

    $summary = array();

    $summary['requests_hourly_pct']     = round(($requests_hour['used'] / $requests_hour['limit']) * 100, 1);
    $summary['requests_daily_pct']      = round(($requests_day['used'] / $requests_day['limit']) * 100, 1);
    $summary['data_hourly_pct']         = round(($data_hour['used'] / $data_hour['limit']) * 100, 1);
    $summary['data_daily_pct']          = round(($data_day['used'] / $data_day['limit']) * 100, 1);
    $summary['data_daily_remaining_gb'] = round($data_day['remaining'] / 1073741824, 2); // bytes -> GB

    return $summary;
}
|
|
|
|
/**
 * Wipe all rate limit state (for testing).
 *
 * Empties the counters table and removes both channels' last-request
 * timestamps.
 */
public function reset() {
    global $wpdb;

    $table = $this->db->rate_limits_table();
    $wpdb->query("TRUNCATE TABLE {$table}");

    // Forget when each channel last issued a request.
    $timestamp_options = array(
        self::OPTION_LAST_GENERAL_REQUEST,
        self::OPTION_LAST_IMAGE_REQUEST,
    );

    foreach ($timestamp_options as $option_key) {
        delete_option($option_key);
    }
}
|
|
}
|