b6df4dbb92
MLS plugin fixes from this session: - Fix silent insert failures: location column NOT NULL was rejecting wpdb->insert calls, causing ~18k new properties since Dec 2025 to be lost. Inserts now build raw SQL with ST_PointFromText so the spatial column is populated atomically. - Auto-refresh expired media URLs in MLS_Media_Handler::fetch_and_cache(), guarded by a property-level GET_LOCK so concurrent fetches share one API refresh. - Normalize WP_Error to null in mls_get_property_image() so callers can rely on the documented string|null contract. - Support comma-separated property_type filters in MLS_Query and MLS_Cluster so the homepage "View All Commercial" link (?property_type=Commercial+Sale,Land,Farm) actually filters correctly. - Incremental sync now looks back 10 minutes past the latest modification timestamp as a safety margin against missed records. - Smart sync exits silently (info-level, not warning) when a full sync is in progress. Operational: - New cron: weekly full sync Sundays at 3 AM (/usr/local/bin/mls-full-sync). - New cron: hourly 2GB cap on mls-thumbnails/ and cache/transformed-images/ (/usr/local/bin/mls-image-cache-cap). - Logrotate config for wp-content/debug.log (2-day retention, daily rotation, delaycompress). Repo policy: - CLAUDE.md updated with explicit "commit everything except build artifacts" policy. - .gitignore: untrack runtime image caches and debug.log rotations. Other modifications in this snapshot are pre-existing in-flight theme/plugin/db_content_updates work. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
510 lines
16 KiB
PHP
Executable File
510 lines
16 KiB
PHP
Executable File
<?php
|
|
/**
 * MLS Garbage Collector
 *
 * Cleans up old MLS image directories when disk space is low.
 * Runs after sync to prevent the disk from filling up.
 *
 * IMPORTANT: Only the standard cache directory (mls-listings) is cleaned.
 * The persistent cache directory (mls-listings-persistent) is NEVER touched.
 * HomeProz listing images are stored in the persistent cache and are preserved
 * even after listings are sold or removed from MLS.
 *
 * Configuration (wp-config.php):
 * - MLS_GC_DISK_THRESHOLD: Minimum free disk space, in bytes; cleanup triggers
 *   when free space drops below this value.
 *   Example: define('MLS_GC_DISK_THRESHOLD', 5 * 1024 * 1024 * 1024); // 5GB
 *
 * Behavior:
 * - Only runs if MLS_GC_DISK_THRESHOLD is defined and greater than zero
 * - Only cleans the standard cache (mls-listings), never the persistent cache
 * - Skips directories modified within the last 24 hours
 * - Deletes the oldest directories first
 * - Stops once free space reaches 5GB or about 2GB has been deleted in a run
 */
|
|
|
|
// Bail out when this file is requested directly: ABSPATH is the constant
// WordPress defines during bootstrap, so its absence means we were not
// loaded through WordPress.
defined('ABSPATH') || exit;
|
|
|
|
/**
 * Frees disk space by deleting the oldest per-listing image directories from
 * the standard MLS image cache (<uploads>/mls-listings/<prefix>/<listing_key>).
 *
 * Collection is opt-in: nothing happens unless MLS_GC_DISK_THRESHOLD is
 * defined with a positive value. Symbolic links found inside the cache are
 * removed as links only; their targets are never followed or deleted.
 */
class MLS_Garbage_Collector {

    /**
     * Minimum free space a cleanup run tries to reach, in bytes (5GB).
     *
     * If the configured threshold is larger than this value, run() cleans up
     * to the threshold instead.
     */
    const TARGET_FREE_SPACE = 5368709120; // 5 * 1024 * 1024 * 1024

    /**
     * Maximum bytes to delete per run (2GB).
     *
     * Checked before each deletion, so a run may exceed it by the size of
     * the last directory removed.
     */
    const MAX_DELETE_PER_RUN = 2147483648; // 2 * 1024 * 1024 * 1024

    /**
     * Minimum age of directories to delete (24 hours)
     */
    const MIN_AGE_SECONDS = 86400; // 24 * 60 * 60

    /**
     * Logger instance
     */
    private $logger;

    /**
     * Database instance (may be null)
     */
    private $db;

    /**
     * Constructor
     *
     * NOTE(review): the implicit-nullable `MLS_DB $db = null` form is deprecated
     * as of PHP 8.4; switch to `?MLS_DB $db = null` once PHP >= 7.1 is the
     * guaranteed minimum for this plugin.
     *
     * @param MLS_Logger  $logger Logger instance
     * @param MLS_DB|null $db     Database instance (optional for backwards compatibility)
     */
    public function __construct(MLS_Logger $logger, MLS_DB $db = null) {
        $this->logger = $logger;
        $this->db     = $db;
    }

    /**
     * Check if garbage collection is enabled
     *
     * @return bool True if MLS_GC_DISK_THRESHOLD is defined and greater than zero
     */
    public function is_enabled() {
        if (!defined('MLS_GC_DISK_THRESHOLD')) {
            return false;
        }

        return MLS_GC_DISK_THRESHOLD > 0;
    }

    /**
     * Get the disk threshold from config
     *
     * @return int Threshold in bytes, or 0 if not defined
     */
    public function get_threshold() {
        if (!defined('MLS_GC_DISK_THRESHOLD')) {
            return 0;
        }

        return (int) MLS_GC_DISK_THRESHOLD;
    }

    /**
     * Get the MLS images upload directory (standard cache only)
     *
     * This is the only directory the collector scans and deletes from. The
     * persistent cache (mls-listings-persistent) lives elsewhere and is
     * intentionally excluded.
     *
     * @return string Absolute path to MLS images directory
     */
    public function get_images_dir() {
        $upload_dir = wp_upload_dir();

        return $upload_dir['basedir'] . '/mls-listings';
    }

    /**
     * Get free disk space for the volume hosting MLS images
     *
     * Creates the images directory first when it is missing, because
     * disk_free_space() needs an existing directory to inspect.
     *
     * @return float|false Free space in bytes, or false on error
     */
    public function get_free_space() {
        $dir = $this->get_images_dir();

        if (!file_exists($dir)) {
            wp_mkdir_p($dir);
        }

        // Creation can fail (permissions, read-only volume). Report "unknown"
        // instead of letting disk_free_space() warn about a missing path.
        if (!is_dir($dir)) {
            return false;
        }

        return disk_free_space($dir);
    }

    /**
     * Check if cleanup is needed
     *
     * @return bool True if collection is enabled and free space is below the
     *              threshold; false when free space cannot be determined
     */
    public function needs_cleanup() {
        if (!$this->is_enabled()) {
            return false;
        }

        $free_space = $this->get_free_space();
        if ($free_space === false) {
            return false;
        }

        return $free_space < $this->get_threshold();
    }

    /**
     * Get listing directories sorted by modification time (oldest first)
     *
     * Only returns directories older than MIN_AGE_SECONDS. Directories whose
     * modification time cannot be read are skipped rather than being treated
     * as "very old".
     *
     * @return array Array of ['path' => string, 'mtime' => int, 'size' => int]
     */
    public function get_old_directories() {
        $base_dir    = $this->get_images_dir();
        $cutoff_time = time() - self::MIN_AGE_SECONDS;
        $directories = array();

        if (!is_dir($base_dir)) {
            return $directories;
        }

        // First level: prefix directories (2-char subdirs like "AB", "CD")
        $prefix_dirs = glob($base_dir . '/*', GLOB_ONLYDIR);
        if (!$prefix_dirs) {
            return $directories;
        }

        foreach ($prefix_dirs as $prefix_dir) {
            // Second level: one directory per listing
            $listing_dirs = glob($prefix_dir . '/*', GLOB_ONLYDIR);
            if (!$listing_dirs) {
                continue;
            }

            foreach ($listing_dirs as $listing_dir) {
                $mtime = filemtime($listing_dir);

                // filemtime() returns false on failure; without this check the
                // directory would pass the age test and sort first for deletion.
                if ($mtime === false) {
                    continue;
                }

                // Skip if modified within the last 24 hours
                if ($mtime >= $cutoff_time) {
                    continue;
                }

                $directories[] = array(
                    'path'  => $listing_dir,
                    'mtime' => $mtime,
                    'size'  => $this->get_directory_size($listing_dir),
                );
            }
        }

        // Sort by modification time (oldest first)
        usort($directories, function ($a, $b) {
            if ($a['mtime'] === $b['mtime']) {
                return 0;
            }

            return $a['mtime'] < $b['mtime'] ? -1 : 1;
        });

        return $directories;
    }

    /**
     * Get total size of a directory
     *
     * Symbolic links are not counted: deleting a link does not free the space
     * used by its target. Unreadable entries end the walk early and the size
     * accumulated so far is returned.
     *
     * @param string $dir Directory path
     * @return int Size in bytes
     */
    private function get_directory_size($dir) {
        $size = 0;

        if (is_link($dir)) {
            return $size;
        }

        try {
            $files = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator($dir, RecursiveDirectoryIterator::SKIP_DOTS),
                RecursiveIteratorIterator::LEAVES_ONLY
            );

            foreach ($files as $file) {
                if (!$file->isLink() && $file->isFile()) {
                    $size += $file->getSize();
                }
            }
        } catch (RuntimeException $e) {
            // The SPL iterators throw when a directory cannot be opened or a
            // file cannot be stat'ed (e.g. removed concurrently). Sizing is
            // informational, so keep whatever was measured.
        }

        return $size;
    }

    /**
     * Delete a directory and all its contents
     *
     * Symbolic links are removed as links: the iterator does not descend into
     * them and their targets are left untouched. If $dir itself is a symlink,
     * only the link is removed.
     *
     * @param string $dir Directory path
     * @return bool True on success
     */
    private function delete_directory($dir) {
        if (is_link($dir)) {
            return @unlink($dir);
        }

        if (!is_dir($dir)) {
            return false;
        }

        try {
            $files = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator($dir, RecursiveDirectoryIterator::SKIP_DOTS),
                RecursiveIteratorIterator::CHILD_FIRST
            );

            foreach ($files as $file) {
                // getPathname() is the entry's own path; getRealPath() would
                // resolve symlinks and point at the link target instead.
                $entry_path = $file->getPathname();

                if (!$file->isLink() && $file->isDir()) {
                    @rmdir($entry_path);
                } else {
                    @unlink($entry_path);
                }
            }
        } catch (RuntimeException $e) {
            // Unreadable subdirectory: the tree could not be emptied.
            return false;
        }

        return @rmdir($dir);
    }

    /**
     * Pass a status message to the optional callback
     *
     * @param callable|null $status_callback Callback taking (message, level), or null
     * @param string        $message         Message text
     * @param string        $level           Level string such as 'info' or 'warning'
     */
    private function report($status_callback, $message, $level) {
        if ($status_callback) {
            $status_callback($message, $level);
        }
    }

    /**
     * Run garbage collection
     *
     * Does nothing unless collection is enabled and free space is below the
     * configured threshold. Otherwise deletes old listing directories, oldest
     * first, until free space reaches the larger of TARGET_FREE_SPACE and the
     * threshold, or MAX_DELETE_PER_RUN bytes have been deleted.
     *
     * @param callable|null $status_callback Optional callback for status messages,
     *                                       called as ($message, $level)
     * @return array Results with 'enabled', 'ran', 'deleted_count', 'deleted_bytes',
     *               'free_space_before', 'free_space_after', 'threshold'
     */
    public function run($status_callback = null) {
        $threshold = $this->get_threshold();

        $result = array(
            'enabled'           => $this->is_enabled(),
            'ran'               => false,
            'deleted_count'     => 0,
            'deleted_bytes'     => 0,
            'free_space_before' => 0,
            'free_space_after'  => 0,
            'threshold'         => $threshold,
        );

        // Check if enabled
        if (!$result['enabled']) {
            $this->report($status_callback, 'Garbage collection disabled (MLS_GC_DISK_THRESHOLD not defined)', 'info');
            return $result;
        }

        $free_space = $this->get_free_space();
        if ($free_space === false) {
            $this->report($status_callback, 'Could not determine free disk space', 'warning');
            return $result;
        }

        $result['free_space_before'] = $free_space;

        // Check if cleanup needed
        if ($free_space >= $threshold) {
            $this->report($status_callback, sprintf(
                'Disk space OK: %s free (threshold: %s)',
                $this->format_bytes($free_space),
                $this->format_bytes($threshold)
            ), 'info');

            $result['free_space_after'] = $free_space;
            return $result;
        }

        $result['ran'] = true;

        $this->report($status_callback, sprintf(
            'Disk space low: %s free (threshold: %s). Starting cleanup...',
            $this->format_bytes($free_space),
            $this->format_bytes($threshold)
        ), 'warning');

        // Get old directories
        $directories = $this->get_old_directories();

        if (empty($directories)) {
            $this->report($status_callback, 'No directories older than 24 hours found for cleanup', 'info');
            $result['free_space_after'] = $free_space;
            return $result;
        }

        // Never stop below the threshold that triggered this run: with a
        // threshold above TARGET_FREE_SPACE, the fixed target alone would end
        // the loop before anything is deleted.
        $target_free   = max(self::TARGET_FREE_SPACE, $threshold);
        $deleted_bytes = 0;
        $deleted_count = 0;

        foreach ($directories as $dir_info) {
            // Stop if we've reached target free space
            $current_free = $this->get_free_space();
            if ($current_free !== false && $current_free >= $target_free) {
                $this->report($status_callback, sprintf(
                    'Target free space reached: %s',
                    $this->format_bytes($current_free)
                ), 'info');
                break;
            }

            // Stop if we've deleted enough this run
            if ($deleted_bytes >= self::MAX_DELETE_PER_RUN) {
                $this->report($status_callback, sprintf(
                    'Max deletion limit reached: %s',
                    $this->format_bytes($deleted_bytes)
                ), 'info');
                break;
            }

            $path        = $dir_info['path'];
            $size        = $dir_info['size'];
            $listing_key = basename($path);

            if (!$this->delete_directory($path)) {
                // Surface the failure instead of silently moving on.
                $this->logger->info('Garbage collection failed to delete directory', array(
                    'listing_key' => $listing_key,
                    'path'        => $path,
                ));
                $this->report($status_callback, sprintf('Failed to delete: %s', $listing_key), 'warning');
                continue;
            }

            $deleted_bytes += $size;
            $deleted_count++;

            // Reset download_status to 'pending' for this listing's media
            // so images can be re-downloaded on demand later
            $this->reset_media_download_status($listing_key);

            $this->logger->info('Garbage collection deleted directory', array(
                'listing_key' => $listing_key,
                'size'        => $size,
                'age_days'    => round((time() - $dir_info['mtime']) / 86400, 1),
            ));

            $this->report($status_callback, sprintf(
                'Deleted: %s (%s)',
                $listing_key,
                $this->format_bytes($size)
            ), 'info');
        }

        // Clean up empty prefix directories
        $this->cleanup_empty_prefix_dirs();

        $result['deleted_count']    = $deleted_count;
        $result['deleted_bytes']    = $deleted_bytes;
        $result['free_space_after'] = $this->get_free_space();

        $this->report($status_callback, sprintf(
            'Cleanup complete: Deleted %d directories (%s). Free space now: %s',
            $deleted_count,
            $this->format_bytes($deleted_bytes),
            $this->format_bytes($result['free_space_after'])
        ), 'info');

        $this->logger->info('Garbage collection completed', array(
            'deleted_count'     => $deleted_count,
            'deleted_bytes'     => $deleted_bytes,
            'free_space_before' => $result['free_space_before'],
            'free_space_after'  => $result['free_space_after'],
        ));

        return $result;
    }

    /**
     * Reset download_status to 'pending' for a listing's media records
     *
     * Called after deleting cached files so images can be re-downloaded on demand.
     * Also clears local_path, local_url and downloaded_at for the listing's rows.
     *
     * @param string $listing_key Listing key
     */
    private function reset_media_download_status($listing_key) {
        global $wpdb;

        // Use the injected DB wrapper's table name when available; otherwise
        // fall back to the prefixed default table name.
        $media_table = $this->db ? $this->db->media_table() : $wpdb->prefix . 'mls_media';

        $updated = $wpdb->update(
            $media_table,
            array(
                'download_status' => 'pending',
                'local_path'      => null,
                'local_url'       => null,
                'downloaded_at'   => null,
            ),
            array('listing_key' => $listing_key),
            array('%s', null, null, null),
            array('%s')
        );

        // $wpdb->update() yields false on error and 0 when no row changed;
        // only a positive count is worth a debug entry.
        if ($updated > 0) {
            $this->logger->debug('Reset media download status for garbage collected listing', array(
                'listing_key'     => $listing_key,
                'records_updated' => $updated,
            ));
        }
    }

    /**
     * Clean up empty prefix directories
     *
     * Removes first-level prefix directories that no longer contain any
     * (non-hidden) entries after their listing directories were deleted.
     */
    private function cleanup_empty_prefix_dirs() {
        $base_dir    = $this->get_images_dir();
        $prefix_dirs = glob($base_dir . '/*', GLOB_ONLYDIR);

        if (!$prefix_dirs) {
            return;
        }

        foreach ($prefix_dirs as $prefix_dir) {
            // glob('/*') does not match dotfiles; rmdir() itself refuses to
            // remove a directory that still holds hidden entries.
            $contents = glob($prefix_dir . '/*');
            if (empty($contents)) {
                @rmdir($prefix_dir);
            }
        }
    }

    /**
     * Format bytes to human readable string
     *
     * @param int|float|false $bytes Byte count; a non-numeric value (for
     *                               example false from a failed free-space
     *                               lookup) is rendered as "unknown"
     * @return string Formatted string (e.g., "1.50 GB")
     */
    private function format_bytes($bytes) {
        if (!is_numeric($bytes)) {
            return 'unknown';
        }

        if ($bytes >= 1073741824) {
            return number_format($bytes / 1073741824, 2) . ' GB';
        }

        if ($bytes >= 1048576) {
            return number_format($bytes / 1048576, 2) . ' MB';
        }

        if ($bytes >= 1024) {
            return number_format($bytes / 1024, 2) . ' KB';
        }

        return $bytes . ' bytes';
    }

    /**
     * Get statistics about the image cache
     *
     * Walks every listing directory and sums its size, so this can be slow
     * on a large cache.
     *
     * @return array Stats: total_size, directory_count, oldest_mtime, newest_mtime,
     *               free_space, threshold, needs_cleanup
     */
    public function get_stats() {
        $base_dir = $this->get_images_dir();

        $stats = array(
            'total_size'      => 0,
            'directory_count' => 0,
            'oldest_mtime'    => null,
            'newest_mtime'    => null,
            'free_space'      => $this->get_free_space(),
            'threshold'       => $this->get_threshold(),
            'needs_cleanup'   => $this->needs_cleanup(),
        );

        if (!is_dir($base_dir)) {
            return $stats;
        }

        $prefix_dirs = glob($base_dir . '/*', GLOB_ONLYDIR);
        if (!$prefix_dirs) {
            return $stats;
        }

        foreach ($prefix_dirs as $prefix_dir) {
            $listing_dirs = glob($prefix_dir . '/*', GLOB_ONLYDIR);
            if (!$listing_dirs) {
                continue;
            }

            foreach ($listing_dirs as $listing_dir) {
                $stats['directory_count']++;
                $stats['total_size'] += $this->get_directory_size($listing_dir);

                $mtime = filemtime($listing_dir);
                if ($mtime === false) {
                    // Unknown timestamp: count the directory, but keep it out
                    // of the oldest/newest range.
                    continue;
                }

                if ($stats['oldest_mtime'] === null || $mtime < $stats['oldest_mtime']) {
                    $stats['oldest_mtime'] = $mtime;
                }

                if ($stats['newest_mtime'] === null || $mtime > $stats['newest_mtime']) {
                    $stats['newest_mtime'] = $mtime;
                }
            }
        }

        return $stats;
    }
}
|