Files
homeproz/wp-content/plugins/mls-by-hansonxyz/includes/class-mls-garbage-collector.php
T
root b6df4dbb92 Snapshot: MLS sync fixes, image refresh, plugin/theme updates
MLS plugin fixes from this session:
- Fix silent insert failures: location column NOT NULL was rejecting wpdb->insert calls,
  causing ~18k new properties since Dec 2025 to be lost. Inserts now build raw SQL
  with ST_PointFromText so the spatial column is populated atomically.
- Auto-refresh expired media URLs in MLS_Media_Handler::fetch_and_cache(), guarded by
  a property-level GET_LOCK so concurrent fetches share one API refresh.
- Normalize WP_Error to null in mls_get_property_image() so callers can rely on the
  documented string|null contract.
- Support comma-separated property_type filters in MLS_Query and MLS_Cluster so the
  homepage "View All Commercial" link (?property_type=Commercial+Sale,Land,Farm)
  actually filters correctly.
- Incremental sync now looks back 10 minutes past the latest modification timestamp
  as a safety margin against missed records.
- Smart sync exits silently (info-level, not warning) when a full sync is in progress.

Operational:
- New cron: weekly full sync Sundays at 3 AM (/usr/local/bin/mls-full-sync).
- New cron: hourly 2GB cap on mls-thumbnails/ and cache/transformed-images/
  (/usr/local/bin/mls-image-cache-cap).
- Logrotate config for wp-content/debug.log (2-day retention, daily rotation,
  delaycompress).

Repo policy:
- CLAUDE.md updated with explicit "commit everything except build artifacts" policy.
- .gitignore: untrack runtime image caches and debug.log rotations.

Other modifications in this snapshot are pre-existing in-flight theme/plugin/db_content_updates work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 15:32:23 +00:00

510 lines
16 KiB
PHP
Executable File

<?php
/**
* MLS Garbage Collector
*
* Cleans up old MLS image directories when disk space is low.
* Runs after sync to prevent disk from filling up.
*
* IMPORTANT: Only cleans the standard cache directory (mls-listings).
* The persistent cache directory (mls-listings-persistent) is NEVER touched.
* HomeProz listing images are stored in persistent cache and preserved
* even after listings are sold or removed from MLS.
*
* Configuration (wp-config.php):
* - MLS_GC_DISK_THRESHOLD: Minimum free disk space in bytes before cleanup triggers
* Example: define('MLS_GC_DISK_THRESHOLD', 5 * 1024 * 1024 * 1024); // 5GB
*
* Behavior:
* - Only runs if MLS_GC_DISK_THRESHOLD is defined
* - Only cleans standard cache (mls-listings), never persistent cache
* - Skips directories modified within the last 24 hours
* - Deletes oldest directories first
* - Stops when free space >= 5GB or 2GB deleted per run
*/
// Refuse direct access: stop immediately unless ABSPATH has been defined by the loader.
defined('ABSPATH') || exit;
/**
 * Frees disk space by deleting the oldest listing directories from the
 * standard MLS image cache (uploads/mls-listings).
 *
 * A run only starts when free space is below MLS_GC_DISK_THRESHOLD. It stops
 * once free space reaches max(TARGET_FREE_SPACE, MLS_GC_DISK_THRESHOLD) or once
 * MAX_DELETE_PER_RUN bytes have been removed, whichever happens first.
 */
class MLS_Garbage_Collector {
    /**
     * Target free space to achieve (5GB)
     */
    const TARGET_FREE_SPACE = 5368709120; // 5 * 1024 * 1024 * 1024

    /**
     * Maximum bytes to delete per run (2GB)
     */
    const MAX_DELETE_PER_RUN = 2147483648; // 2 * 1024 * 1024 * 1024

    /**
     * Minimum age of directories to delete (24 hours)
     */
    const MIN_AGE_SECONDS = 86400; // 24 * 60 * 60

    /**
     * Logger instance
     */
    private $logger;

    /**
     * Database instance (may be null)
     */
    private $db;

    /**
     * Constructor
     *
     * @param MLS_Logger  $logger Logger instance
     * @param MLS_DB|null $db     Database instance (optional for backwards compatibility)
     */
    public function __construct(MLS_Logger $logger, MLS_DB $db = null) {
        $this->logger = $logger;
        $this->db = $db;
    }

    /**
     * Check if garbage collection is enabled
     *
     * @return bool True if MLS_GC_DISK_THRESHOLD is defined and greater than zero
     */
    public function is_enabled() {
        return defined('MLS_GC_DISK_THRESHOLD') && MLS_GC_DISK_THRESHOLD > 0;
    }

    /**
     * Get the disk threshold from config
     *
     * @return int Threshold in bytes, or 0 if not defined
     */
    public function get_threshold() {
        return defined('MLS_GC_DISK_THRESHOLD') ? (int) MLS_GC_DISK_THRESHOLD : 0;
    }

    /**
     * Get the MLS images upload directory (standard cache only)
     *
     * Only the "mls-listings" directory under the uploads base dir is returned;
     * no other directory is ever handed to the collector by this class.
     *
     * @return string Absolute path to MLS images directory
     */
    public function get_images_dir() {
        $upload_dir = wp_upload_dir();
        return $upload_dir['basedir'] . '/mls-listings';
    }

    /**
     * Get free disk space for the volume hosting MLS images
     *
     * Creates the images directory when it does not exist yet so that the
     * free-space query has a valid path to inspect.
     *
     * @return float|false Free space in bytes (disk_free_space() returns a float),
     *                     or false when the directory is unavailable or the query fails
     */
    public function get_free_space() {
        $dir = $this->get_images_dir();

        if (!file_exists($dir)) {
            wp_mkdir_p($dir);
        }

        // Creation can fail (permissions, a regular file in the way); report
        // "unknown" instead of letting disk_free_space() raise a warning.
        if (!is_dir($dir)) {
            return false;
        }

        return disk_free_space($dir);
    }

    /**
     * Check if cleanup is needed
     *
     * @return bool True if collection is enabled and free space is below the threshold
     */
    public function needs_cleanup() {
        if (!$this->is_enabled()) {
            return false;
        }

        $free_space = $this->get_free_space();
        if ($free_space === false) {
            return false;
        }

        return $free_space < $this->get_threshold();
    }

    /**
     * Get listing directories sorted by modification time (oldest first)
     *
     * Only returns directories older than MIN_AGE_SECONDS. Computing 'size'
     * walks every file of every returned directory, so this call is expensive
     * on a large cache.
     *
     * @return array Array of ['path' => string, 'mtime' => int, 'size' => int]
     */
    public function get_old_directories() {
        $directories = $this->collect_old_directories();

        foreach ($directories as $index => $entry) {
            $directories[$index]['size'] = $this->get_directory_size($entry['path']);
        }

        return $directories;
    }

    /**
     * Free-space level at which a run stops deleting
     *
     * Never lower than the configured threshold: otherwise a threshold above
     * TARGET_FREE_SPACE would start a run that stops before deleting anything.
     *
     * @return int Target free space in bytes
     */
    private function get_target_free_space() {
        return max(self::TARGET_FREE_SPACE, $this->get_threshold());
    }

    /**
     * List every listing directory (second level below the images dir)
     *
     * Layout is <images dir>/<prefix dir>/<listing dir>.
     *
     * @return array List of absolute directory paths
     */
    private function list_listing_directories() {
        $listing_dirs = array();
        $base_dir = $this->get_images_dir();

        if (!is_dir($base_dir)) {
            return $listing_dirs;
        }

        $prefix_dirs = glob($base_dir . '/*', GLOB_ONLYDIR);
        if (!$prefix_dirs) {
            return $listing_dirs;
        }

        foreach ($prefix_dirs as $prefix_dir) {
            $children = glob($prefix_dir . '/*', GLOB_ONLYDIR);
            if (!$children) {
                continue;
            }
            foreach ($children as $child) {
                $listing_dirs[] = $child;
            }
        }

        return $listing_dirs;
    }

    /**
     * Collect listing directories older than MIN_AGE_SECONDS, oldest first
     *
     * Sizes are deliberately not computed here so callers that only delete a
     * few directories do not pay for scanning the whole cache.
     *
     * @return array Array of ['path' => string, 'mtime' => int]
     */
    private function collect_old_directories() {
        $cutoff_time = time() - self::MIN_AGE_SECONDS;
        $candidates = array();

        foreach ($this->list_listing_directories() as $listing_dir) {
            $mtime = @filemtime($listing_dir);

            // false: the directory vanished or cannot be stat'ed; never treat
            // that as "very old". Otherwise skip anything modified recently.
            if ($mtime === false || $mtime >= $cutoff_time) {
                continue;
            }

            $candidates[] = array(
                'path' => $listing_dir,
                'mtime' => $mtime,
            );
        }

        usort($candidates, function ($a, $b) {
            if ($a['mtime'] === $b['mtime']) {
                return 0;
            }
            return $a['mtime'] < $b['mtime'] ? -1 : 1;
        });

        return $candidates;
    }

    /**
     * Get total size of a directory
     *
     * @param string $dir Directory path
     * @return int Size in bytes; a partial total if the tree changes or becomes
     *             unreadable while it is being scanned
     */
    private function get_directory_size($dir) {
        $size = 0;

        try {
            $files = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator($dir, RecursiveDirectoryIterator::SKIP_DOTS),
                RecursiveIteratorIterator::LEAVES_ONLY
            );

            foreach ($files as $file) {
                if ($file->isFile()) {
                    $size += $file->getSize();
                }
            }
        } catch (RuntimeException $e) {
            // The SPL iterators throw when a directory cannot be opened or a
            // file disappears between listing and stat. Keep what was counted.
        }

        return $size;
    }

    /**
     * Delete a directory and all its contents
     *
     * Symbolic links are removed as links; their targets are never touched.
     * A $dir that is itself a symbolic link is refused.
     *
     * @param string $dir Directory path
     * @return bool True if the directory itself was removed
     */
    private function delete_directory($dir) {
        if (!is_dir($dir) || is_link($dir)) {
            return false;
        }

        try {
            $entries = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator($dir, RecursiveDirectoryIterator::SKIP_DOTS),
                RecursiveIteratorIterator::CHILD_FIRST
            );

            foreach ($entries as $entry) {
                // getPathname() is the entry inside $dir. getRealPath() would
                // resolve a symlink to its target and delete that instead.
                $pathname = $entry->getPathname();

                if ($entry->isDir() && !$entry->isLink()) {
                    @rmdir($pathname);
                } else {
                    @unlink($pathname);
                }
            }
        } catch (RuntimeException $e) {
            return false;
        }

        return @rmdir($dir);
    }

    /**
     * Forward a status message to the optional callback
     *
     * @param callable|null $status_callback Callback taking (message, level), or null
     * @param string        $message         Message text
     * @param string        $level           Level label such as 'info' or 'warning'
     */
    private function report_status($status_callback, $message, $level) {
        if ($status_callback) {
            $status_callback($message, $level);
        }
    }

    /**
     * Run garbage collection
     *
     * Deletes old directories (oldest first) until free space reaches
     * max(TARGET_FREE_SPACE, threshold) or MAX_DELETE_PER_RUN bytes have been
     * deleted. The byte limit is checked before each deletion, so the last
     * directory removed may push the total slightly above it.
     *
     * @param callable|null $status_callback Optional callback for status messages,
     *                                       called as ($message, $level)
     * @return array Results with 'enabled', 'ran', 'deleted_count', 'deleted_bytes',
     *               'free_space_before', 'free_space_after', 'threshold'
     */
    public function run($status_callback = null) {
        $result = array(
            'enabled' => $this->is_enabled(),
            'ran' => false,
            'deleted_count' => 0,
            'deleted_bytes' => 0,
            'free_space_before' => 0,
            'free_space_after' => 0,
            'threshold' => $this->get_threshold(),
        );

        if (!$this->is_enabled()) {
            $this->report_status(
                $status_callback,
                'Garbage collection disabled (MLS_GC_DISK_THRESHOLD not defined)',
                'info'
            );
            return $result;
        }

        $free_space = $this->get_free_space();
        if ($free_space === false) {
            $this->report_status($status_callback, 'Could not determine free disk space', 'warning');
            return $result;
        }

        $result['free_space_before'] = $free_space;
        $threshold = $this->get_threshold();

        if ($free_space >= $threshold) {
            $this->report_status($status_callback, sprintf(
                'Disk space OK: %s free (threshold: %s)',
                $this->format_bytes($free_space),
                $this->format_bytes($threshold)
            ), 'info');
            $result['free_space_after'] = $free_space;
            return $result;
        }

        $result['ran'] = true;
        $this->report_status($status_callback, sprintf(
            'Disk space low: %s free (threshold: %s). Starting cleanup...',
            $this->format_bytes($free_space),
            $this->format_bytes($threshold)
        ), 'warning');

        // Sizes are measured lazily below, only for directories actually deleted.
        $directories = $this->collect_old_directories();
        if (empty($directories)) {
            $this->report_status(
                $status_callback,
                'No directories older than 24 hours found for cleanup',
                'info'
            );
            $result['free_space_after'] = $free_space;
            return $result;
        }

        $target_free_space = $this->get_target_free_space();
        $deleted_bytes = 0;
        $deleted_count = 0;

        foreach ($directories as $dir_info) {
            // Stop if we've reached target free space
            $current_free = $this->get_free_space();
            if ($current_free !== false && $current_free >= $target_free_space) {
                $this->report_status($status_callback, sprintf(
                    'Target free space reached: %s',
                    $this->format_bytes($current_free)
                ), 'info');
                break;
            }

            // Stop if we've deleted enough this run
            if ($deleted_bytes >= self::MAX_DELETE_PER_RUN) {
                $this->report_status($status_callback, sprintf(
                    'Max deletion limit reached: %s',
                    $this->format_bytes($deleted_bytes)
                ), 'info');
                break;
            }

            $path = $dir_info['path'];
            $listing_key = basename($path);
            $size = $this->get_directory_size($path);

            if (!$this->delete_directory($path)) {
                // Do not count it and do not touch the media rows; make the
                // failure visible instead of skipping it silently.
                $this->logger->info('Garbage collection could not delete directory', array(
                    'listing_key' => $listing_key,
                    'path' => $path,
                ));
                $this->report_status($status_callback, sprintf(
                    'Could not delete: %s',
                    $listing_key
                ), 'warning');
                continue;
            }

            $deleted_bytes += $size;
            $deleted_count++;

            // Reset download_status to 'pending' for this listing's media
            // so images can be re-downloaded on demand later
            $this->reset_media_download_status($listing_key);

            $this->logger->info('Garbage collection deleted directory', array(
                'listing_key' => $listing_key,
                'size' => $size,
                'age_days' => round((time() - $dir_info['mtime']) / 86400, 1),
            ));

            $this->report_status($status_callback, sprintf(
                'Deleted: %s (%s)',
                $listing_key,
                $this->format_bytes($size)
            ), 'info');
        }

        // Clean up empty prefix directories
        $this->cleanup_empty_prefix_dirs();

        $result['deleted_count'] = $deleted_count;
        $result['deleted_bytes'] = $deleted_bytes;
        $result['free_space_after'] = $this->get_free_space();

        $this->report_status($status_callback, sprintf(
            'Cleanup complete: Deleted %d directories (%s). Free space now: %s',
            $deleted_count,
            $this->format_bytes($deleted_bytes),
            $this->format_bytes($result['free_space_after'])
        ), 'info');

        $this->logger->info('Garbage collection completed', array(
            'deleted_count' => $deleted_count,
            'deleted_bytes' => $deleted_bytes,
            'free_space_before' => $result['free_space_before'],
            'free_space_after' => $result['free_space_after'],
        ));

        return $result;
    }

    /**
     * Reset download_status to 'pending' for a listing's media records
     *
     * Called after deleting cached files so images can be re-downloaded on demand.
     * Also clears local_path, local_url and downloaded_at for the listing.
     *
     * @param string $listing_key Listing key
     */
    private function reset_media_download_status($listing_key) {
        global $wpdb;

        // Prefer the injected DB helper; fall back to the conventional table name.
        $media_table = $this->db ? $this->db->media_table() : $wpdb->prefix . 'mls_media';

        $updated = $wpdb->update(
            $media_table,
            array(
                'download_status' => 'pending',
                'local_path' => null,
                'local_url' => null,
                'downloaded_at' => null,
            ),
            array('listing_key' => $listing_key),
            // One explicit format per column; null values are expected to be
            // written as SQL NULL by wpdb regardless of the format given.
            array('%s', '%s', '%s', '%s'),
            array('%s')
        );

        if ($updated === false) {
            // The files are already gone, so a failed reset leaves rows that
            // still point at them. Record it rather than failing silently.
            $this->logger->info('Failed to reset media download status for garbage collected listing', array(
                'listing_key' => $listing_key,
                'error' => $wpdb->last_error,
            ));
            return;
        }

        if ($updated > 0) {
            $this->logger->debug('Reset media download status for garbage collected listing', array(
                'listing_key' => $listing_key,
                'records_updated' => $updated,
            ));
        }
    }

    /**
     * Clean up empty prefix directories
     *
     * Removal is best-effort: rmdir() failures are suppressed.
     */
    private function cleanup_empty_prefix_dirs() {
        $base_dir = $this->get_images_dir();

        $prefix_dirs = glob($base_dir . '/*', GLOB_ONLYDIR);
        if (!$prefix_dirs) {
            return;
        }

        foreach ($prefix_dirs as $prefix_dir) {
            $contents = glob($prefix_dir . '/*');
            if (empty($contents)) {
                @rmdir($prefix_dir);
            }
        }
    }

    /**
     * Format bytes to human readable string
     *
     * @param int|float|false|null $bytes Byte count; false/null means "not available"
     * @return string Formatted string (e.g., "1.50 GB"), or "unknown"
     */
    private function format_bytes($bytes) {
        if ($bytes === false || $bytes === null) {
            return 'unknown';
        }

        if ($bytes >= 1073741824) {
            return number_format($bytes / 1073741824, 2) . ' GB';
        } elseif ($bytes >= 1048576) {
            return number_format($bytes / 1048576, 2) . ' MB';
        } elseif ($bytes >= 1024) {
            return number_format($bytes / 1024, 2) . ' KB';
        }

        return $bytes . ' bytes';
    }

    /**
     * Get statistics about the image cache
     *
     * @return array Stats: total_size, directory_count, oldest_mtime, newest_mtime,
     *               free_space, threshold, needs_cleanup
     */
    public function get_stats() {
        $stats = array(
            'total_size' => 0,
            'directory_count' => 0,
            'oldest_mtime' => null,
            'newest_mtime' => null,
            'free_space' => $this->get_free_space(),
            'threshold' => $this->get_threshold(),
            'needs_cleanup' => $this->needs_cleanup(),
        );

        foreach ($this->list_listing_directories() as $listing_dir) {
            $stats['directory_count']++;
            $stats['total_size'] += $this->get_directory_size($listing_dir);

            $mtime = @filemtime($listing_dir);
            if ($mtime === false) {
                // Unreadable timestamp: count the directory but keep it out of min/max.
                continue;
            }

            if ($stats['oldest_mtime'] === null || $mtime < $stats['oldest_mtime']) {
                $stats['oldest_mtime'] = $mtime;
            }
            if ($stats['newest_mtime'] === null || $mtime > $stats['newest_mtime']) {
                $stats['newest_mtime'] = $mtime;
            }
        }

        return $stats;
    }
}