modeld standby, fan standstill clamping, log rotation, diagnostic logging

- modeld standby: skip inference at standstill (model stays loaded in GPU),
  ModelStandby param + ModelStandbyTs heartbeat for race-free suppression
- controlsd: suppress commIssue/modeldLagging when ModelStandbyTs < 2s old,
  ignore telemetryd/dashcamd in process_not_running check
- Fan controller: standstill below 74°C clamps to 0-10% (near silent),
  standstill above 74°C allows 0-100%, thermald reads carState.standstill
- Deleter: enforce 4GB quota on /data/log2 session logs, oldest-first cleanup
- Diagnostic logging: steerTempUnavailable and controlsdLagging log to stderr
  with full context (steering angle, torque, speed, remaining time)
- CLAUDE.md: document memory params method name difference (C++ camelCase
  vs Python snake_case)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 01:35:14 -05:00
parent 3d9143c41b
commit bf030db9fe
11 changed files with 111 additions and 12 deletions

View File

@@ -48,7 +48,7 @@ CAMERA_OFFSET = 0.04
REPLAY = "REPLAY" in os.environ
SIMULATION = "SIMULATION" in os.environ
TESTING_CLOSET = "TESTING_CLOSET" in os.environ
IGNORE_PROCESSES = {"loggerd", "encoderd", "statsd"}
IGNORE_PROCESSES = {"loggerd", "encoderd", "statsd", "telemetryd", "dashcamd"}
ThermalStatus = log.DeviceState.ThermalStatus
State = log.ControlsState.OpenpilotState
@@ -455,6 +455,8 @@ class Controls:
elif not self.sm.all_freq_ok(self.camera_packets):
self.events.add(EventName.cameraFrameRate)
if not REPLAY and self.rk.lagging:
import sys
print(f"CLP controlsdLagging: remaining={self.rk.remaining:.4f} standstill={CS.standstill} vEgo={CS.vEgo:.2f}", file=sys.stderr)
self.events.add(EventName.controlsdLagging)
if not self.radarless_model:
if len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState'])):
@@ -467,9 +469,16 @@ class Controls:
self.events.add(EventName.canError)
# generic catch-all. ideally, a more specific event should be added above instead
# CLEARPILOT: skip comm check when modeld was recently in standby
# ModelStandbyTs is written every 1s while in standby — if < 2s old, suppress
try:
standby_ts = float(self.params_memory.get("ModelStandbyTs") or "0")
except (ValueError, TypeError):
standby_ts = 0
model_suppress = (time.monotonic() - standby_ts) < 2.0
has_disable_events = self.events.contains(ET.NO_ENTRY) and (self.events.contains(ET.SOFT_DISABLE) or self.events.contains(ET.IMMEDIATE_DISABLE))
no_system_errors = (not has_disable_events) or (len(self.events) == num_events)
if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors:
if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors and not model_suppress:
if not self.sm.all_alive():
self.events.add(EventName.commIssue)
elif not self.sm.all_freq_ok():
@@ -542,7 +551,7 @@ class Controls:
self.distance_traveled = 0
self.distance_traveled += CS.vEgo * DT_CTRL
if self.sm['modelV2'].frameDropPerc > 20:
if self.sm['modelV2'].frameDropPerc > 20 and not model_suppress:
self.events.add(EventName.modeldLagging)