modeld standby, fan standstill clamping, log rotation, diagnostic logging

- modeld standby: skip inference at standstill (model stays loaded in GPU),
  ModelStandby param + ModelStandbyTs heartbeat for race-free suppression
- controlsd: suppress commIssue/modeldLagging when ModelStandbyTs < 2s old,
  ignore telemetryd/dashcamd in process_not_running check
- Fan controller: standstill below 74°C clamps to 0-10% (near silent),
  standstill above 74°C allows 0-100%, thermald reads carState.standstill
- Deleter: enforce 4GB quota on /data/log2 session logs, oldest-first cleanup
- Diagnostic logging: steerTempUnavailable and controlsdLagging log to stderr
  with full context (steering angle, torque, speed, remaining time)
- CLAUDE.md: document memory params method name difference (C++ camelCase
  vs Python snake_case)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 01:35:14 -05:00
parent 3d9143c41b
commit bf030db9fe
11 changed files with 111 additions and 12 deletions

View File

@@ -101,7 +101,8 @@ ClearPilot uses memory params (`/dev/shm/params/d/`) for UI toggles that should
- **Python access**: Use `Params("/dev/shm/params")` - **Python access**: Use `Params("/dev/shm/params")`
- **Defaults**: Set in `manager_init()` via `Params("/dev/shm/params").put(key, value)` - **Defaults**: Set in `manager_init()` via `Params("/dev/shm/params").put(key, value)`
- **UI toggles**: Use `ToggleControl` with manual `toggleFlipped` lambda that writes via `Params("/dev/shm/params")`. Do NOT use `ParamControl` for memory params — it reads/writes persistent params only - **UI toggles**: Use `ToggleControl` with manual `toggleFlipped` lambda that writes via `Params("/dev/shm/params")`. Do NOT use `ParamControl` for memory params — it reads/writes persistent params only
- **Current memory params**: `TelemetryEnabled` (default "0"), `VpnEnabled` (default "1"), `ScreenDisplayMode` - **Current memory params**: `TelemetryEnabled` (default "0"), `VpnEnabled` (default "1"), `ModelStandby` (default "0"), `ScreenDisplayMode`
- **IMPORTANT — method names differ between C++ and Python**: C++ uses camelCase (`putBool`, `getBool`, `getInt`), Python uses snake_case (`put_bool`, `get_bool`, `get_int`). This is a common source of silent failures — the wrong casing compiles/runs but doesn't work.
### Building Native (C++) Processes ### Building Native (C++) Processes

View File

@@ -198,6 +198,8 @@ std::unordered_map<std::string, uint32_t> keys = {
{"TelemetryEnabled", PERSISTENT}, {"TelemetryEnabled", PERSISTENT},
{"Timezone", PERSISTENT}, {"Timezone", PERSISTENT},
{"TrainingVersion", PERSISTENT}, {"TrainingVersion", PERSISTENT},
{"ModelStandby", PERSISTENT},
{"ModelStandbyTs", PERSISTENT},
{"UbloxAvailable", PERSISTENT}, {"UbloxAvailable", PERSISTENT},
{"VpnEnabled", PERSISTENT}, {"VpnEnabled", PERSISTENT},
{"UpdateAvailable", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION}, {"UpdateAvailable", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},

View File

@@ -484,10 +484,19 @@ class CarInterfaceBase(ABC):
self.silent_steer_warning = True self.silent_steer_warning = True
events.add(EventName.steerTempUnavailableSilent) events.add(EventName.steerTempUnavailableSilent)
else: else:
# CLEARPILOT: log once per instance of this warning
if not getattr(self, '_steer_fault_logged', False):
import sys
print(f"CLP steerTempUnavailable: steerFaultTemporary={cs_out.steerFaultTemporary} "
f"steeringPressed={cs_out.steeringPressed} standstill={cs_out.standstill} "
f"steering_unpressed={self.steering_unpressed} steeringAngleDeg={cs_out.steeringAngleDeg:.1f} "
f"steeringTorque={cs_out.steeringTorque:.1f} vEgo={cs_out.vEgo:.2f}", file=sys.stderr)
self._steer_fault_logged = True
events.add(EventName.steerTempUnavailable) events.add(EventName.steerTempUnavailable)
else: else:
self.no_steer_warning = False self.no_steer_warning = False
self.silent_steer_warning = False self.silent_steer_warning = False
self._steer_fault_logged = False
if cs_out.steerFaultPermanent: if cs_out.steerFaultPermanent:
events.add(EventName.steerUnavailable) events.add(EventName.steerUnavailable)

Binary file not shown.

View File

@@ -48,7 +48,7 @@ CAMERA_OFFSET = 0.04
REPLAY = "REPLAY" in os.environ REPLAY = "REPLAY" in os.environ
SIMULATION = "SIMULATION" in os.environ SIMULATION = "SIMULATION" in os.environ
TESTING_CLOSET = "TESTING_CLOSET" in os.environ TESTING_CLOSET = "TESTING_CLOSET" in os.environ
IGNORE_PROCESSES = {"loggerd", "encoderd", "statsd"} IGNORE_PROCESSES = {"loggerd", "encoderd", "statsd", "telemetryd", "dashcamd"}
ThermalStatus = log.DeviceState.ThermalStatus ThermalStatus = log.DeviceState.ThermalStatus
State = log.ControlsState.OpenpilotState State = log.ControlsState.OpenpilotState
@@ -455,6 +455,8 @@ class Controls:
elif not self.sm.all_freq_ok(self.camera_packets): elif not self.sm.all_freq_ok(self.camera_packets):
self.events.add(EventName.cameraFrameRate) self.events.add(EventName.cameraFrameRate)
if not REPLAY and self.rk.lagging: if not REPLAY and self.rk.lagging:
import sys
print(f"CLP controlsdLagging: remaining={self.rk.remaining:.4f} standstill={CS.standstill} vEgo={CS.vEgo:.2f}", file=sys.stderr)
self.events.add(EventName.controlsdLagging) self.events.add(EventName.controlsdLagging)
if not self.radarless_model: if not self.radarless_model:
if len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState'])): if len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState'])):
@@ -467,9 +469,16 @@ class Controls:
self.events.add(EventName.canError) self.events.add(EventName.canError)
# generic catch-all. ideally, a more specific event should be added above instead # generic catch-all. ideally, a more specific event should be added above instead
# CLEARPILOT: skip comm check when modeld was recently in standby
# ModelStandbyTs is written every 1s while in standby — if < 2s old, suppress
try:
standby_ts = float(self.params_memory.get("ModelStandbyTs") or "0")
except (ValueError, TypeError):
standby_ts = 0
model_suppress = (time.monotonic() - standby_ts) < 2.0
has_disable_events = self.events.contains(ET.NO_ENTRY) and (self.events.contains(ET.SOFT_DISABLE) or self.events.contains(ET.IMMEDIATE_DISABLE)) has_disable_events = self.events.contains(ET.NO_ENTRY) and (self.events.contains(ET.SOFT_DISABLE) or self.events.contains(ET.IMMEDIATE_DISABLE))
no_system_errors = (not has_disable_events) or (len(self.events) == num_events) no_system_errors = (not has_disable_events) or (len(self.events) == num_events)
if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors: if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors and not model_suppress:
if not self.sm.all_alive(): if not self.sm.all_alive():
self.events.add(EventName.commIssue) self.events.add(EventName.commIssue)
elif not self.sm.all_freq_ok(): elif not self.sm.all_freq_ok():
@@ -542,7 +551,7 @@ class Controls:
self.distance_traveled = 0 self.distance_traveled = 0
self.distance_traveled += CS.vEgo * DT_CTRL self.distance_traveled += CS.vEgo * DT_CTRL
if self.sm['modelV2'].frameDropPerc > 20: if self.sm['modelV2'].frameDropPerc > 20 and not model_suppress:
self.events.add(EventName.modeldLagging) self.events.add(EventName.modeldLagging)

View File

@@ -87,6 +87,8 @@ def manager_init(frogpilot_functions) -> None:
params_memory = Params("/dev/shm/params") params_memory = Params("/dev/shm/params")
params_memory.put("TelemetryEnabled", "0") params_memory.put("TelemetryEnabled", "0")
params_memory.put("VpnEnabled", "1") params_memory.put("VpnEnabled", "1")
params_memory.put("ModelStandby", "0")
params_memory.put("ModelStandbyTs", "0")
params.clear_all(ParamKeyType.CLEAR_ON_ONROAD_TRANSITION) params.clear_all(ParamKeyType.CLEAR_ON_ONROAD_TRANSITION)
params.clear_all(ParamKeyType.CLEAR_ON_OFFROAD_TRANSITION) params.clear_all(ParamKeyType.CLEAR_ON_OFFROAD_TRANSITION)
if is_release_branch(): if is_release_branch():

View File

@@ -134,11 +134,15 @@ def main(demo=False):
setproctitle(PROCESS_NAME) setproctitle(PROCESS_NAME)
config_realtime_process(7, 54) config_realtime_process(7, 54)
import time as _time
cloudlog.warning("setting up CL context") cloudlog.warning("setting up CL context")
_t0 = _time.monotonic()
cl_context = CLContext() cl_context = CLContext()
cloudlog.warning("CL context ready; loading model") _t1 = _time.monotonic()
cloudlog.warning("CL context ready in %.3fs; loading model", _t1 - _t0)
model = ModelState(cl_context) model = ModelState(cl_context)
cloudlog.warning("models loaded, modeld starting") _t2 = _time.monotonic()
cloudlog.warning("model loaded in %.3fs (total init %.3fs), modeld starting", _t2 - _t1, _t2 - _t0)
# visionipc clients # visionipc clients
while True: while True:
@@ -179,6 +183,9 @@ def main(demo=False):
model_transform_main = np.zeros((3, 3), dtype=np.float32) model_transform_main = np.zeros((3, 3), dtype=np.float32)
model_transform_extra = np.zeros((3, 3), dtype=np.float32) model_transform_extra = np.zeros((3, 3), dtype=np.float32)
live_calib_seen = False live_calib_seen = False
model_standby = False
last_standby_ts_write = 0
params_memory = Params("/dev/shm/params")
nav_features = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32) nav_features = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32)
nav_instructions = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32) nav_instructions = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32)
buf_main, buf_extra = None, None buf_main, buf_extra = None, None
@@ -233,6 +240,27 @@ def main(demo=False):
meta_extra = meta_main meta_extra = meta_main
sm.update(0) sm.update(0)
# CLEARPILOT: standstill power saving — skip inference, keep model loaded
standstill = sm['carState'].standstill
if standstill and not model_standby:
params_memory.put_bool("ModelStandby", True)
model_standby = True
cloudlog.warning("modeld: standby ON (standstill)")
elif not standstill and model_standby:
params_memory.put_bool("ModelStandby", False)
model_standby = False
run_count = 0
frame_dropped_filter.x = 0.
cloudlog.warning("modeld: standby OFF")
if model_standby:
now = _time.monotonic()
if now - last_standby_ts_write > 1.0:
params_memory.put("ModelStandbyTs", str(now))
last_standby_ts_write = now
last_vipc_frame_id = meta_main.frame_id
continue
desire = DH.desire desire = DH.desire
is_rhd = sm["driverMonitoringState"].isRHD is_rhd = sm["driverMonitoringState"].isRHD
frame_id = sm["roadCameraState"].frameId frame_id = sm["roadCameraState"].frameId

View File

@@ -8,7 +8,7 @@ from openpilot.selfdrive.controls.lib.pid import PIDController
class BaseFanController(ABC): class BaseFanController(ABC):
@abstractmethod @abstractmethod
def update(self, cur_temp: float, ignition: bool) -> int: def update(self, cur_temp: float, ignition: bool, standstill: bool = False) -> int:
pass pass
@@ -20,9 +20,21 @@ class TiciFanController(BaseFanController):
self.last_ignition = False self.last_ignition = False
self.controller = PIDController(k_p=0, k_i=4e-3, k_f=1, rate=(1 / DT_TRML)) self.controller = PIDController(k_p=0, k_i=4e-3, k_f=1, rate=(1 / DT_TRML))
def update(self, cur_temp: float, ignition: bool) -> int: def update(self, cur_temp: float, ignition: bool, standstill: bool = False) -> int:
self.controller.neg_limit = -(100 if ignition else 30) # CLEARPILOT: at standstill below 74°C, clamp to 0-10% (near silent)
self.controller.pos_limit = -(30 if ignition else 0) # at standstill above 74°C, allow full 0-100% range
if ignition and standstill and cur_temp < 74:
self.controller.neg_limit = -10
self.controller.pos_limit = 0
elif ignition and standstill:
self.controller.neg_limit = -100
self.controller.pos_limit = 0
elif ignition:
self.controller.neg_limit = -100
self.controller.pos_limit = -30
else:
self.controller.neg_limit = -30
self.controller.pos_limit = 0
if ignition != self.last_ignition: if ignition != self.last_ignition:
self.controller.reset() self.controller.reset()

View File

@@ -164,7 +164,7 @@ def hw_state_thread(end_event, hw_queue):
def thermald_thread(end_event, hw_queue) -> None: def thermald_thread(end_event, hw_queue) -> None:
pm = messaging.PubMaster(['deviceState', 'frogpilotDeviceState']) pm = messaging.PubMaster(['deviceState', 'frogpilotDeviceState'])
sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState", "pandaStates"], poll="pandaStates") sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState", "pandaStates", "carState"], poll="pandaStates")
count = 0 count = 0
@@ -290,7 +290,8 @@ def thermald_thread(end_event, hw_queue) -> None:
msg.deviceState.maxTempC = all_comp_temp msg.deviceState.maxTempC = all_comp_temp
if fan_controller is not None: if fan_controller is not None:
msg.deviceState.fanSpeedPercentDesired = fan_controller.update(all_comp_temp, onroad_conditions["ignition"]) standstill = sm['carState'].standstill if sm.seen['carState'] else False
msg.deviceState.fanSpeedPercentDesired = fan_controller.update(all_comp_temp, onroad_conditions["ignition"], standstill)
is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (time.monotonic() - off_ts > 60 * 5)) is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (time.monotonic() - off_ts > 60 * 5))
if is_offroad_for_5_min and offroad_comp_temp > OFFROAD_DANGER_TEMP: if is_offroad_for_5_min and offroad_comp_temp > OFFROAD_DANGER_TEMP:

Binary file not shown.

View File

@@ -101,6 +101,39 @@ def delete_oldest_video():
return False return False
# CLEARPILOT: max total size for /data/log2 session logs
LOG2_MAX_BYTES = 4 * 1024 * 1024 * 1024
def cleanup_log2():
"""Delete oldest session log directories until /data/log2 is under LOG2_MAX_BYTES."""
log_base = "/data/log2"
if not os.path.isdir(log_base):
return
# Get all session dirs sorted oldest first (by name = timestamp)
dirs = []
for entry in sorted(os.listdir(log_base)):
if entry == "current":
continue
path = os.path.join(log_base, entry)
if os.path.isdir(path) and not os.path.islink(path):
size = sum(f.stat().st_size for f in os.scandir(path) if f.is_file())
dirs.append((entry, path, size))
total = sum(s for _, _, s in dirs)
# Also count current session
current = os.path.join(log_base, "current")
if os.path.isdir(current):
total += sum(f.stat().st_size for f in os.scandir(current) if f.is_file())
# Delete oldest until under quota
while total > LOG2_MAX_BYTES and dirs:
entry, path, size = dirs.pop(0)
try:
cloudlog.info(f"deleting log session {path} ({size // 1024 // 1024} MB)")
shutil.rmtree(path)
total -= size
except OSError:
cloudlog.exception(f"issue deleting log {path}")
def deleter_thread(exit_event): def deleter_thread(exit_event):
while not exit_event.is_set(): while not exit_event.is_set():
out_of_bytes = get_available_bytes(default=MIN_BYTES + 1) < MIN_BYTES out_of_bytes = get_available_bytes(default=MIN_BYTES + 1) < MIN_BYTES
@@ -135,6 +168,8 @@ def deleter_thread(exit_event):
cloudlog.exception(f"issue deleting {delete_path}") cloudlog.exception(f"issue deleting {delete_path}")
exit_event.wait(.1) exit_event.wait(.1)
else: else:
# CLEARPILOT: enforce log2 size quota even when disk space is fine
cleanup_log2()
exit_event.wait(30) exit_event.wait(30)