modeld standby, fan standstill clamping, log rotation, diagnostic logging
- modeld standby: skip inference at standstill (model stays loaded in GPU), ModelStandby param + ModelStandbyTs heartbeat for race-free suppression - controlsd: suppress commIssue/modeldLagging when ModelStandbyTs < 2s old, ignore telemetryd/dashcamd in process_not_running check - Fan controller: standstill below 74°C clamps to 0-10% (near silent), standstill above 74°C allows 0-100%, thermald reads carState.standstill - Deleter: enforce 4GB quota on /data/log2 session logs, oldest-first cleanup - Diagnostic logging: steerTempUnavailable and controlsdLagging log to stderr with full context (steering angle, torque, speed, remaining time) - CLAUDE.md: document memory params method name difference (C++ camelCase vs Python snake_case) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -101,7 +101,8 @@ ClearPilot uses memory params (`/dev/shm/params/d/`) for UI toggles that should
|
|||||||
- **Python access**: Use `Params("/dev/shm/params")`
|
- **Python access**: Use `Params("/dev/shm/params")`
|
||||||
- **Defaults**: Set in `manager_init()` via `Params("/dev/shm/params").put(key, value)`
|
- **Defaults**: Set in `manager_init()` via `Params("/dev/shm/params").put(key, value)`
|
||||||
- **UI toggles**: Use `ToggleControl` with manual `toggleFlipped` lambda that writes via `Params("/dev/shm/params")`. Do NOT use `ParamControl` for memory params — it reads/writes persistent params only
|
- **UI toggles**: Use `ToggleControl` with manual `toggleFlipped` lambda that writes via `Params("/dev/shm/params")`. Do NOT use `ParamControl` for memory params — it reads/writes persistent params only
|
||||||
- **Current memory params**: `TelemetryEnabled` (default "0"), `VpnEnabled` (default "1"), `ScreenDisplayMode`
|
- **Current memory params**: `TelemetryEnabled` (default "0"), `VpnEnabled` (default "1"), `ModelStandby` (default "0"), `ScreenDisplayMode`
|
||||||
|
- **IMPORTANT — method names differ between C++ and Python**: C++ uses camelCase (`putBool`, `getBool`, `getInt`), Python uses snake_case (`put_bool`, `get_bool`, `get_int`). This is a common source of silent failures — the wrong casing compiles/runs but doesn't work.
|
||||||
|
|
||||||
### Building Native (C++) Processes
|
### Building Native (C++) Processes
|
||||||
|
|
||||||
|
|||||||
@@ -198,6 +198,8 @@ std::unordered_map<std::string, uint32_t> keys = {
|
|||||||
{"TelemetryEnabled", PERSISTENT},
|
{"TelemetryEnabled", PERSISTENT},
|
||||||
{"Timezone", PERSISTENT},
|
{"Timezone", PERSISTENT},
|
||||||
{"TrainingVersion", PERSISTENT},
|
{"TrainingVersion", PERSISTENT},
|
||||||
|
{"ModelStandby", PERSISTENT},
|
||||||
|
{"ModelStandbyTs", PERSISTENT},
|
||||||
{"UbloxAvailable", PERSISTENT},
|
{"UbloxAvailable", PERSISTENT},
|
||||||
{"VpnEnabled", PERSISTENT},
|
{"VpnEnabled", PERSISTENT},
|
||||||
{"UpdateAvailable", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
|
{"UpdateAvailable", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
|
||||||
|
|||||||
@@ -484,10 +484,19 @@ class CarInterfaceBase(ABC):
|
|||||||
self.silent_steer_warning = True
|
self.silent_steer_warning = True
|
||||||
events.add(EventName.steerTempUnavailableSilent)
|
events.add(EventName.steerTempUnavailableSilent)
|
||||||
else:
|
else:
|
||||||
|
# CLEARPILOT: log once per instance of this warning
|
||||||
|
if not getattr(self, '_steer_fault_logged', False):
|
||||||
|
import sys
|
||||||
|
print(f"CLP steerTempUnavailable: steerFaultTemporary={cs_out.steerFaultTemporary} "
|
||||||
|
f"steeringPressed={cs_out.steeringPressed} standstill={cs_out.standstill} "
|
||||||
|
f"steering_unpressed={self.steering_unpressed} steeringAngleDeg={cs_out.steeringAngleDeg:.1f} "
|
||||||
|
f"steeringTorque={cs_out.steeringTorque:.1f} vEgo={cs_out.vEgo:.2f}", file=sys.stderr)
|
||||||
|
self._steer_fault_logged = True
|
||||||
events.add(EventName.steerTempUnavailable)
|
events.add(EventName.steerTempUnavailable)
|
||||||
else:
|
else:
|
||||||
self.no_steer_warning = False
|
self.no_steer_warning = False
|
||||||
self.silent_steer_warning = False
|
self.silent_steer_warning = False
|
||||||
|
self._steer_fault_logged = False
|
||||||
if cs_out.steerFaultPermanent:
|
if cs_out.steerFaultPermanent:
|
||||||
events.add(EventName.steerUnavailable)
|
events.add(EventName.steerUnavailable)
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@@ -48,7 +48,7 @@ CAMERA_OFFSET = 0.04
|
|||||||
REPLAY = "REPLAY" in os.environ
|
REPLAY = "REPLAY" in os.environ
|
||||||
SIMULATION = "SIMULATION" in os.environ
|
SIMULATION = "SIMULATION" in os.environ
|
||||||
TESTING_CLOSET = "TESTING_CLOSET" in os.environ
|
TESTING_CLOSET = "TESTING_CLOSET" in os.environ
|
||||||
IGNORE_PROCESSES = {"loggerd", "encoderd", "statsd"}
|
IGNORE_PROCESSES = {"loggerd", "encoderd", "statsd", "telemetryd", "dashcamd"}
|
||||||
|
|
||||||
ThermalStatus = log.DeviceState.ThermalStatus
|
ThermalStatus = log.DeviceState.ThermalStatus
|
||||||
State = log.ControlsState.OpenpilotState
|
State = log.ControlsState.OpenpilotState
|
||||||
@@ -455,6 +455,8 @@ class Controls:
|
|||||||
elif not self.sm.all_freq_ok(self.camera_packets):
|
elif not self.sm.all_freq_ok(self.camera_packets):
|
||||||
self.events.add(EventName.cameraFrameRate)
|
self.events.add(EventName.cameraFrameRate)
|
||||||
if not REPLAY and self.rk.lagging:
|
if not REPLAY and self.rk.lagging:
|
||||||
|
import sys
|
||||||
|
print(f"CLP controlsdLagging: remaining={self.rk.remaining:.4f} standstill={CS.standstill} vEgo={CS.vEgo:.2f}", file=sys.stderr)
|
||||||
self.events.add(EventName.controlsdLagging)
|
self.events.add(EventName.controlsdLagging)
|
||||||
if not self.radarless_model:
|
if not self.radarless_model:
|
||||||
if len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState'])):
|
if len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState'])):
|
||||||
@@ -467,9 +469,16 @@ class Controls:
|
|||||||
self.events.add(EventName.canError)
|
self.events.add(EventName.canError)
|
||||||
|
|
||||||
# generic catch-all. ideally, a more specific event should be added above instead
|
# generic catch-all. ideally, a more specific event should be added above instead
|
||||||
|
# CLEARPILOT: skip comm check when modeld was recently in standby
|
||||||
|
# ModelStandbyTs is written every 1s while in standby — if < 2s old, suppress
|
||||||
|
try:
|
||||||
|
standby_ts = float(self.params_memory.get("ModelStandbyTs") or "0")
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
standby_ts = 0
|
||||||
|
model_suppress = (time.monotonic() - standby_ts) < 2.0
|
||||||
has_disable_events = self.events.contains(ET.NO_ENTRY) and (self.events.contains(ET.SOFT_DISABLE) or self.events.contains(ET.IMMEDIATE_DISABLE))
|
has_disable_events = self.events.contains(ET.NO_ENTRY) and (self.events.contains(ET.SOFT_DISABLE) or self.events.contains(ET.IMMEDIATE_DISABLE))
|
||||||
no_system_errors = (not has_disable_events) or (len(self.events) == num_events)
|
no_system_errors = (not has_disable_events) or (len(self.events) == num_events)
|
||||||
if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors:
|
if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors and not model_suppress:
|
||||||
if not self.sm.all_alive():
|
if not self.sm.all_alive():
|
||||||
self.events.add(EventName.commIssue)
|
self.events.add(EventName.commIssue)
|
||||||
elif not self.sm.all_freq_ok():
|
elif not self.sm.all_freq_ok():
|
||||||
@@ -542,7 +551,7 @@ class Controls:
|
|||||||
self.distance_traveled = 0
|
self.distance_traveled = 0
|
||||||
self.distance_traveled += CS.vEgo * DT_CTRL
|
self.distance_traveled += CS.vEgo * DT_CTRL
|
||||||
|
|
||||||
if self.sm['modelV2'].frameDropPerc > 20:
|
if self.sm['modelV2'].frameDropPerc > 20 and not model_suppress:
|
||||||
self.events.add(EventName.modeldLagging)
|
self.events.add(EventName.modeldLagging)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -87,6 +87,8 @@ def manager_init(frogpilot_functions) -> None:
|
|||||||
params_memory = Params("/dev/shm/params")
|
params_memory = Params("/dev/shm/params")
|
||||||
params_memory.put("TelemetryEnabled", "0")
|
params_memory.put("TelemetryEnabled", "0")
|
||||||
params_memory.put("VpnEnabled", "1")
|
params_memory.put("VpnEnabled", "1")
|
||||||
|
params_memory.put("ModelStandby", "0")
|
||||||
|
params_memory.put("ModelStandbyTs", "0")
|
||||||
params.clear_all(ParamKeyType.CLEAR_ON_ONROAD_TRANSITION)
|
params.clear_all(ParamKeyType.CLEAR_ON_ONROAD_TRANSITION)
|
||||||
params.clear_all(ParamKeyType.CLEAR_ON_OFFROAD_TRANSITION)
|
params.clear_all(ParamKeyType.CLEAR_ON_OFFROAD_TRANSITION)
|
||||||
if is_release_branch():
|
if is_release_branch():
|
||||||
|
|||||||
@@ -134,11 +134,15 @@ def main(demo=False):
|
|||||||
setproctitle(PROCESS_NAME)
|
setproctitle(PROCESS_NAME)
|
||||||
config_realtime_process(7, 54)
|
config_realtime_process(7, 54)
|
||||||
|
|
||||||
|
import time as _time
|
||||||
cloudlog.warning("setting up CL context")
|
cloudlog.warning("setting up CL context")
|
||||||
|
_t0 = _time.monotonic()
|
||||||
cl_context = CLContext()
|
cl_context = CLContext()
|
||||||
cloudlog.warning("CL context ready; loading model")
|
_t1 = _time.monotonic()
|
||||||
|
cloudlog.warning("CL context ready in %.3fs; loading model", _t1 - _t0)
|
||||||
model = ModelState(cl_context)
|
model = ModelState(cl_context)
|
||||||
cloudlog.warning("models loaded, modeld starting")
|
_t2 = _time.monotonic()
|
||||||
|
cloudlog.warning("model loaded in %.3fs (total init %.3fs), modeld starting", _t2 - _t1, _t2 - _t0)
|
||||||
|
|
||||||
# visionipc clients
|
# visionipc clients
|
||||||
while True:
|
while True:
|
||||||
@@ -179,6 +183,9 @@ def main(demo=False):
|
|||||||
model_transform_main = np.zeros((3, 3), dtype=np.float32)
|
model_transform_main = np.zeros((3, 3), dtype=np.float32)
|
||||||
model_transform_extra = np.zeros((3, 3), dtype=np.float32)
|
model_transform_extra = np.zeros((3, 3), dtype=np.float32)
|
||||||
live_calib_seen = False
|
live_calib_seen = False
|
||||||
|
model_standby = False
|
||||||
|
last_standby_ts_write = 0
|
||||||
|
params_memory = Params("/dev/shm/params")
|
||||||
nav_features = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32)
|
nav_features = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32)
|
||||||
nav_instructions = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32)
|
nav_instructions = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32)
|
||||||
buf_main, buf_extra = None, None
|
buf_main, buf_extra = None, None
|
||||||
@@ -233,6 +240,27 @@ def main(demo=False):
|
|||||||
meta_extra = meta_main
|
meta_extra = meta_main
|
||||||
|
|
||||||
sm.update(0)
|
sm.update(0)
|
||||||
|
|
||||||
|
# CLEARPILOT: standstill power saving — skip inference, keep model loaded
|
||||||
|
standstill = sm['carState'].standstill
|
||||||
|
if standstill and not model_standby:
|
||||||
|
params_memory.put_bool("ModelStandby", True)
|
||||||
|
model_standby = True
|
||||||
|
cloudlog.warning("modeld: standby ON (standstill)")
|
||||||
|
elif not standstill and model_standby:
|
||||||
|
params_memory.put_bool("ModelStandby", False)
|
||||||
|
model_standby = False
|
||||||
|
run_count = 0
|
||||||
|
frame_dropped_filter.x = 0.
|
||||||
|
cloudlog.warning("modeld: standby OFF")
|
||||||
|
if model_standby:
|
||||||
|
now = _time.monotonic()
|
||||||
|
if now - last_standby_ts_write > 1.0:
|
||||||
|
params_memory.put("ModelStandbyTs", str(now))
|
||||||
|
last_standby_ts_write = now
|
||||||
|
last_vipc_frame_id = meta_main.frame_id
|
||||||
|
continue
|
||||||
|
|
||||||
desire = DH.desire
|
desire = DH.desire
|
||||||
is_rhd = sm["driverMonitoringState"].isRHD
|
is_rhd = sm["driverMonitoringState"].isRHD
|
||||||
frame_id = sm["roadCameraState"].frameId
|
frame_id = sm["roadCameraState"].frameId
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from openpilot.selfdrive.controls.lib.pid import PIDController
|
|||||||
|
|
||||||
class BaseFanController(ABC):
|
class BaseFanController(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def update(self, cur_temp: float, ignition: bool) -> int:
|
def update(self, cur_temp: float, ignition: bool, standstill: bool = False) -> int:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@@ -20,9 +20,21 @@ class TiciFanController(BaseFanController):
|
|||||||
self.last_ignition = False
|
self.last_ignition = False
|
||||||
self.controller = PIDController(k_p=0, k_i=4e-3, k_f=1, rate=(1 / DT_TRML))
|
self.controller = PIDController(k_p=0, k_i=4e-3, k_f=1, rate=(1 / DT_TRML))
|
||||||
|
|
||||||
def update(self, cur_temp: float, ignition: bool) -> int:
|
def update(self, cur_temp: float, ignition: bool, standstill: bool = False) -> int:
|
||||||
self.controller.neg_limit = -(100 if ignition else 30)
|
# CLEARPILOT: at standstill below 74°C, clamp to 0-10% (near silent)
|
||||||
self.controller.pos_limit = -(30 if ignition else 0)
|
# at standstill above 74°C, allow full 0-100% range
|
||||||
|
if ignition and standstill and cur_temp < 74:
|
||||||
|
self.controller.neg_limit = -10
|
||||||
|
self.controller.pos_limit = 0
|
||||||
|
elif ignition and standstill:
|
||||||
|
self.controller.neg_limit = -100
|
||||||
|
self.controller.pos_limit = 0
|
||||||
|
elif ignition:
|
||||||
|
self.controller.neg_limit = -100
|
||||||
|
self.controller.pos_limit = -30
|
||||||
|
else:
|
||||||
|
self.controller.neg_limit = -30
|
||||||
|
self.controller.pos_limit = 0
|
||||||
|
|
||||||
if ignition != self.last_ignition:
|
if ignition != self.last_ignition:
|
||||||
self.controller.reset()
|
self.controller.reset()
|
||||||
|
|||||||
@@ -164,7 +164,7 @@ def hw_state_thread(end_event, hw_queue):
|
|||||||
|
|
||||||
def thermald_thread(end_event, hw_queue) -> None:
|
def thermald_thread(end_event, hw_queue) -> None:
|
||||||
pm = messaging.PubMaster(['deviceState', 'frogpilotDeviceState'])
|
pm = messaging.PubMaster(['deviceState', 'frogpilotDeviceState'])
|
||||||
sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState", "pandaStates"], poll="pandaStates")
|
sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState", "pandaStates", "carState"], poll="pandaStates")
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
@@ -290,7 +290,8 @@ def thermald_thread(end_event, hw_queue) -> None:
|
|||||||
msg.deviceState.maxTempC = all_comp_temp
|
msg.deviceState.maxTempC = all_comp_temp
|
||||||
|
|
||||||
if fan_controller is not None:
|
if fan_controller is not None:
|
||||||
msg.deviceState.fanSpeedPercentDesired = fan_controller.update(all_comp_temp, onroad_conditions["ignition"])
|
standstill = sm['carState'].standstill if sm.seen['carState'] else False
|
||||||
|
msg.deviceState.fanSpeedPercentDesired = fan_controller.update(all_comp_temp, onroad_conditions["ignition"], standstill)
|
||||||
|
|
||||||
is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (time.monotonic() - off_ts > 60 * 5))
|
is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (time.monotonic() - off_ts > 60 * 5))
|
||||||
if is_offroad_for_5_min and offroad_comp_temp > OFFROAD_DANGER_TEMP:
|
if is_offroad_for_5_min and offroad_comp_temp > OFFROAD_DANGER_TEMP:
|
||||||
|
|||||||
Binary file not shown.
@@ -101,6 +101,39 @@ def delete_oldest_video():
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
# CLEARPILOT: max total size for /data/log2 session logs
|
||||||
|
LOG2_MAX_BYTES = 4 * 1024 * 1024 * 1024
|
||||||
|
|
||||||
|
def cleanup_log2():
|
||||||
|
"""Delete oldest session log directories until /data/log2 is under LOG2_MAX_BYTES."""
|
||||||
|
log_base = "/data/log2"
|
||||||
|
if not os.path.isdir(log_base):
|
||||||
|
return
|
||||||
|
# Get all session dirs sorted oldest first (by name = timestamp)
|
||||||
|
dirs = []
|
||||||
|
for entry in sorted(os.listdir(log_base)):
|
||||||
|
if entry == "current":
|
||||||
|
continue
|
||||||
|
path = os.path.join(log_base, entry)
|
||||||
|
if os.path.isdir(path) and not os.path.islink(path):
|
||||||
|
size = sum(f.stat().st_size for f in os.scandir(path) if f.is_file())
|
||||||
|
dirs.append((entry, path, size))
|
||||||
|
total = sum(s for _, _, s in dirs)
|
||||||
|
# Also count current session
|
||||||
|
current = os.path.join(log_base, "current")
|
||||||
|
if os.path.isdir(current):
|
||||||
|
total += sum(f.stat().st_size for f in os.scandir(current) if f.is_file())
|
||||||
|
# Delete oldest until under quota
|
||||||
|
while total > LOG2_MAX_BYTES and dirs:
|
||||||
|
entry, path, size = dirs.pop(0)
|
||||||
|
try:
|
||||||
|
cloudlog.info(f"deleting log session {path} ({size // 1024 // 1024} MB)")
|
||||||
|
shutil.rmtree(path)
|
||||||
|
total -= size
|
||||||
|
except OSError:
|
||||||
|
cloudlog.exception(f"issue deleting log {path}")
|
||||||
|
|
||||||
|
|
||||||
def deleter_thread(exit_event):
|
def deleter_thread(exit_event):
|
||||||
while not exit_event.is_set():
|
while not exit_event.is_set():
|
||||||
out_of_bytes = get_available_bytes(default=MIN_BYTES + 1) < MIN_BYTES
|
out_of_bytes = get_available_bytes(default=MIN_BYTES + 1) < MIN_BYTES
|
||||||
@@ -135,6 +168,8 @@ def deleter_thread(exit_event):
|
|||||||
cloudlog.exception(f"issue deleting {delete_path}")
|
cloudlog.exception(f"issue deleting {delete_path}")
|
||||||
exit_event.wait(.1)
|
exit_event.wait(.1)
|
||||||
else:
|
else:
|
||||||
|
# CLEARPILOT: enforce log2 size quota even when disk space is fine
|
||||||
|
cleanup_log2()
|
||||||
exit_event.wait(30)
|
exit_event.wait(30)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user