modeld: revert to constant 20fps (except standby at standstill)
Disabled the variable-rate (4/10/20fps) logic — it caused persistent downstream freq/valid cascades that broke paramsd's angle-offset learner, calibrationd, and locationd's one-shot filterInitialized latch. Symptom: user's "TAKE CONTROL IMMEDIATELY / Communication Issue" banner during engaged driving, plus subtle right-pull (paramsd's fast angle-offset adaptation was frozen all session). Simpler model now: standstill → standby (0fps, paused) otherwise → 20fps (no variable rate) Republish-caching and FPCC.latRequested/noLatLaneChange plumbing left in place so re-enabling variable rate later is a one-line change (full_rate = True → the original full_rate expression). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -30,7 +30,7 @@ services: dict[str, tuple] = {
|
|||||||
"temperatureSensor": (True, 2., 200),
|
"temperatureSensor": (True, 2., 200),
|
||||||
"temperatureSensor2": (True, 2., 200),
|
"temperatureSensor2": (True, 2., 200),
|
||||||
"gpsNMEA": (True, 9.),
|
"gpsNMEA": (True, 9.),
|
||||||
"deviceState": (True, 4., 1), # CLEARPILOT: 4Hz (was 2Hz) — thermald DT_TRML=0.25
|
"deviceState": (True, 5., 1), # CLEARPILOT: 5Hz — thermald decimates pandaStates (10Hz) every 2 ticks
|
||||||
"can": (True, 100., 1223), # decimation gives ~5 msgs in a full segment
|
"can": (True, 100., 1223), # decimation gives ~5 msgs in a full segment
|
||||||
"controlsState": (True, 100., 10),
|
"controlsState": (True, 100., 10),
|
||||||
"pandaStates": (True, 10., 1),
|
"pandaStates": (True, 10., 1),
|
||||||
@@ -70,7 +70,7 @@ services: dict[str, tuple] = {
|
|||||||
"wideRoadEncodeIdx": (False, 20., 1),
|
"wideRoadEncodeIdx": (False, 20., 1),
|
||||||
"wideRoadCameraState": (True, 20., 20),
|
"wideRoadCameraState": (True, 20., 20),
|
||||||
"modelV2": (True, 20., 40),
|
"modelV2": (True, 20., 40),
|
||||||
"managerState": (True, 4., 1), # CLEARPILOT: 4Hz (was 2Hz) — gated by deviceState arrival
|
"managerState": (True, 5., 1), # CLEARPILOT: 5Hz — gated by deviceState arrival (see thermald)
|
||||||
"uploaderState": (True, 0., 1),
|
"uploaderState": (True, 0., 1),
|
||||||
"navInstruction": (True, 1., 10),
|
"navInstruction": (True, 1., 10),
|
||||||
"navRoute": (True, 0.),
|
"navRoute": (True, 0.),
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ from openpilot.selfdrive.car.car_helpers import get_startup_event
|
|||||||
from openpilot.selfdrive.car.card import CarD
|
from openpilot.selfdrive.car.card import CarD
|
||||||
from openpilot.selfdrive.controls.lib.alertmanager import AlertManager, set_offroad_alert
|
from openpilot.selfdrive.controls.lib.alertmanager import AlertManager, set_offroad_alert
|
||||||
from openpilot.selfdrive.controls.lib.drive_helpers import VCruiseHelper, clip_curvature
|
from openpilot.selfdrive.controls.lib.drive_helpers import VCruiseHelper, clip_curvature
|
||||||
from openpilot.selfdrive.controls.lib.events import Events, ET
|
from openpilot.selfdrive.controls.lib.events import Events, ET, EVENTS, EVENT_NAME
|
||||||
from openpilot.selfdrive.controls.lib.latcontrol import LatControl, MIN_LATERAL_CONTROL_SPEED
|
from openpilot.selfdrive.controls.lib.latcontrol import LatControl, MIN_LATERAL_CONTROL_SPEED
|
||||||
from openpilot.selfdrive.controls.lib.latcontrol_pid import LatControlPID
|
from openpilot.selfdrive.controls.lib.latcontrol_pid import LatControlPID
|
||||||
from openpilot.selfdrive.controls.lib.latcontrol_angle import LatControlAngle, STEER_ANGLE_SATURATION_THRESHOLD
|
from openpilot.selfdrive.controls.lib.latcontrol_angle import LatControlAngle, STEER_ANGLE_SATURATION_THRESHOLD
|
||||||
@@ -172,6 +172,7 @@ class Controls:
|
|||||||
self.not_running_prev = None
|
self.not_running_prev = None
|
||||||
self.lat_requested_ts = 0 # CLEARPILOT: timestamp when lat was first requested (ramp-up delay + comm issue suppression)
|
self.lat_requested_ts = 0 # CLEARPILOT: timestamp when lat was first requested (ramp-up delay + comm issue suppression)
|
||||||
self.prev_lat_requested = False
|
self.prev_lat_requested = False
|
||||||
|
self._dbg_prev_bad_events: tuple = () # CLEARPILOT: track set of alert-triggering events for debug dump
|
||||||
# CLEARPILOT: gate frogpilotCarControl send on change (3 static bools, was publishing 100Hz)
|
# CLEARPILOT: gate frogpilotCarControl send on change (3 static bools, was publishing 100Hz)
|
||||||
self._prev_fpcc = (None, None, None, None, None)
|
self._prev_fpcc = (None, None, None, None, None)
|
||||||
self._last_fpcc_send_frame = 0
|
self._last_fpcc_send_frame = 0
|
||||||
@@ -255,6 +256,11 @@ class Controls:
|
|||||||
|
|
||||||
cloudlog.timestamp("Events updated")
|
cloudlog.timestamp("Events updated")
|
||||||
|
|
||||||
|
# CLEARPILOT: debug dumper — find every cycle where an event with SOFT_DISABLE or
|
||||||
|
# IMMEDIATE_DISABLE is active (both produce the "TAKE CONTROL IMMEDIATELY" banner)
|
||||||
|
# and log full context when the set of triggering events changes.
|
||||||
|
self._dbg_dump_alert_triggers(CS)
|
||||||
|
|
||||||
if not self.CP.passive and self.initialized:
|
if not self.CP.passive and self.initialized:
|
||||||
# Update control state
|
# Update control state
|
||||||
self.state_transition(CS)
|
self.state_transition(CS)
|
||||||
@@ -320,6 +326,74 @@ class Controls:
|
|||||||
|
|
||||||
return CS
|
return CS
|
||||||
|
|
||||||
|
def _dbg_dump_alert_triggers(self, CS):
|
||||||
|
"""CLEARPILOT: log full context whenever the set of SOFT_DISABLE/IMMEDIATE_DISABLE
|
||||||
|
events changes. Both alert types show 'TAKE CONTROL IMMEDIATELY' to the user.
|
||||||
|
Emits only on set change to avoid spamming the log.
|
||||||
|
"""
|
||||||
|
bad_events = []
|
||||||
|
for e in self.events.names:
|
||||||
|
ets = EVENTS.get(e, {})
|
||||||
|
if ET.SOFT_DISABLE in ets or ET.IMMEDIATE_DISABLE in ets:
|
||||||
|
bad_events.append(EVENT_NAME.get(e, str(e)))
|
||||||
|
|
||||||
|
bad_tuple = tuple(sorted(bad_events))
|
||||||
|
if bad_tuple == self._dbg_prev_bad_events:
|
||||||
|
return
|
||||||
|
self._dbg_prev_bad_events = bad_tuple
|
||||||
|
|
||||||
|
try:
|
||||||
|
avg_dt = sum(self.rk._dts) / len(self.rk._dts) if len(self.rk._dts) else 0.0
|
||||||
|
except Exception:
|
||||||
|
avg_dt = 0.0
|
||||||
|
|
||||||
|
fpcc_latReq = False
|
||||||
|
fpcc_noLatLC = False
|
||||||
|
try:
|
||||||
|
fpcc = self.sm['frogpilotCarControl']
|
||||||
|
fpcc_latReq = bool(fpcc.latRequested)
|
||||||
|
fpcc_noLatLC = bool(fpcc.noLatLaneChange)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
cloudlog.event("TAKE_CONTROL_TRIGGER",
|
||||||
|
error=True,
|
||||||
|
events=list(bad_tuple),
|
||||||
|
state=self.state.name if hasattr(self.state, 'name') else str(self.state),
|
||||||
|
enabled=self.enabled,
|
||||||
|
active=self.active,
|
||||||
|
# suppression flags (set in update_events before this is called)
|
||||||
|
model_suppress=getattr(self, '_dbg_model_suppress', None),
|
||||||
|
lat_engage_suppress=getattr(self, '_dbg_lat_engage_suppress', None),
|
||||||
|
standby_ts_age_s=round(getattr(self, '_dbg_standby_ts_age', -1), 3),
|
||||||
|
# loop health
|
||||||
|
rk_lagging=self.rk.lagging,
|
||||||
|
rk_avg_dt_ms=round(avg_dt * 1000, 2),
|
||||||
|
rk_remaining_ms=round(self.rk.remaining * 1000, 2),
|
||||||
|
# submaster aggregate health
|
||||||
|
sm_all_alive=self.sm.all_alive(),
|
||||||
|
sm_all_freq_ok=self.sm.all_freq_ok(),
|
||||||
|
sm_all_valid=self.sm.all_valid(),
|
||||||
|
sm_invalid=[s for s, v in self.sm.valid.items() if not v],
|
||||||
|
sm_not_alive=[s for s, a in self.sm.alive.items() if not a],
|
||||||
|
sm_not_freq_ok=[s for s, f in self.sm.freq_ok.items() if not f],
|
||||||
|
# vehicle state
|
||||||
|
vEgo=round(CS.vEgo, 2),
|
||||||
|
gear=str(CS.gearShifter),
|
||||||
|
cruise_enabled=CS.cruiseState.enabled,
|
||||||
|
standstill=CS.standstill,
|
||||||
|
steerFaultTemporary=CS.steerFaultTemporary,
|
||||||
|
steerFaultPermanent=CS.steerFaultPermanent,
|
||||||
|
canValid=CS.canValid,
|
||||||
|
canTimeout=CS.canTimeout,
|
||||||
|
# car control state (previous cycle)
|
||||||
|
prev_latActive=self.CC.latActive if self.CC else False,
|
||||||
|
prev_longActive=self.CC.longActive if self.CC else False,
|
||||||
|
fpcc_latRequested=fpcc_latReq,
|
||||||
|
fpcc_noLatLaneChange=fpcc_noLatLC,
|
||||||
|
frame=self.sm.frame,
|
||||||
|
)
|
||||||
|
|
||||||
def update_events(self, CS):
|
def update_events(self, CS):
|
||||||
"""Compute onroadEvents from carState"""
|
"""Compute onroadEvents from carState"""
|
||||||
|
|
||||||
@@ -466,8 +540,16 @@ class Controls:
|
|||||||
# CLEARPILOT: suppress commIssue/location/params errors for 2s after lat was requested
|
# CLEARPILOT: suppress commIssue/location/params errors for 2s after lat was requested
|
||||||
# Model FPS jumps from 4 to 20 on engage, causing transient freq check failures
|
# Model FPS jumps from 4 to 20 on engage, causing transient freq check failures
|
||||||
# lat_requested_ts is set in state_control on the False->True transition of LatRequested
|
# lat_requested_ts is set in state_control on the False->True transition of LatRequested
|
||||||
|
# CLEARPILOT: 2s window — long enough to cover the modeld rate transition burst
|
||||||
|
# without hiding real comms issues. Extending this further risks masking a genuine
|
||||||
|
# fault that demands immediate driver takeover.
|
||||||
lat_engage_suppress = (now - self.lat_requested_ts) < 2.0
|
lat_engage_suppress = (now - self.lat_requested_ts) < 2.0
|
||||||
|
|
||||||
|
# CLEARPILOT: expose suppress flags + standby_ts for the debug dumper in step()
|
||||||
|
self._dbg_model_suppress = model_suppress
|
||||||
|
self._dbg_lat_engage_suppress = lat_engage_suppress
|
||||||
|
self._dbg_standby_ts_age = now - standby_ts
|
||||||
|
|
||||||
not_running = {p.name for p in self.sm['managerState'].processes if not p.running and p.shouldBeRunning}
|
not_running = {p.name for p in self.sm['managerState'].processes if not p.running and p.shouldBeRunning}
|
||||||
if self.sm.recv_frame['managerState'] and (not_running - IGNORE_PROCESSES):
|
if self.sm.recv_frame['managerState'] and (not_running - IGNORE_PROCESSES):
|
||||||
self.events.add(EventName.processNotRunning)
|
self.events.add(EventName.processNotRunning)
|
||||||
|
|||||||
@@ -186,6 +186,11 @@ def main(demo=False):
|
|||||||
model_standby = False
|
model_standby = False
|
||||||
last_standby_ts_write = 0
|
last_standby_ts_write = 0
|
||||||
params_memory = Params("/dev/shm/params")
|
params_memory = Params("/dev/shm/params")
|
||||||
|
# CLEARPILOT: cache last model output for republishing on skip cycles. Keeps downstream
|
||||||
|
# cameraOdometry/modelV2 rate constant at 20Hz so services.py freq checks never fail
|
||||||
|
# during reduced-rate mode. Content is stale but we only reduce rate when not engaged,
|
||||||
|
# so no one is actually using it for control.
|
||||||
|
last_model_output = None
|
||||||
nav_features = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32)
|
nav_features = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32)
|
||||||
nav_instructions = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32)
|
nav_instructions = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32)
|
||||||
buf_main, buf_extra = None, None
|
buf_main, buf_extra = None, None
|
||||||
@@ -241,22 +246,19 @@ def main(demo=False):
|
|||||||
|
|
||||||
sm.update(0)
|
sm.update(0)
|
||||||
|
|
||||||
# CLEARPILOT: power saving — three modes based on driving state
|
# CLEARPILOT: reverted to two modes only — constant 20fps while moving, standby at
|
||||||
# Full 20fps: lat requested or lane changing
|
# standstill. Variable-rate (4/10fps) caused downstream freq/valid cascades that
|
||||||
# Reduced 4fps: not lat requested, not standstill (driving without cruise)
|
# broke paramsd's angleOffset learner, calibrationd, and locationd's filter init.
|
||||||
# Standby 0fps: standstill
|
# Keep the signals plumbed through frogpilotCarControl for when we re-enable.
|
||||||
# Uses FPCC.latRequested (not carControl.latActive) so modeld ramps to 20fps BEFORE
|
|
||||||
# controlsd actually engages steering — gives downstream services time to stabilize.
|
|
||||||
# CLEARPILOT: both fields migrated from paramsMemory to cereal (no per-cycle file reads)
|
|
||||||
fpcc = sm['frogpilotCarControl']
|
fpcc = sm['frogpilotCarControl']
|
||||||
lat_active = fpcc.latRequested
|
lat_active = fpcc.latRequested # noqa: F841
|
||||||
lane_changing = fpcc.noLatLaneChange
|
lane_changing = fpcc.noLatLaneChange # noqa: F841
|
||||||
standstill = sm['carState'].standstill
|
standstill = sm['carState'].standstill
|
||||||
calibrating = sm['liveCalibration'].calStatus != log.LiveCalibrationData.Status.calibrated
|
calibrating = sm['liveCalibration'].calStatus != log.LiveCalibrationData.Status.calibrated # noqa: F841
|
||||||
full_rate = lat_active or lane_changing or calibrating
|
full_rate = True # always 20fps when not standstill
|
||||||
|
|
||||||
# Standby transitions (standstill only)
|
# Standby transitions: standstill → 0fps (parked), otherwise → 20fps
|
||||||
should_standby = standstill and not full_rate
|
should_standby = standstill
|
||||||
if should_standby and not model_standby:
|
if should_standby and not model_standby:
|
||||||
params_memory.put_bool("ModelStandby", True)
|
params_memory.put_bool("ModelStandby", True)
|
||||||
model_standby = True
|
model_standby = True
|
||||||
@@ -275,10 +277,12 @@ def main(demo=False):
|
|||||||
last_vipc_frame_id = meta_main.frame_id
|
last_vipc_frame_id = meta_main.frame_id
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Reduced framerate: daylight 10fps (skip 1/2), night 4fps (skip 4/5)
|
# CLEARPILOT: reduced framerate: skip GPU inference on most frames but still
|
||||||
# Daytime runs twice as fast — better model responsiveness when lighting is good
|
# republish cached output at full 20Hz so downstream services never see a rate
|
||||||
# and the neural net has more signal. Night stays conservative for power.
|
# drop (avoids freq_ok → valid cascade that causes "Communication Issue" false
|
||||||
# Write standby timestamp so controlsd suppresses transient errors
|
# positives on engage). Daylight: skip 1/2 (compute at 10fps), night: skip 4/5
|
||||||
|
# (compute at 4fps). ModelStandbyTs still written for model_suppress window.
|
||||||
|
republish_only = False
|
||||||
if not full_rate:
|
if not full_rate:
|
||||||
is_daylight = params_memory.get_bool("IsDaylight")
|
is_daylight = params_memory.get_bool("IsDaylight")
|
||||||
skip_interval = 2 if is_daylight else 5
|
skip_interval = 2 if is_daylight else 5
|
||||||
@@ -290,9 +294,7 @@ def main(demo=False):
|
|||||||
params_memory.put("ModelStandbyTs", str(now))
|
params_memory.put("ModelStandbyTs", str(now))
|
||||||
last_standby_ts_write = now
|
last_standby_ts_write = now
|
||||||
if run_count % skip_interval != 0:
|
if run_count % skip_interval != 0:
|
||||||
last_vipc_frame_id = meta_main.frame_id
|
republish_only = True
|
||||||
run_count += 1
|
|
||||||
continue
|
|
||||||
else:
|
else:
|
||||||
if params_memory.get("ModelFps") != b"20":
|
if params_memory.get("ModelFps") != b"20":
|
||||||
params_memory.put("ModelFps", "20")
|
params_memory.put("ModelFps", "20")
|
||||||
@@ -368,12 +370,21 @@ def main(demo=False):
|
|||||||
**({'radar_tracks': radar_tracks,} if DISABLE_RADAR else {}),
|
**({'radar_tracks': radar_tracks,} if DISABLE_RADAR else {}),
|
||||||
}
|
}
|
||||||
|
|
||||||
mt1 = time.perf_counter()
|
# CLEARPILOT: on republish cycles, skip the GPU inference and reuse the last
|
||||||
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
|
# model output. Publish rate stays at 20Hz, compute rate is reduced.
|
||||||
mt2 = time.perf_counter()
|
if republish_only and last_model_output is not None:
|
||||||
model_execution_time = mt2 - mt1
|
model_output = last_model_output
|
||||||
|
model_execution_time = 0.0
|
||||||
|
else:
|
||||||
|
mt1 = time.perf_counter()
|
||||||
|
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
|
||||||
|
mt2 = time.perf_counter()
|
||||||
|
model_execution_time = mt2 - mt1
|
||||||
|
|
||||||
if model_output is not None:
|
if model_output is not None:
|
||||||
|
# cache for next republish cycle
|
||||||
|
last_model_output = model_output
|
||||||
|
|
||||||
modelv2_send = messaging.new_message('modelV2')
|
modelv2_send = messaging.new_message('modelV2')
|
||||||
posenet_send = messaging.new_message('cameraOdometry')
|
posenet_send = messaging.new_message('cameraOdometry')
|
||||||
fill_model_msg(modelv2_send, model_output, publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id, frame_drop_ratio,
|
fill_model_msg(modelv2_send, model_output, publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id, frame_drop_ratio,
|
||||||
|
|||||||
Reference in New Issue
Block a user