controlsd: longer park-grace cap + 3s suppression on engage attempt

Two-part fix for the alert burst on first engage:

1. Park-exit grace cap 8s -> 15s. The cold-spawn chain (modeld loads
   thneed -> publishes modelV2 -> plannerd publishes longitudinalPlan ->
   frogpilot_process publishes frogpilotPlan) often takes more than 8s,
   so the previous cap let commIssue start firing right when the user
   was trying to engage.

2. Track the CS.cruiseState.enabled rising edge as
   last_engage_attempt_frame (separate from last_engaged_frame, which is
   only updated on a successful transition into ENABLED_STATES). Suppress
   controlsdLagging and the commIssue / commIssueAvgFreq events for the
   first 3s after either edge.

   Why two edges: NO_ENTRY alerts fire on engage *attempt* (before
   self.enabled flips), so the post-success grace alone doesn't cover
   the case where the user presses SET and gets blocked. Tracking the
   cruise-enabled rising edge in update_events (using self.CS_prev)
   catches that.

Result: pressing SET no longer pops "Controls Process Lagging: Reboot
Your Device" or "Communication Issue Between Processes" for the brief
window between the engage attempt and services settling. Real lag /
comm issues that persist beyond 3s still alert normally.
This commit is contained in:
2026-05-04 20:03:06 -05:00
parent 8e4134c4ed
commit 1e4e95ca6b
+43 -18
View File
@@ -98,7 +98,7 @@ class Controls:
self.park_mode = False self.park_mode = False
self.park_exit_frame = -1 self.park_exit_frame = -1
self.startup_complete_frame = -1 self.startup_complete_frame = -1
self.PARK_GRACE_MAX_FRAMES = int(8.0 / DT_CTRL) self.PARK_GRACE_MAX_FRAMES = int(15.0 / DT_CTRL)
self.PARK_STARTUP_DELAY_FRAMES = int(10.0 / DT_CTRL) self.PARK_STARTUP_DELAY_FRAMES = int(10.0 / DT_CTRL)
self.radarless_model = self.params.get("Model", encoding='utf-8') in RADARLESS_MODELS self.radarless_model = self.params.get("Model", encoding='utf-8') in RADARLESS_MODELS
@@ -174,11 +174,20 @@ class Controls:
self.state = State.disabled self.state = State.disabled
self.enabled = False self.enabled = False
self.active = False self.active = False
# CLEARPILOT: track engagement edge for the post-engage controlsdLagging # CLEARPILOT: track engagement edge for the post-engage suppression
# suppression window (the loop briefly lags during state transition into # window. Two edges:
# enabled, which would otherwise pop "Controls Process Lagging: Reboot # - last_engage_attempt_frame: rises with CS.cruiseState.enabled
# Your Device" right as the user lets the wheel go). # (covers blocked NO_ENTRY attempts — alerts fire BEFORE engage)
# - last_engaged_frame: rises with self.enabled
# (covers state-transition lag once engagement succeeds)
# The grace suppresses controlsdLagging + commIssue for 3 s after either
# edge — long enough for the loop to settle past the engage transition
# and for any briefly-stale services (longitudinalPlan, frogpilotPlan)
# to catch up. Without this, the user gets a flash of scary "Reboot Your
# Device" and "Communication Issue" alerts right as their hands are
# still on the wheel after pressing SET.
self.last_engaged_frame = -1 self.last_engaged_frame = -1
self.last_engage_attempt_frame = -1
self.POST_ENGAGE_LAG_GRACE_FRAMES = int(3.0 / DT_CTRL) self.POST_ENGAGE_LAG_GRACE_FRAMES = int(3.0 / DT_CTRL)
self.soft_disable_timer = 0 self.soft_disable_timer = 0
self.mismatch_counter = 0 self.mismatch_counter = 0
@@ -349,6 +358,12 @@ class Controls:
self.events.clear() self.events.clear()
# CLEARPILOT: capture rising edge of cruise.enabled so we can suppress
# noisy startup alerts (controlsdLagging, commIssue) for the first 3
# seconds after the user presses SET. self.CS_prev is last cycle's CS.
if CS.cruiseState.enabled and not self.CS_prev.cruiseState.enabled:
self.last_engage_attempt_frame = self.sm.frame
# Add joystick event, static on cars, dynamic on nonCars # Add joystick event, static on cars, dynamic on nonCars
if self.joystick_mode: if self.joystick_mode:
self.events.add(EventName.joystickDebug) self.events.add(EventName.joystickDebug)
@@ -491,14 +506,20 @@ class Controls:
self.events.add(EventName.cameraMalfunction) self.events.add(EventName.cameraMalfunction)
elif not self.sm.all_freq_ok(self.camera_packets): elif not self.sm.all_freq_ok(self.camera_packets):
self.events.add(EventName.cameraFrameRate) self.events.add(EventName.cameraFrameRate)
# CLEARPILOT: 3-second grace after either edge of "engagement" — covers
# both the cruise SET press (which fires before state_transition runs,
# so it can trigger NO_ENTRY alerts that block engagement) and the
# post-engage state transition lag.
in_engage_grace = (
(self.last_engaged_frame >= 0
and (self.sm.frame - self.last_engaged_frame) < self.POST_ENGAGE_LAG_GRACE_FRAMES)
or
(self.last_engage_attempt_frame >= 0
and (self.sm.frame - self.last_engage_attempt_frame) < self.POST_ENGAGE_LAG_GRACE_FRAMES)
)
if not REPLAY and self.rk.lagging: if not REPLAY and self.rk.lagging:
# CLEARPILOT: suppress controlsdLagging for the first 3 seconds after if not in_engage_grace:
# engagement — the loop briefly lags during the state transition into
# enabled, which otherwise pops a scary "Reboot Your Device" alert
# just as the user is letting their hands off the wheel.
in_post_engage_lag_grace = (self.last_engaged_frame >= 0
and (self.sm.frame - self.last_engaged_frame) < self.POST_ENGAGE_LAG_GRACE_FRAMES)
if not in_post_engage_lag_grace:
self.events.add(EventName.controlsdLagging) self.events.add(EventName.controlsdLagging)
if not self.radarless_model: if not self.radarless_model:
if len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState'])): if len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState'])):
@@ -514,12 +535,16 @@ class Controls:
has_disable_events = self.events.contains(ET.NO_ENTRY) and (self.events.contains(ET.SOFT_DISABLE) or self.events.contains(ET.IMMEDIATE_DISABLE)) has_disable_events = self.events.contains(ET.NO_ENTRY) and (self.events.contains(ET.SOFT_DISABLE) or self.events.contains(ET.IMMEDIATE_DISABLE))
no_system_errors = (not has_disable_events) or (len(self.events) == num_events) no_system_errors = (not has_disable_events) or (len(self.events) == num_events)
if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors: if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors:
if not self.sm.all_alive(): # CLEARPILOT: suppress the commIssue NO_ENTRY alert for the first 3 s
self.events.add(EventName.commIssue) # after engage attempt — services like longitudinalPlan / frogpilotPlan
elif not self.sm.all_freq_ok(): # often need a beat to validate after a cold spawn.
self.events.add(EventName.commIssueAvgFreq) if not in_engage_grace:
else: # invalid or can_rcv_timeout. if not self.sm.all_alive():
self.events.add(EventName.commIssue) self.events.add(EventName.commIssue)
elif not self.sm.all_freq_ok():
self.events.add(EventName.commIssueAvgFreq)
else: # invalid or can_rcv_timeout.
self.events.add(EventName.commIssue)
logs = { logs = {
'invalid': [s for s, valid in self.sm.valid.items() if not valid], 'invalid': [s for s, valid in self.sm.valid.items() if not valid],