diff --git a/cereal/services.py b/cereal/services.py index 1769e5a..3677501 100755 --- a/cereal/services.py +++ b/cereal/services.py @@ -30,7 +30,7 @@ services: dict[str, tuple] = { "temperatureSensor": (True, 2., 200), "temperatureSensor2": (True, 2., 200), "gpsNMEA": (True, 9.), - "deviceState": (True, 5., 1), # CLEARPILOT: 5Hz — thermald decimates pandaStates (10Hz) every 2 ticks + "deviceState": (True, 2., 1), # CLEARPILOT: matches DT_TRML=0.5 (thermald at 2Hz) "can": (True, 100., 1223), # decimation gives ~5 msgs in a full segment "controlsState": (True, 100., 10), "pandaStates": (True, 10., 1), @@ -70,7 +70,7 @@ services: dict[str, tuple] = { "wideRoadEncodeIdx": (False, 20., 1), "wideRoadCameraState": (True, 20., 20), "modelV2": (True, 20., 40), - "managerState": (True, 5., 1), # CLEARPILOT: 5Hz — gated by deviceState arrival (see thermald) + "managerState": (True, 2., 1), # CLEARPILOT: gated by deviceState arrival, so matches thermald rate "uploaderState": (True, 0., 1), "navInstruction": (True, 1., 10), "navRoute": (True, 0.), diff --git a/common/params.cc b/common/params.cc index 6b1c8d1..3e236bc 100755 --- a/common/params.cc +++ b/common/params.cc @@ -138,6 +138,7 @@ std::unordered_map keys = { {"GsmRoaming", PERSISTENT}, {"HardwareSerial", PERSISTENT}, {"HasAcceptedTerms", PERSISTENT}, + {"IgnitionOn", CLEAR_ON_MANAGER_START}, {"IMEI", PERSISTENT}, {"InstallDate", PERSISTENT}, {"IsDriverViewEnabled", CLEAR_ON_MANAGER_START}, diff --git a/common/realtime.py b/common/realtime.py index b676cd4..aad4baa 100755 --- a/common/realtime.py +++ b/common/realtime.py @@ -12,7 +12,7 @@ from openpilot.system.hardware import PC # time step for each process DT_CTRL = 0.01 # controlsd DT_MDL = 0.05 # model -DT_TRML = 0.25 # thermald and manager — 4 Hz +DT_TRML = 0.5 # thermald and manager DT_DMON = 0.05 # driver monitoring diff --git a/selfdrive/boardd/boardd.cc b/selfdrive/boardd/boardd.cc index c4db1ea..471e920 100755 --- a/selfdrive/boardd/boardd.cc +++ b/selfdrive/boardd/boardd.cc @@ -415,6 +415,7 @@ void panda_state_thread(std::vector pandas, bool spoofing_started) { Panda *peripheral_panda = pandas[0]; bool is_onroad = false; bool is_onroad_last = false; + bool ignition_last = false; std::future safety_future; std::vector connected_serials; @@ -472,8 +473,14 @@ void panda_state_thread(std::vector pandas, bool spoofing_started) { is_onroad = params.getBool("IsOnroad"); - // set new safety on onroad transition, after params are cleared - if (is_onroad && !is_onroad_last) { + // CLEARPILOT: trigger on ignition rising edge instead of IsOnroad rising edge. + // ClearPilot's parked-mode split breaks the stock assumption that IsOnroad + // rises with ignition: IsOnroad now requires `started`, which requires + // thermald to see carState != park, which requires controlsd_parked to + // finish CarD init, which requires this thread to ack OBD multiplexing. + // Firing on ignition restores the original "set safety as soon as the bus + // is alive" timing for both controlsd variants. + if (ignition && !ignition_last) { if (!safety_future.valid() || safety_future.wait_for(0ms) == std::future_status::ready) { safety_future = std::async(std::launch::async, safety_setter_thread, pandas); } else { @@ -482,6 +489,7 @@ void panda_state_thread(std::vector pandas, bool spoofing_started) { } is_onroad_last = is_onroad; + ignition_last = ignition; sm.update(0); const bool engaged = sm.allAliveAndValid({"controlsState"}) && sm["controlsState"].getControlsState().getEnabled(); diff --git a/selfdrive/car/hyundai/carstate.py b/selfdrive/car/hyundai/carstate.py index 3adca8e..3d4d727 100755 --- a/selfdrive/car/hyundai/carstate.py +++ b/selfdrive/car/hyundai/carstate.py @@ -13,7 +13,6 @@ from openpilot.selfdrive.car.hyundai.hyundaicanfd import CanBus from openpilot.selfdrive.car.hyundai.values import HyundaiFlags, CAR, DBC, CAN_GEARS, CAMERA_SCC_CAR, \ CANFD_CAR, Buttons, CarControllerParams from openpilot.selfdrive.car.interfaces import CarStateBase -from openpilot.selfdrive.clearpilot.telemetry import tlog PREV_BUTTON_SAMPLES = 8 CLUSTER_SAMPLE_RATE = 20 # frames @@ -48,10 +47,6 @@ class CarState(CarStateBase): self.is_metric = False self.buttons_counter = 0 - # CLEARPILOT: cache to avoid per-cycle atomic writes to /dev/shm (eats CPU via fsync/flock) - self._prev_car_speed_limit = None - self._prev_car_is_metric = None - self.cruise_info = {} # On some cars, CLU15->CF_Clu_VehicleSpeed can oscillate faster than the dash updates. Sample at 5 Hz @@ -214,15 +209,10 @@ class CarState(CarStateBase): self.lkas_previously_enabled = self.lkas_enabled self.lkas_enabled = cp.vl["BCM_PO_11"]["LFA_Pressed"] - # CLEARPILOT: gate on change — see same fix in update_canfd - car_speed_limit = self.calculate_speed_limit(cp, cp_cam) * speed_conv - if car_speed_limit != self._prev_car_speed_limit: - self.params_memory.put_float("CarSpeedLimit", car_speed_limit) - self._prev_car_speed_limit = car_speed_limit - if self.is_metric != self._prev_car_is_metric: - self.params_memory.put("CarIsMetric", "1" if self.is_metric else "0") - self._prev_car_is_metric = self.is_metric - + # self.params_memory.put_int("CarSpeedLimitLiteral", self.calculate_speed_limit(cp, cp_cam)) + self.params_memory.put_float("CarSpeedLimit", self.calculate_speed_limit(cp, cp_cam) * speed_conv) + self.params_memory.put_float("CarCruiseDisplayActual", cp_cruise.vl["SCC11"]["VSetDis"]) + return ret @@ -425,23 +415,63 @@ class CarState(CarStateBase): # nonAdaptive = false, # speedCluster = 0 ) - # CLEARPILOT: gate on change — these writes run 100Hz, each is an atomic fsync/flock transaction - car_speed_limit = self.calculate_speed_limit(cp, cp_cam) * speed_factor - if car_speed_limit != self._prev_car_speed_limit: - self.params_memory.put_float("CarSpeedLimit", car_speed_limit) - self._prev_car_speed_limit = car_speed_limit - if self.is_metric != self._prev_car_is_metric: - self.params_memory.put("CarIsMetric", "1" if self.is_metric else "0") - self._prev_car_is_metric = self.is_metric + # print("Set limit") + # print(self.calculate_speed_limit(cp, cp_cam)) + # self.params_memory.put_float("CarSpeedLimitLiteral", self.calculate_speed_limit(cp, cp_cam)) + self.params_memory.put_float("CarSpeedLimit", self.calculate_speed_limit(cp, cp_cam) * speed_factor) - # CLEARPILOT: telemetry logging — disabled, re-enable when needed + # CLEARPILOT: CAN-FD telemetry — preserved but disabled. Re-enable by uncommenting (also restore the import). + # from openpilot.selfdrive.clearpilot.telemetry import tlog + # # speed_limit_bus = cp if self.CP.flags & HyundaiFlags.CANFD_HDA2 else cp_cam # scc = cp_cam.vl["SCC_CONTROL"] if self.CP.flags & HyundaiFlags.CANFD_CAMERA_SCC else cp.vl["SCC_CONTROL"] # cluster = speed_limit_bus.vl["CLUSTER_SPEED_LIMIT"] - # tlog("car", { ... }) - # tlog("cruise", { ... }) - # tlog("speed_limit", { ... }) - # tlog("buttons", { ... }) + # + # tlog("car", { + # "vEgo": round(ret.vEgo, 3), + # "vEgoRaw": round(ret.vEgoRaw, 3), + # "aEgo": round(ret.aEgo, 3), + # "steeringAngleDeg": round(ret.steeringAngleDeg, 1), + # "gear": str(ret.gearShifter), + # "brakePressed": ret.brakePressed, + # "gasPressed": ret.gasPressed, + # "standstill": ret.standstill, + # "leftBlinker": ret.leftBlinker, + # "rightBlinker": ret.rightBlinker, + # }) + # + # tlog("cruise", { + # "enabled": ret.cruiseState.enabled, + # "available": ret.cruiseState.available, + # "speed": round(ret.cruiseState.speed, 3), + # "standstill": ret.cruiseState.standstill, + # "accFaulted": ret.accFaulted, + # "ACCMode": scc.get("ACCMode", 0), + # "VSetDis": scc.get("VSetDis", 0), + # "aReqRaw": round(scc.get("aReqRaw", 0), 3), + # "aReqValue": round(scc.get("aReqValue", 0), 3), + # "DISTANCE_SETTING": scc.get("DISTANCE_SETTING", 0), + # "ACC_ObjDist": round(scc.get("ACC_ObjDist", 0), 1), + # }) + # + # tlog("speed_limit", { + # "SPEED_LIMIT_1": cluster.get("SPEED_LIMIT_1", 0), + # "SPEED_LIMIT_2": cluster.get("SPEED_LIMIT_2", 0), + # "SPEED_LIMIT_3": cluster.get("SPEED_LIMIT_3", 0), + # "SCHOOL_ZONE": cluster.get("SCHOOL_ZONE", 0), + # "CHIME_1": cluster.get("CHIME_1", 0), + # "CHIME_2": cluster.get("CHIME_2", 0), + # "SPEED_CHANGE_BLINKING": cluster.get("SPEED_CHANGE_BLINKING", 0), + # "calculated": self.calculate_speed_limit(cp, cp_cam), + # "is_metric": self.is_metric, + # }) + # + # tlog("buttons", { + # "cruise_button": self.cruise_buttons[-1], + # "main_button": self.main_buttons[-1], + # "lkas_enabled": self.lkas_enabled, + # "main_enabled": self.main_enabled, + # }) return ret diff --git a/selfdrive/car/interfaces.py b/selfdrive/car/interfaces.py index 0c39d12..a46bf44 100755 --- a/selfdrive/car/interfaces.py +++ b/selfdrive/car/interfaces.py @@ -484,19 +484,10 @@ class CarInterfaceBase(ABC): self.silent_steer_warning = True events.add(EventName.steerTempUnavailableSilent) else: - # CLEARPILOT: log once per instance of this warning - if not getattr(self, '_steer_fault_logged', False): - import sys - print(f"CLP steerTempUnavailable: steerFaultTemporary={cs_out.steerFaultTemporary} " - f"steeringPressed={cs_out.steeringPressed} standstill={cs_out.standstill} " - f"steering_unpressed={self.steering_unpressed} steeringAngleDeg={cs_out.steeringAngleDeg:.1f} " - f"steeringTorque={cs_out.steeringTorque:.1f} vEgo={cs_out.vEgo:.2f}", file=sys.stderr) - self._steer_fault_logged = True events.add(EventName.steerTempUnavailable) else: self.no_steer_warning = False self.silent_steer_warning = False - self._steer_fault_logged = False if cs_out.steerFaultPermanent: events.add(EventName.steerUnavailable) diff --git a/selfdrive/clearpilot/dashcamd.cc b/selfdrive/clearpilot/dashcamd.cc index 0e4fdd5..97f84fa 100644 --- a/selfdrive/clearpilot/dashcamd.cc +++ b/selfdrive/clearpilot/dashcamd.cc @@ -12,19 +12,19 @@ * Trip lifecycle state machine: * * WAITING: - * - Process starts in this state. Idle. + * - Process starts in this state * - Waits for valid system time (year >= 2024) AND car in drive gear * - Transitions to RECORDING when both conditions met * * RECORDING: * - Actively encoding frames, car is in drive - * - Gear shift into PARK → close trip immediately → WAITING (idle) - * - Ignition off → close trip → WAITING + * - Car leaves drive → start 10-min idle timer → IDLE_TIMEOUT * - * IDLE_TIMEOUT (deprecated, retained for safety): - * - Was used to keep recording across brief drive-thru / fuel stops. - * - Now unreachable: drive→park transitions close the trip immediately - * and a fresh trip starts on the next drive engagement. + * IDLE_TIMEOUT: + * - Car left drive, still recording with timer running + * - Car re-enters drive → cancel timer → RECORDING + * - Timer expires → close trip → WAITING + * - Ignition off → close trip → WAITING * * Graceful shutdown (DashcamShutdown param): * - thermald sets DashcamShutdown="1" before device power-off @@ -301,11 +301,10 @@ int main(int argc, char *argv[]) { } case RECORDING: - // CLEARPILOT: close trip immediately on park (no idle timer). User wants - // dashcam idle in park, fresh trip on each drive engagement. - if (gear == cereal::CarState::GearShifter::PARK) { - LOGW("dashcamd: gear in park, closing trip"); - close_trip(); + if (!in_drive) { + idle_timer_start = now; + state = IDLE_TIMEOUT; + LOGW("dashcamd: car left drive, starting 10-min idle timer"); } break; diff --git a/selfdrive/controls/controlsd.py b/selfdrive/controls/controlsd.py index 2f0fcdc..b0d6137 100755 --- a/selfdrive/controls/controlsd.py +++ b/selfdrive/controls/controlsd.py @@ -37,10 +37,11 @@ from openpilot.system.version import get_short_branch from openpilot.selfdrive.frogpilot.controls.lib.frogpilot_functions import CRUISING_SPEED, PROBABILITY, MovingAverageCalculator from openpilot.selfdrive.frogpilot.controls.lib.model_manager import RADARLESS_MODELS -from openpilot.selfdrive.clearpilot.telemetry import tlog -from openpilot.selfdrive.clearpilot.speed_logic import SpeedState from openpilot.selfdrive.frogpilot.controls.lib.speed_limit_controller import SpeedLimitController +# CLEARPILOT: UI plumbing for ScreenDisplayMode and the speed/cruise-warning overlay +from openpilot.selfdrive.clearpilot.speed_logic import SpeedState + SOFT_DISABLE_TIME = 3 # seconds LDW_MIN_SPEED = 31 * CV.MPH_TO_MS LANE_DEPARTURE_THRESHOLD = 0.1 @@ -49,7 +50,7 @@ CAMERA_OFFSET = 0.04 REPLAY = "REPLAY" in os.environ SIMULATION = "SIMULATION" in os.environ TESTING_CLOSET = "TESTING_CLOSET" in os.environ -IGNORE_PROCESSES = {"loggerd", "encoderd", "statsd", "telemetryd", "dashcamd"} +IGNORE_PROCESSES = {"loggerd", "encoderd", "statsd"} ThermalStatus = log.DeviceState.ThermalStatus State = log.ControlsState.OpenpilotState @@ -79,13 +80,14 @@ class Controls: self.params_storage = Params("/persist/params") self.params_memory.put_bool("CPTLkasButtonAction", False) + # CLEARPILOT: ScreenDisplayMode is an int (5-state machine: 0..4); UI reads it via getInt self.params_memory.put_int("ScreenDisplayMode", 0) - # CLEARPILOT: speed-limit/cruise-warning state machine + park→drive auto-reset + # CLEARPILOT: speed/cruise-warning overlay state, ticked at ~2Hz from clearpilot_state_control self.speed_state = SpeedState() self.speed_state_frame = 0 + # CLEARPILOT: edge tracking for park->drive auto-wake of screen self.was_driving_gear = False - self.driving_gear = False self.radarless_model = self.params.get("Model", encoding='utf-8') in RADARLESS_MODELS @@ -449,13 +451,6 @@ class Controls: # All events here should at least have NO_ENTRY and SOFT_DISABLE. num_events = len(self.events) - # CLEARPILOT: compute model standby suppression early — used by multiple checks below - try: - standby_ts = float(self.params_memory.get("ModelStandbyTs") or "0") - except (ValueError, TypeError): - standby_ts = 0 - model_suppress = (time.monotonic() - standby_ts) < 2.0 - not_running = {p.name for p in self.sm['managerState'].processes if not p.running and p.shouldBeRunning} if self.sm.recv_frame['managerState'] and (not_running - IGNORE_PROCESSES): self.events.add(EventName.processNotRunning) @@ -469,11 +464,9 @@ class Controls: elif not self.sm.all_freq_ok(self.camera_packets): self.events.add(EventName.cameraFrameRate) if not REPLAY and self.rk.lagging: - import sys - print(f"CLP controlsdLagging: remaining={self.rk.remaining:.4f} standstill={CS.standstill} vEgo={CS.vEgo:.2f}", file=sys.stderr) self.events.add(EventName.controlsdLagging) if not self.radarless_model: - if not model_suppress and (len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState']))): + if len(self.sm['radarState'].radarErrors) or (not self.rk.lagging and not self.sm.all_checks(['radarState'])): self.events.add(EventName.radarFault) if not self.sm.valid['pandaStates']: self.events.add(EventName.usbError) @@ -485,16 +478,13 @@ class Controls: # generic catch-all. ideally, a more specific event should be added above instead has_disable_events = self.events.contains(ET.NO_ENTRY) and (self.events.contains(ET.SOFT_DISABLE) or self.events.contains(ET.IMMEDIATE_DISABLE)) no_system_errors = (not has_disable_events) or (len(self.events) == num_events) - # CLEARPILOT: fire commIssue ONLY when messages actually aren't flowing (not_alive) - # or CAN RX is timing out. Don't fire on self-declared valid=False — that's the - # polling-pattern / all_checks cascade that paramsd/torqued/plannerd/frogpilot - # propagate even while their publish rate and content are fine. - comms_really_broken = (not self.sm.all_alive()) or self.card.can_rcv_timeout - if comms_really_broken and no_system_errors and not model_suppress: + if (not self.sm.all_checks() or self.card.can_rcv_timeout) and no_system_errors: if not self.sm.all_alive(): self.events.add(EventName.commIssue) - else: - self.events.add(EventName.commIssue) # can_rcv_timeout path + elif not self.sm.all_freq_ok(): + self.events.add(EventName.commIssueAvgFreq) + else: # invalid or can_rcv_timeout. + self.events.add(EventName.commIssue) logs = { 'invalid': [s for s, valid in self.sm.valid.items() if not valid], @@ -509,13 +499,13 @@ class Controls: self.logged_comm_issue = None if not (self.CP.notCar and self.joystick_mode): - if not self.sm['liveLocationKalman'].posenetOK and not model_suppress: + if not self.sm['liveLocationKalman'].posenetOK: self.events.add(EventName.posenetInvalid) if not self.sm['liveLocationKalman'].deviceStable: self.events.add(EventName.deviceFalling) - if not self.sm['liveLocationKalman'].inputsOK and not model_suppress: + if not self.sm['liveLocationKalman'].inputsOK: self.events.add(EventName.locationdTemporaryError) - if not self.sm['liveParameters'].valid and not TESTING_CLOSET and (not SIMULATION or REPLAY) and not model_suppress: + if not self.sm['liveParameters'].valid and not TESTING_CLOSET and (not SIMULATION or REPLAY): self.events.add(EventName.paramsdTemporaryError) # conservative HW alert. if the data or frequency are off, locationd will throw an error @@ -561,7 +551,7 @@ class Controls: self.distance_traveled = 0 self.distance_traveled += CS.vEgo * DT_CTRL - if self.sm['modelV2'].frameDropPerc > 20 and not model_suppress: + if self.sm['modelV2'].frameDropPerc > 20: self.events.add(EventName.modeldLagging) @@ -648,14 +638,6 @@ class Controls: # Check if openpilot is engaged and actuators are enabled self.enabled = self.state in ENABLED_STATES self.active = self.state in ACTIVE_STATES - - # CLEARPILOT: engagement telemetry disabled — was running at 100Hz, causing CPU load - # tlog("engage", { - # "state": self.state.name if hasattr(self.state, 'name') else str(self.state), - # "enabled": self.enabled, "active": self.active, - # "cruise_enabled": CS.cruiseState.enabled, "cruise_available": CS.cruiseState.available, - # "brakePressed": CS.brakePressed, - # }) if self.active: self.current_alert_types.append(ET.WARNING) @@ -665,30 +647,6 @@ class Controls: def state_control(self, CS): """Given the state, this function returns a CarControl packet""" - # CLEARPILOT: short-circuit while parked. Skip LaC/LoC PID, MPC, model_v2 - # reads, lane-change logic — none of it matters when the car isn't moving. - # publish_logs still runs and still triggers carcontroller.apply via - # card.controls_update, so the sendcan heartbeats / tester-present messages - # keep flowing at 100Hz and the car doesn't fault. Saves ~30% controlsd CPU - # in park. - if CS.gearShifter == car.CarState.GearShifter.park: - CC = car.CarControl.new_message() - CC.enabled = False - CC.latActive = False - CC.longActive = False - CC.actuators.longControlState = self.LoC.long_control_state - self.LaC.reset() - self.LoC.reset(v_pid=CS.vEgo) - self.frogpilot_variables.no_lat_lane_change = False - self.FPCC.noLatLaneChange = False - # Call LaC.update with active=False so we get the right lac_log subtype - # for this car's lateralTuning (torque vs pid vs angle). Internally it - # early-returns when active is False — cheap. - lp = self.sm['liveParameters'] - _, _, lac_log = self.LaC.update(False, CS, self.VM, lp, self.steer_limited, 0.0, - self.sm['liveLocationKalman'], model_data=self.sm['modelV2']) - return CC, lac_log - # Update VehicleModel lp = self.sm['liveParameters'] x = max(lp.stiffnessFactor, 0.1) @@ -1287,40 +1245,47 @@ class Controls: self.frogpilot_variables.use_ev_tables = self.params.get_bool("EVTable") def update_clearpilot_events(self, CS): - if (len(CS.buttonEvents) > 0): + if (len(CS.buttonEvents) > 0): print (CS.buttonEvents) - if any(be.pressed and be.type == FrogPilotButtonType.lkas for be in CS.buttonEvents): - self.events.add(EventName.clpDebug) + + # Uncomment to alert when lkas button pressed + # if any(be.pressed and be.type == FrogPilotButtonType.lkas for be in CS.buttonEvents): + # self.events.add(EventName.clpDebug) def clearpilot_state_control(self, CC, CS): - # CLEARPILOT: auto-reset display when shifting into drive from screen-off - if self.driving_gear and not self.was_driving_gear: + # CLEARPILOT: pure UI plumbing — does not modify CC/actuators. Maintains + # ScreenDisplayMode (5-state machine driven by the LFA/debug button + gear + # edges) and ticks the speed/cruise-warning overlay at ~2Hz. + driving_gear = CS.gearShifter not in (GearShifter.neutral, GearShifter.park, + GearShifter.reverse, GearShifter.unknown) + + # Auto-wake screen when shifting into drive from screen-off + if driving_gear and not self.was_driving_gear: if self.params_memory.get_int("ScreenDisplayMode") == 3: self.params_memory.put_int("ScreenDisplayMode", 0) - self.was_driving_gear = self.driving_gear + self.was_driving_gear = driving_gear + # LFA/debug button cycles ScreenDisplayMode. Onroad and offroad use + # different transition tables. if any(be.pressed and be.type == FrogPilotButtonType.lkas for be in CS.buttonEvents): current = self.params_memory.get_int("ScreenDisplayMode") - - if self.driving_gear: + if driving_gear: # Onroad: 0→4, 1→2, 2→3, 3→4, 4→2 (never back to auto via button) transitions = {0: 4, 1: 2, 2: 3, 3: 4, 4: 2} new_mode = transitions.get(current, 0) else: - # Not in drive: any except 3 → 3, state 3 → 0 + # Not in drive: anything except 3 → 3 (screen off), state 3 → 0 (auto) new_mode = 0 if current == 3 else 3 - self.params_memory.put_int("ScreenDisplayMode", new_mode) - # ClearPilot speed processing (~2 Hz at 100 Hz loop) — drives speed-limit sign - # and cruise over/under warning sign via memory params read by the UI. + # Speed/cruise-warning overlay tick (~2Hz at 100Hz loop) self.speed_state_frame += 1 if self.speed_state_frame % 50 == 0: gps = self.sm['gpsLocation'] has_gps = self.sm.valid['gpsLocation'] and gps.hasFix speed_ms = gps.speed if has_gps else 0.0 - speed_limit_ms = self.params_memory.get_float("CarSpeedLimit") - is_metric = (self.params_memory.get("CarIsMetric", encoding="utf-8") or "0") == "1" + speed_limit_ms = self.params_memory.get_float("CarSpeedLimit") or 0.0 + is_metric = self.is_metric cruise_speed_ms = CS.cruiseState.speed cruise_active = CS.cruiseState.enabled cruise_standstill = CS.cruiseState.standstill diff --git a/selfdrive/controls/controlsd_parked.py b/selfdrive/controls/controlsd_parked.py new file mode 100644 index 0000000..d70e259 --- /dev/null +++ b/selfdrive/controls/controlsd_parked.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +""" +CLEARPILOT: minimal controlsd variant that runs while ignition is on but the +car is in Park. Keeps CAN parsing and carState publishing alive (so thermald +can see gearShifter and decide when to swap us out for the full controlsd), +but skips all of the heavy onroad work — no model, no planner, no lateral or +longitudinal control, no actuator commands. + +Manager swaps between this and the full controlsd via predicate flips: + - this runs when: ignition AND not started + - full runs when: started (which requires ignition AND not_parked) + +The two are mutually exclusive — only one publishes carState at a time. +""" +from types import SimpleNamespace + +from openpilot.common.realtime import Priority, config_realtime_process +from openpilot.selfdrive.car.card import CarD + + +def _make_default_frogpilot_variables() -> SimpleNamespace: + """Safe defaults for fields read inside CarInterface.update / CarState.update. + + We're not actuating anything here; these only need to keep the update path + from raising AttributeError. False/0 across the board is the safe baseline.""" + fv = SimpleNamespace() + fv.conditional_experimental_mode = False + fv.experimental_mode_via_distance = False + fv.traffic_mode = False + fv.sport_plus = False + fv.long_pitch = False + fv.no_lat_lane_change = False + return fv + + +def main(): + config_realtime_process(4, Priority.CTRL_HIGH) + + # CarD's __init__ blocks until it sees CAN + a pandaState, then calls get_car + # to fingerprint and write CarParams. Same path the full controlsd takes. + card = CarD() + card.initialize() + + fv = _make_default_frogpilot_variables() + + # state_update drains CAN, parses carState, publishes carState/carOutput/carParams. + # Internally blocks via drain_sock_raw(wait_for_one=True), so the loop is + # naturally paced by CAN traffic — no extra sleep needed. + while True: + card.state_update(fv) + + +if __name__ == "__main__": + main() diff --git a/selfdrive/controls/lib/events.py b/selfdrive/controls/lib/events.py index d0805fc..6d23406 100755 --- a/selfdrive/controls/lib/events.py +++ b/selfdrive/controls/lib/events.py @@ -778,8 +778,8 @@ EVENTS: dict[int, dict[str, Alert | AlertCallbackType]] = { ET.SOFT_DISABLE: soft_disable_alert("Sensor Data Invalid"), }, - # CLEARPILOT: alert suppressed — event still fires for screen toggle and future actions EventName.clpDebug: { + ET.PERMANENT: clp_debug_notice, }, EventName.noGps: { diff --git a/selfdrive/controls/plannerd.py b/selfdrive/controls/plannerd.py index 94770c6..71dc5fe 100755 --- a/selfdrive/controls/plannerd.py +++ b/selfdrive/controls/plannerd.py @@ -34,21 +34,9 @@ def plannerd_thread(): while True: sm.update() if sm.updated['modelV2']: - # CLEARPILOT: skip the planning compute while parked, but KEEP publishing - # at the normal cadence so consumers' alive flags stay healthy. Skipping - # publishes entirely caused longitudinalPlan to go alive=False at - # controlsd, which fires a real commIssue the moment we shift out of park. - # Stale published values are fine — controlsd's own park short-circuit - # ignores the longitudinalPlan content while parked anyway. - parked = sm['carState'].gearShifter == car.CarState.GearShifter.park - if not parked: - longitudinal_planner.update(sm) + longitudinal_planner.update(sm) longitudinal_planner.publish(sm, pm) - # publish_ui_plan reads longitudinal_planner.a_desired_trajectory_full - # which is only set inside update(). Skip it while parked — uiPlan is - # UI-only, not on controlsd's commIssue check list, so going silent is fine. - if not parked: - publish_ui_plan(sm, pm, longitudinal_planner) + publish_ui_plan(sm, pm, longitudinal_planner) def main(): plannerd_thread() diff --git a/selfdrive/frogpilot/controls/frogpilot_planner.py b/selfdrive/frogpilot/controls/frogpilot_planner.py index a04c4bf..78bf726 100755 --- a/selfdrive/frogpilot/controls/frogpilot_planner.py +++ b/selfdrive/frogpilot/controls/frogpilot_planner.py @@ -58,9 +58,6 @@ class FrogPilotPlanner: self.params = Params() self.params_memory = Params("/dev/shm/params") - # CLEARPILOT: track valid transitions so we only log when it flips, not every cycle - self._dbg_prev_valid = True - self.cem = ConditionalExperimentalMode() self.lead_one = Lead() self.mtsc = MapTurnSpeedController() @@ -245,18 +242,7 @@ class FrogPilotPlanner: def publish(self, sm, pm): frogpilot_plan_send = messaging.new_message('frogpilotPlan') - valid = sm.all_checks(service_list=['carState', 'controlsState']) - # CLEARPILOT: log on transition into invalid — stderr goes to our plannerd.log - if valid != self._dbg_prev_valid and not valid: - import sys - print( - "CLP frogpilotPlan valid=False: " - f"carState(a={sm.alive['carState']},v={sm.valid['carState']},f={sm.freq_ok['carState']}) " - f"controlsState(a={sm.alive['controlsState']},v={sm.valid['controlsState']},f={sm.freq_ok['controlsState']})", - file=sys.stderr, flush=True - ) - self._dbg_prev_valid = valid - frogpilot_plan_send.valid = valid + frogpilot_plan_send.valid = sm.all_checks(service_list=['carState', 'controlsState']) frogpilotPlan = frogpilot_plan_send.frogpilotPlan frogpilotPlan.accelerationJerk = A_CHANGE_COST * (float(self.jerk) if self.lead_one.status else 1) diff --git a/selfdrive/frogpilot/frogpilot_process.py b/selfdrive/frogpilot/frogpilot_process.py index fbe3d3b..c6b4dfb 100755 --- a/selfdrive/frogpilot/frogpilot_process.py +++ b/selfdrive/frogpilot/frogpilot_process.py @@ -85,15 +85,9 @@ def frogpilot_thread(): frogpilot_planner = FrogPilotPlanner(CP) frogpilot_planner.update_frogpilot_params() - # CLEARPILOT: skip planner compute while parked, but KEEP publishing at - # the normal cadence so frogpilotPlan stays alive at consumers. Skipping - # publishes entirely caused commIssue ("not_alive: frogpilotPlan") at - # controlsd the moment we shifted out of park. - parked = sm['carState'].gearShifter == car.CarState.GearShifter.park if sm.updated['modelV2']: - if not parked: - frogpilot_planner.update(sm['carState'], sm['controlsState'], sm['frogpilotCarControl'], sm['frogpilotNavigation'], - sm['liveLocationKalman'], sm['modelV2'], sm['radarState']) + frogpilot_planner.update(sm['carState'], sm['controlsState'], sm['frogpilotCarControl'], sm['frogpilotNavigation'], + sm['liveLocationKalman'], sm['modelV2'], sm['radarState']) frogpilot_planner.publish(sm, pm) if not time_validated: diff --git a/selfdrive/locationd/calibrationd.py b/selfdrive/locationd/calibrationd.py index 1a23162..6e154bf 100755 --- a/selfdrive/locationd/calibrationd.py +++ b/selfdrive/locationd/calibrationd.py @@ -284,14 +284,7 @@ def main() -> NoReturn: # 4Hz driven by cameraOdometry if sm.frame % 5 == 0: - # CLEARPILOT: publish valid based on calibration status, not upstream sm.all_checks(). - # The original gate cascaded upstream freq glitches into liveCalibration.valid=False, - # which kept locationd.filterInitialized False, which fed garbage into paramsd, which - # corrupted steerRatio and caused erratic steering (and controlsd commIssue banners). - # "valid" here semantically means "the calibration data is trustworthy" — a question - # about convergence, not input freshness. - cal_valid = calibrator.cal_status == log.LiveCalibrationData.Status.calibrated - calibrator.send_data(pm, cal_valid) + calibrator.send_data(pm, sm.all_checks()) if __name__ == "__main__": diff --git a/selfdrive/manager/manager.py b/selfdrive/manager/manager.py index 94710fe..74584b9 100755 --- a/selfdrive/manager/manager.py +++ b/selfdrive/manager/manager.py @@ -363,6 +363,9 @@ def manager_init(frogpilot_functions) -> None: params.put("GitRemote", get_origin()) params.put_bool("IsTestedBranch", is_tested_branch()) params.put_bool("IsReleaseBranch", is_release_branch()) + # CLEARPILOT: thermald is the source of truth for IgnitionOn; seed False so + # the parked-controlsd predicate evaluates to False before thermald's first tick. + params.put_bool("IgnitionOn", False) # set dongle id reg_res = register(show_spinner=True) diff --git a/selfdrive/manager/process_config.py b/selfdrive/manager/process_config.py index 0302fb5..97ba0bc 100755 --- a/selfdrive/manager/process_config.py +++ b/selfdrive/manager/process_config.py @@ -43,6 +43,12 @@ def only_onroad(started: bool, params, CP: car.CarParams) -> bool: def only_offroad(started, params, CP: car.CarParams) -> bool: return not started +# CLEARPILOT: predicate for the parked controlsd variant. Runs while ignition +# is on but the car is in Park (so started=False because thermald has gated it +# off). Mutually exclusive with the full controlsd, which uses only_onroad. +def parked_only(started, params, CP: car.CarParams) -> bool: + return params.get_bool("IgnitionOn") and not started + # FrogPilot functions def allow_logging(started, params, CP: car.CarParams) -> bool: allow_logging = not (params.get_bool("DeviceManagement") and params.get_bool("NoLogging")) @@ -82,6 +88,11 @@ procs = [ PythonProcess("calibrationd", "selfdrive.locationd.calibrationd", only_onroad), PythonProcess("torqued", "selfdrive.locationd.torqued", only_onroad), PythonProcess("controlsd", "selfdrive.controls.controlsd", only_onroad), + # CLEARPILOT: lightweight CAN listener that runs while ignition is on and the + # car is parked. Publishes carState (so thermald can see gear); does no model, + # planner, or actuator work. Manager swaps it out for the full controlsd as + # soon as gear leaves Park. + PythonProcess("controlsd_parked", "selfdrive.controls.controlsd_parked", parked_only), PythonProcess("deleter", "system.loggerd.deleter", always_run), PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", driverview, enabled=(not PC or WEBCAM)), # PythonProcess("qcomgpsd", "system.qcomgpsd.qcomgpsd", qcomgps, enabled=TICI), diff --git a/selfdrive/modeld/dmonitoringmodeld.py b/selfdrive/modeld/dmonitoringmodeld.py index 5b9c9dd..ef403b4 100755 --- a/selfdrive/modeld/dmonitoringmodeld.py +++ b/selfdrive/modeld/dmonitoringmodeld.py @@ -7,7 +7,7 @@ import ctypes import numpy as np from pathlib import Path -from cereal import car, messaging +from cereal import messaging from cereal.messaging import PubMaster, SubMaster from cereal.visionipc import VisionIpcClient, VisionStreamType, VisionBuf from openpilot.common.swaglog import cloudlog @@ -128,15 +128,10 @@ def main(): assert vipc_client.is_connected() cloudlog.warning(f"connected with buffer size: {vipc_client.buffer_len}") - sm = SubMaster(["liveCalibration", "carState"]) + sm = SubMaster(["liveCalibration"]) pm = PubMaster(["driverStateV2"]) calib = np.zeros(CALIB_LEN, dtype=np.float32) - # CLEARPILOT: cache last model output to serve while gear is in park — - # mirrors the same trick modeld uses. Skips DSP inference on the driver - # camera when the car is stationary; downstream dmonitoringd still gets - # a fresh publish each frame. - last_model_output = None # last = 0 while True: @@ -148,13 +143,8 @@ def main(): if sm.updated["liveCalibration"]: calib[:] = np.array(sm["liveCalibration"].rpyCalib) - parked = sm["carState"].gearShifter == car.CarState.GearShifter.park t1 = time.perf_counter() - if parked and last_model_output is not None: - model_output, dsp_execution_time = last_model_output - else: - model_output, dsp_execution_time = model.run(buf, calib) - last_model_output = (model_output, dsp_execution_time) + model_output, dsp_execution_time = model.run(buf, calib) t2 = time.perf_counter() pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, dsp_execution_time)) diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py index 6f7a0de..abbc39a 100755 --- a/selfdrive/modeld/modeld.py +++ b/selfdrive/modeld/modeld.py @@ -134,15 +134,11 @@ def main(demo=False): setproctitle(PROCESS_NAME) config_realtime_process(7, 54) - import time as _time cloudlog.warning("setting up CL context") - _t0 = _time.monotonic() cl_context = CLContext() - _t1 = _time.monotonic() - cloudlog.warning("CL context ready in %.3fs; loading model", _t1 - _t0) + cloudlog.warning("CL context ready; loading model") model = ModelState(cl_context) - _t2 = _time.monotonic() - cloudlog.warning("model loaded in %.3fs (total init %.3fs), modeld starting", _t2 - _t1, _t2 - _t0) + cloudlog.warning("models loaded, modeld starting") # visionipc clients while True: @@ -183,10 +179,6 @@ def main(demo=False): model_transform_main = np.zeros((3, 3), dtype=np.float32) model_transform_extra = np.zeros((3, 3), dtype=np.float32) live_calib_seen = False - # CLEARPILOT: cache last model output to serve while gear is in park — saves - # GPU inference cost while still giving downstream a constant publish rate so - # freq_ok / valid checks don't cascade. - last_model_output = None nav_features = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32) nav_instructions = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32) buf_main, buf_extra = None, None @@ -241,12 +233,6 @@ def main(demo=False): meta_extra = meta_main sm.update(0) - - # CLEARPILOT: constant 20fps. Variable-rate + standby logic removed — the - # variable-rate path caused freq_ok cascades in downstream consumers - # (calibrationd/locationd/paramsd). Running at the camera's native rate is - # simpler and keeps the full-stack localization chain happy. - desire = DH.desire is_rhd = sm["driverMonitoringState"].isRHD frame_id = sm["roadCameraState"].frameId @@ -318,21 +304,12 @@ def main(demo=False): **({'radar_tracks': radar_tracks,} if DISABLE_RADAR else {}), } - # CLEARPILOT: in park, serve the cached last model output instead of running - # GPU inference. First cycle (no cache yet) still runs once so we have - # something to serve. Out-of-park resumes fresh inference every frame. - parked = sm['carState'].gearShifter == car.CarState.GearShifter.park - if parked and last_model_output is not None: - model_output = last_model_output - model_execution_time = 0.0 - else: - mt1 = time.perf_counter() - model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only) - mt2 = time.perf_counter() - model_execution_time = mt2 - mt1 + mt1 = time.perf_counter() + model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only) + mt2 = time.perf_counter() + model_execution_time = mt2 - mt1 if model_output is not None: - last_model_output = model_output modelv2_send = messaging.new_message('modelV2') posenet_send = messaging.new_message('cameraOdometry') fill_model_msg(modelv2_send, model_output, publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id, frame_drop_ratio, diff --git a/selfdrive/monitoring/dmonitoringd.py b/selfdrive/monitoring/dmonitoringd.py index 020a201..579e790 100755 --- a/selfdrive/monitoring/dmonitoringd.py +++ b/selfdrive/monitoring/dmonitoringd.py @@ -21,7 +21,6 @@ def dmonitoringd_thread(): v_cruise_last = 0 driver_engaged = False - dbg_prev_valid = True # CLEARPILOT: track valid transitions # 10Hz <- dmonitoringmodeld while True: @@ -44,18 +43,7 @@ def dmonitoringd_thread(): # Get data from dmonitoringmodeld events = Events() - # CLEARPILOT: narrow update_states gate. The original sm.all_checks() also - # required modelV2 fresh (stops at standstill in two-state modeld) and - # liveCalibration.valid (calibrationd cascades its own freq_ok to valid, which - # flaps). Both made DM freeze pose → face_detected stuck False → awareness - # decayed to 0 within 6s of engagement. Narrow the gate to the subs - # update_states actually reads, and only to alive+valid (skip freq_ok and - # skip liveCalibration.valid). rpyCalib presence is sufficient to know - # calibration has produced output. - if (sm.alive['driverStateV2'] and sm.valid['driverStateV2'] and - sm.alive['carState'] and sm.valid['carState'] and - sm.alive['controlsState'] and sm.valid['controlsState'] and - sm.alive['liveCalibration'] and len(sm['liveCalibration'].rpyCalib) > 0): + if sm.all_checks() and len(sm['liveCalibration'].rpyCalib): driver_status.update_states(sm['driverStateV2'], sm['liveCalibration'].rpyCalib, sm['carState'].vEgo, sm['controlsState'].enabled) # Block engaging after max number of distrations @@ -66,16 +54,8 @@ def dmonitoringd_thread(): # Update events from driver state driver_status.update_events(events, driver_engaged, sm['controlsState'].enabled, sm['carState'].standstill) - # CLEARPILOT: log on transition to invalid so we can see which sub caused the cascade - dm_valid = sm.all_checks() - if dm_valid != dbg_prev_valid and not dm_valid: - import sys - bad = [s for s in sm.alive if not (sm.alive[s] and sm.valid[s] and sm.freq_ok.get(s, True))] - details = [f"{s}(a={sm.alive[s]},v={sm.valid[s]},f={sm.freq_ok[s]})" for s in bad] - print(f"CLP driverMonitoringState valid=False: {' '.join(details)}", file=sys.stderr, flush=True) - dbg_prev_valid = dm_valid # build driverMonitoringState packet - dat = messaging.new_message('driverMonitoringState', valid=dm_valid) + dat = messaging.new_message('driverMonitoringState', valid=sm.all_checks()) dat.driverMonitoringState = { "events": events.to_msg(), "faceDetected": driver_status.face_detected, diff --git a/selfdrive/thermald/thermald.py b/selfdrive/thermald/thermald.py index ac2f845..c9dd499 100755 --- a/selfdrive/thermald/thermald.py +++ b/selfdrive/thermald/thermald.py @@ -170,7 +170,17 @@ def thermald_thread(end_event, hw_queue) -> None: onroad_conditions: dict[str, bool] = { "ignition": False, + # CLEARPILOT: park-aware gating. When False, manager runs controlsd_parked + # (CAN listener only) instead of the full onroad stack. Latched + hysteresis + # on going-into-parked to avoid R↔P↔D thrash; flips out of parked instantly. + # Initialized False (assume parked) so the full stack waits for carState + # to confirm gear has actually left Park before spinning up. + "not_parked": False, } + is_parked = True + parked_since: float | None = None # monotonic ts when gear first read as Park + PARKED_HYSTERESIS_S = 1.5 + ignition_param_prev: bool | None = None startup_conditions: dict[str, bool] = {} startup_conditions_prev: dict[str, bool] = {} @@ -247,6 +257,32 @@ def thermald_thread(end_event, hw_queue) -> None: onroad_conditions["ignition"] = False cloudlog.error("panda timed out onroad") + # CLEARPILOT: derive is_parked from carState gearShifter. Whichever controlsd + # variant is currently running publishes carState; we just read the gear. + # Going INTO parked has hysteresis (PARKED_HYSTERESIS_S) so brief P touches + # during low-speed parking don't kick the heavy stack off. Going OUT of + # parked is instant so the full stack starts spinning up the moment the + # driver shifts to D/R/N. + if sm.updated['carState']: + gear = sm['carState'].gearShifter + gear_is_park = gear == car.CarState.GearShifter.park + now_mono = time.monotonic() + if gear_is_park: + if parked_since is None: + parked_since = now_mono + if (not is_parked) and (now_mono - parked_since) >= PARKED_HYSTERESIS_S: + is_parked = True + else: + parked_since = None + is_parked = False + onroad_conditions["not_parked"] = not is_parked + + # CLEARPILOT: expose ignition as a Param so manager predicates (which only + # see persistent Params, not pandaStates) can gate controlsd_parked. + if ignition_param_prev != onroad_conditions["ignition"]: + params.put_bool("IgnitionOn", onroad_conditions["ignition"]) + ignition_param_prev = onroad_conditions["ignition"] + try: last_hw_state = hw_queue.get_nowait() except queue.Empty: diff --git a/sessions/2026-04-26-0914-baseline-revert-and-parked-mode/README.md b/sessions/2026-04-26-0914-baseline-revert-and-parked-mode/README.md new file mode 100644 index 0000000..c2e677a --- /dev/null +++ b/sessions/2026-04-26-0914-baseline-revert-and-parked-mode/README.md @@ -0,0 +1,355 @@ +# Session: 2026-04-26 — Baseline Revert + Parked-Controlsd Mode + +## Context + +This session was driven by a regression: the steering wheel "feels like +it pulls right" during normal driving, with no clear smoking gun. The +suspicion was that one of the variable-FPS / standstill-throttling +changes (added to reduce parked-state fan noise and CPU load) bled into +on-road driving behavior in a hard-to-isolate way. + +Strategy: revert all driving-relevant logic to a known-good baseline +captured in `/projects/openpilot/archive/clearpilot` (HEAD `980f0aa`, +July 2024), keep all of the ClearPilot UI/dashcam/telemetry/bench-mode +infrastructure intact on top, then attack the parked-fan-noise problem +fresh from a different angle that doesn't touch driving logic at all. + +Three commits landed in this order on branch `clearpilot`: + +| SHA | Title | +|---|---| +| `47321e3` | restore driving logic to pre-variable-fps baseline | +| `f7e602c` | controlsd: re-wire UI hooks on top of restored baseline | +| `887b9c9` | parked-controlsd mode: shut down heavy stack while ignition+park | + +Pre-revert tip was `62a403d`. The on-device agent should treat +**`62a403d` as "the broken version"** when looking at history. + +--- + +## Commit 1: `47321e3` — Baseline restore + +Reverted the following files wholesale to their `980f0aa` archive copy: + +- `selfdrive/controls/controlsd.py` +- `selfdrive/controls/lib/events.py` +- `selfdrive/controls/lib/longitudinal_planner.py` +- `selfdrive/modeld/modeld.py` +- `selfdrive/modeld/dmonitoringmodeld.py` +- `selfdrive/locationd/calibrationd.py` +- `selfdrive/locationd/paramsd.py` +- `selfdrive/locationd/torqued.py` +- `selfdrive/car/interfaces.py` +- `selfdrive/car/hyundai/carstate.py` (CAN-FD telemetry preserved as a + commented block at the bottom of `update_canfd` — re-enable by + uncommenting; the `tlog` import is also commented out). +- `selfdrive/monitoring/dmonitoringd.py` +- `selfdrive/frogpilot/controls/frogpilot_planner.py` +- `common/realtime.py` + +### Intentionally NOT restored (kept as the post-`62a403d` version) + +- `selfdrive/thermald/*` — fan/power tuning kept as-is. +- `selfdrive/car/hyundai/carcontroller.py` and `hyundaicanfd.py` — + reviewed; the only delta vs baseline is hoisting the + `no_lat_lane_change` Params read out of the 100Hz hot path + (~5% carcontroller CPU) by passing the bit as an argument. This is + a perf-only change with no behavioral effect outside lane changes. +- `cereal/services.py`, `cereal/custom.capnp` — additive only. + `custom.capnp` adds `latRequested @3` and `noLatLaneChange @4` fields + to `FrogPilotCarControl`; capnp tag numbers are append-only so this is + safe to leave in even though baseline code doesn't write to them. +- `selfdrive/manager/*`, `common/params.cc` — heavy ClearPilot + infrastructure (bench mode, log dir, dashcamd, gpsd, ClearPilot params). +- All `selfdrive/ui/`, `selfdrive/clearpilot/`, `system/clearpilot/`. + +### Things removed by the restore (no longer in the tree) + +- Standstill frame skipping in modeld (was: skip GPU inference 19/20 + frames at standstill, report 0 dropped frames to fool controlsd). +- Standstill frame skipping in dmonitoringmodeld. +- Model standby logic + `ModelStandby`/`ModelStandbyTs` reads in + controlsd's comm-issue suppression path. +- Parked-cycle skip in `state_control()` (10Hz vs 100Hz when in Park). +- Calibrationd validity decoupling from `sm.all_checks()`. +- Post-engage 2s commIssue/location/params suppression window. +- Per-cycle carstate write-gating for `CarSpeedLimit`/`CarIsMetric` in + carstate.py. +- The diff-based carstate telemetry calls (preserved commented out). + +These were the candidates for the steering-pull regression. The on-device +agent should **not re-introduce any of these** without a deliberate plan +and a test session. The user's intent is to get baseline driving feel +confirmed first, then re-introduce optimizations one at a time and drive +each. + +--- + +## Commit 2: `f7e602c` — UI hooks on top of baseline + +Baseline `controlsd.py` doesn't have the UI plumbing the ClearPilot UI +expects. This commit re-adds only the things needed for the existing UI +to keep working — pure params-write plumbing, no actuator effect. + +### Changes (all in `selfdrive/controls/controlsd.py`) + +1. **Import `SpeedState`** from `openpilot.selfdrive.clearpilot.speed_logic`. +2. **`Controls.__init__`**: + - `params_memory.put_bool("ScreenDisplayMode", 0)` → + `params_memory.put_int("ScreenDisplayMode", 0)` (UI reads it as int). + - Added `self.speed_state = SpeedState()`, `self.speed_state_frame = 0`, + `self.was_driving_gear = False`. +3. **SubMaster** — added `gpsLocation` to the subscriber list, with + `ignore_alive` / `ignore_avg_freq` / `ignore_valid` so missing GPS + doesn't trigger commIssue. +4. **`clearpilot_state_control(...)`** rewritten from a simple 3-state + cycle into the documented 5-state ScreenDisplayMode machine: + - Auto-wake on park→drive edge if currently in screen-off (state 3). + - LFA button transitions in drive: `0→4, 1→2, 2→3, 3→4, 4→2`. + - LFA button transitions outside drive: any except 3 → 3, state 3 → 0. + - Speed/cruise-warning overlay tick at ~2Hz (`speed_state.update(...)`) + reading `gpsLocation`, `CarSpeedLimit` param, `self.is_metric`, + `CS.cruiseState`. This is what writes + `ClearpilotSpeedDisplay`/`ClearpilotSpeedLimitDisplay`/ + `ClearpilotCruiseWarning` for the UI overlay. +5. **Lane-change suppression sync** — at the existing baseline + `clearpilot_disable_lat_on_lane_change` block (around line 687), in + addition to the existing `params_memory.put_bool("no_lat_lane_change", ...)`, + also set `self.frogpilot_variables.no_lat_lane_change = True/False`. + This is required because the kept (post-`62a403d`) carcontroller + reads off `frogpilot_variables`, not Params. + +### What does NOT change + +No edits to lateral or longitudinal control paths. No new actuator-side +behavior. The UI features (nightrider/screen-off/auto day-night, speed +overlay, cruise warning chime) get wired back up purely through param +writes. + +--- + +## Commit 3: `887b9c9` — Parked-controlsd mode + +Architectural fix for the original problem this whole session was +chasing: while ignition is on but the car is in Park, the entire onroad +stack (modeld, planner, control, locationd, calibrationd, paramsd, +torqued, dmonitoring*, soundd, loggerd) is running and burning CPU/fan +even though none of it is needed. + +Solution: redefine "onroad" as **ignition AND not parked** instead of +just **ignition**. Reuse the existing `started`-based process gating in +manager. Add a tiny second controlsd variant that runs while parked, +just to keep CAN flowing so thermald can see when gear leaves Park. + +### Files + +#### NEW: `selfdrive/controls/controlsd_parked.py` + +Minimal entry point. Roughly: + +```python +def main(): + config_realtime_process(4, Priority.CTRL_HIGH) + card = CarD() # blocks until first CAN, fingerprints car + card.initialize() + fv = _make_default_frogpilot_variables() # safe False/0 SimpleNamespace + while True: + card.state_update(fv) # publishes carState/carOutput/carParams +``` + +`CarD.state_update` blocks via `drain_sock_raw(wait_for_one=True)`, so +the loop is paced by CAN traffic — no extra sleep, no CPU spin. + +The default `frogpilot_variables` sets these to safe values so +`CarInterfaceBase.update` doesn't `AttributeError`: +`conditional_experimental_mode`, `experimental_mode_via_distance`, +`traffic_mode`, `sport_plus`, `long_pitch`, `no_lat_lane_change` — all +False. + +#### `selfdrive/thermald/thermald.py` + +- `onroad_conditions` now also has `"not_parked"`. Initialized to + `False` (assume parked at boot) so the heavy stack waits for carState + to confirm gear has left Park before spinning up. +- New module-level loop variables: `is_parked = True`, + `parked_since: float | None = None`, `PARKED_HYSTERESIS_S = 1.5`, + `ignition_param_prev: bool | None = None`. +- New block right after the panda-disconnect check (around line 258 in + current state) reads `sm['carState'].gearShifter`: + - Gear == Park: latch `parked_since`, flip `is_parked = True` after + 1.5s of continuous Park (hysteresis). + - Gear != Park: clear `parked_since`, `is_parked = False` immediately + (no hysteresis going out). + - Reverse is treated as **not parked** — driver is moving. +- `onroad_conditions["not_parked"] = not is_parked` every tick. +- New `IgnitionOn` Params write, edge-driven (only on change of + `onroad_conditions["ignition"]`) so we don't hammer the persistent + filesystem 2x/sec. + +#### `selfdrive/manager/process_config.py` + +- New predicate: + ```python + def parked_only(started, params, CP): + return params.get_bool("IgnitionOn") and not started + ``` +- New process entry directly after the existing `controlsd` entry: + ```python + PythonProcess("controlsd_parked", "selfdrive.controls.controlsd_parked", parked_only), + ``` +- `controlsd` is unchanged (still `only_onroad`). Mutually exclusive + with `parked_only` because `started` is the negation of the relevant + condition. + +#### `selfdrive/manager/manager.py` + +- Single line in `manager_init()` seeding `IgnitionOn=False` so the + predicate evaluates correctly before thermald's first tick. + +#### `common/params.cc` + +- New entry in the alphabetical I-block: + ```cpp + {"IgnitionOn", CLEAR_ON_MANAGER_START}, + ``` +- Manager predicates can only see persistent Params (not pandaStates + or `/dev/shm/params`), which is why thermald has to expose ignition + this way. + +### State machine summary + +| Ignition | Gear | Predicates true | What runs | +|----------|-------------|-----------------------------|-------------------------------| +| off | any | none | always_run only (ui, thermald, pandad, deleter, …) | +| on | Park (>1.5s)| parked_only | always_run + controlsd_parked | +| on | not Park | only_onroad (=> all `started`) | full onroad stack | + +The transition between rows 2 and 3 is purely manager process +swapping driven by predicate flips — no IPC handshake, no +self-termination. Thermald sees gear change, flips `not_parked`, +`should_start = all(onroad_conditions.values())` flips, manager kills +the wrong variant and spawns the right one on its next tick. + +--- + +## What the on-device agent needs to know / debug + +### Build prerequisites + +A new param key was added (`IgnitionOn`), so the C++ params whitelist +needs a fresh build. Per `CLAUDE.md` "Adding New Params": + +```bash +chown -R comma:comma /data/openpilot +rm -f /data/openpilot/prebuilt /data/openpilot/common/params.o /data/openpilot/common/libcommon.a +su - comma -c "bash /data/openpilot/build_only.sh" +``` + +`build_only.sh` already deletes `prebuilt` but **does not** delete +`params.o` / `libcommon.a` — verify those are gone before building or +the new key won't be picked up and `Params().put_bool("IgnitionOn", …)` +will throw `UnknownKeyName` in manager_init or thermald. + +### How to verify the swap is working + +```bash +# 1. Watch which controlsd variant is running +watch -n 1 'ps -ef | grep -E "controlsd(_parked)?" | grep -v grep' + +# 2. Watch the gating signals +watch -n 1 'echo "IgnitionOn:"; cat /data/params/d/IgnitionOn 2>/dev/null; \ + echo; echo "deviceState.started:" ; \ + python3 -c "import cereal.messaging as m; \ + s=m.sub_sock(\"deviceState\", timeout=1000); \ + print(m.recv_one(s).deviceState.started)"' + +# 3. Watch gear via carState +python3 -c " +import cereal.messaging as m +s = m.sub_sock('carState', timeout=2000) +while True: + msg = m.recv_one(s) + if msg: print(msg.carState.gearShifter) +" +``` + +Expected behavior: +- Ignition off: neither variant in `ps`. `IgnitionOn` is `0`/missing. +- Ignition on, in Park: `controlsd_parked` in `ps`, `controlsd` is not. + `started` is False. After ~1.5s of confirmed Park, modeld and friends + should have stopped. +- Shift to Drive: `controlsd_parked` disappears within ~500ms, full + `controlsd` appears, all the onroad processes spin up. There will be + a brief carState gap during the swap (~0.5–2s). + +### What to suspect first if something breaks + +1. **Manager crash on startup** with `UnknownKeyName: IgnitionOn`. Means + `params.o`/`libcommon.a` weren't rebuilt. Delete them and rebuild. +2. **`controlsd_parked` keeps respawning / dying.** Check + `/data/log2/current/controlsd_parked.log`. Most likely either: + - `CarD.__init__` is hanging on `get_one_can` because pandad isn't + up yet — should only matter on the very first boot. + - A `frogpilot_variables` attribute we missed defaulting; add it to + `_make_default_frogpilot_variables` in `controlsd_parked.py`. +3. **Full controlsd never spawns after shift to Drive.** Check + `IgnitionOn` (should be `1`), check carState.gearShifter (should not + be `park`/`unknown`), check thermald.log for `should_start` logic. + Also check that `carState` is actually being published by + `controlsd_parked`. +4. **Steering still pulls right.** The whole point of commit 1 is to + rule out the variable-FPS work as the cause. If the symptom persists + on baseline-restored driving logic, the suspect list shifts to: + - Anything still on the kept side (carcontroller's + `no_lat_lane_change` plumbing, thermald-related changes, + custom.capnp additions). + - Hardware: a calibration that drifted, an alignment issue, panda + CAN bus issue, or torque tuning that was modified outside of these + files. + - The custom driving model selected (`Params("Model")`). Confirm + which `.thneed`/`.onnx` is loaded — none of the model files + themselves were changed in this session. +5. **Panda safety alerts during park transition.** If panda logs a + "lost heartbeat" or drops to NOOUTPUT mode in the swap window, we + need controlsd_parked to issue a no-op carcontrol heartbeat to keep + panda happy. Not implemented in this session — flag for follow-up. + +### Open follow-up items (NOT done in this session) + +- **Cold-start latency on shift-from-Park.** Modeld load + calibration + warmup may produce a noticeable gap before lateral/long are ready. + Anticipatory wake on `CS.brakePressed && in_park` is the planned + mitigation if needed. +- **Methodically reintroduce optimizations.** Once baseline driving + feels right, the standstill optimizations (modeld 1fps, fan clamps, + etc.) can come back one at a time, each with a drive test. +- **The CAN-FD telemetry block in `selfdrive/car/hyundai/carstate.py`** + is preserved as a commented block at the bottom of `update_canfd()`. + Re-enabling requires uncommenting + restoring the `tlog` import at + the top of the file. + +--- + +## Key file index for fast navigation + +``` +selfdrive/controls/controlsd.py # full controlsd, restored to baseline + UI hooks re-added +selfdrive/controls/controlsd_parked.py # NEW: parked-only CAN listener +selfdrive/controls/clearpilot_state_control # in controlsd.py, ~line 1255 — 5-state ScreenDisplayMode + speed_state tick +selfdrive/thermald/thermald.py # gear-aware not_parked + IgnitionOn writer (~line 258) +selfdrive/manager/process_config.py # parked_only predicate + new entry +selfdrive/manager/manager.py # IgnitionOn seed in manager_init +common/params.cc # IgnitionOn registered (CLEAR_ON_MANAGER_START) +selfdrive/car/card.py # CarD class — used by both controlsd variants, unchanged +selfdrive/clearpilot/speed_logic.py # SpeedState class — unchanged, called from controlsd.clearpilot_state_control +selfdrive/car/hyundai/carstate.py # restored baseline + commented telemetry block at end of update_canfd +``` + +## Reproducing the diff per commit + +```bash +git show 47321e3 # baseline restore +git show f7e602c # UI hooks +git show 887b9c9 # parked mode +git diff 62a403d..887b9c9 # full session delta +``` diff --git a/sessions/2026-04-26-1143-gps-disable-and-calibrationd-stale/README.md b/sessions/2026-04-26-1143-gps-disable-and-calibrationd-stale/README.md new file mode 100644 index 0000000..b8d606e --- /dev/null +++ b/sessions/2026-04-26-1143-gps-disable-and-calibrationd-stale/README.md @@ -0,0 +1,358 @@ +# Session: 2026-04-26 — GPS disabled in locationd; calibrationd-still-stale notes + +## Context + +Followup session to `2026-04-26-0914-baseline-revert-and-parked-mode`. +After the baseline restore, the manager wouldn't start cleanly and the car +exhibited a "drifting right on straight roads, model rescues us mid-curve" +symptom. This session unblocked the startup chain end-to-end so the car can +boot and run, disabled GPS as an input to locationd (the actual fix that +made the drift go away), and pinned down — but did **not** solve — why +`liveCalibration.valid` is still stuck at `False` and what that latently +breaks downstream. + +Pre-session tip: `27cad05`. Single combined commit covering four code +changes plus this README: + +- `selfdrive/boardd/boardd.cc` — `safety_setter_thread` on ignition edge +- `selfdrive/controls/controlsd.py` — drop unregistered + `no_lat_lane_change` Params write, wire `FPCC.noLatLaneChange` for UI +- `cereal/services.py` — deviceState/managerState back to 2Hz to match + restored `DT_TRML` +- `selfdrive/locationd/locationd.cc` — ignore GPS as Kalman input + (reversible flag) + +--- + +## Commit 1: boardd — safety_setter_thread on ignition edge + +### Symptom + +Manager started fine but `controlsd_parked` blocked indefinitely at: + +``` +set_obd_multiplexing (selfdrive/car/fw_versions.py:236) + fingerprint (selfdrive/car/car_helpers.py:149) + get_car (selfdrive/car/car_helpers.py:210) + __init__ (selfdrive/car/card.py:60) + main (selfdrive/controls/controlsd_parked.py:41) +``` + +`set_obd_multiplexing` does `params.get_bool("ObdMultiplexingChanged", block=True)` — +it waits for **boardd's `safety_setter_thread`** to ack. That thread spawns +only on the **rising edge of `IsOnroad`** (`boardd.cc:476`). `IsOnroad` is set +by manager from the `started` flag (`helpers.py:49`). With the parked-mode split +from the prior session, `started` requires `not_parked`, which requires thermald +to see `carState.gearShifter != park`, which requires `controlsd_parked` to +publish carState — which it can't do until OBD multiplexing is acked. + +Classic deadlock: ignition rising no longer implies IsOnroad rising. + +### Fix + +`selfdrive/boardd/boardd.cc:476` — change the trigger from IsOnroad rising +edge to **ignition rising edge**. Adds `bool ignition_last = false;` next to +`is_onroad_last`, swaps the gate variable, sets `ignition_last = ignition;` +after. Restores stock openpilot's intent: set safety as soon as the bus is +alive. Both controlsd variants need it; the thread's phase 2 (waiting on +`ControlsReady`) is harmless in parked mode — it just sits. + +`is_onroad`/`is_onroad_last` left in place; they're now unused beyond this +gate but the read of `params.getBool("IsOnroad")` still happens, in case any +future logic wants it. + +--- + +## Commit 2: controlsd — drop unregistered Params write + +### Symptom + +After the boardd fix, controlsd_parked progressed but full controlsd +crashed at first `state_control` cycle: + +``` +File "selfdrive/controls/controlsd.py", line 693, in state_control + self.params_memory.put_bool("no_lat_lane_change", False) +common.params_pyx.UnknownKeyName: b'no_lat_lane_change' +``` + +The baseline-restored controlsd unconditionally writes `no_lat_lane_change` +to memory params on every state_control cycle. That key was never +registered in `common/params.cc`. Pre-revert (`62a403d`) controlsd didn't +write the param at all — it set `self.FPCC.noLatLaneChange` (capnp field). +The baseline brought back a code path the fork's params.cc never matched. + +### Fix + +In `selfdrive/controls/controlsd.py:687-694`, remove the two +`params_memory.put_bool("no_lat_lane_change", ...)` calls and replace with +`self.FPCC.noLatLaneChange = True/False` (matches what the kept UI code in +`onroad.cc:897` and `ui.cc:122` is reading from cereal). The +`frogpilot_variables.no_lat_lane_change` writes were already there — those +are what the kept Hyundai carcontroller actually reads at 100Hz. + +No actuator change. Pure plumbing. + +--- + +## Commit 3: services.py — restore deviceState/managerState rates + +### Symptom + +After fixing the controlsd crash, controlsd booted but immediately fired +**continuous `commIssue`** with: + +``` +"not_freq_ok": ["deviceState", "managerState"] +``` + +### Diagnosis + +`common/realtime.py` was reverted to `DT_TRML = 0.5` (thermald → 2Hz) in +the baseline restore. But `cereal/services.py` still declared `deviceState` +and `managerState` at 5Hz from earlier 4Hz-fan-control work. The freq window +is `[0.8 × min, 1.2 × max]` — declared 5Hz means [4.0, 6.0]Hz. Thermald +publishing at 2Hz fell well below 4.0 → freq_ok=False every cycle → +commIssue every cycle. + +Already documented as a known footgun in `CLAUDE.md` ("Changing a Service's +Publish Rate"). + +### Fix + +`cereal/services.py:33,73` — set both back to 2Hz with a comment pointing +at `DT_TRML`. After this, `not_freq_ok=[]` and the continuous commIssue +stopped — only one transient commIssue at startup remained (warmup). + +--- + +## Commit 4: locationd — ignore GPS as Kalman input (reversible) + +### Why + +The car was drifting right on straight roads. Pre-revert, the user +reported this had been working for years; the only practically-new thing +is that `system/clearpilot/gpsd.py` (AT-command-based GPS) had recently +**started actually getting fixes**, where for a long time it wasn't. + +Two concrete data problems with the GPS feed for selfdrive purposes: + +- `gpsd.py:221` hard-codes `gps.vNED = [0.0, 0.0, 0.0]` while the user is + moving 28 m/s. locationd's `handle_gps` derives `OBSERVATION_ECEF_VEL` + from this; the Kalman gets "GPS says you're stopped" while accelerometer + says otherwise. +- `gpsd.py:216,222` populate horizontalAccuracy/verticalAccuracy from + `hdop * 5` (a rough conversion); `bearingAccuracyDeg = 10.0` and + `speedAccuracy = 1.0` are constants. None of these match what a real + GNSS chip reports. + +These flow into the latcontrol_torque pipeline indirectly through +`liveLocationKalman.angularVelocityCalibrated` (used as +`actual_curvature_llk = ... / CS.vEgo` blended with steering-angle-derived +curvature) and through `liveLocationKalman.calibratedOrientationNED` +(pitch). When the Kalman has wrong velocity observations, those derived +fields go wrong — and on a straight crowned road, the controller's +"actual_curvature" picture is off-center, biasing torque output. + +### What the user does NOT want disabled + +gpsd publishes are still consumed by: +- UI speed indicator and the ClearPilot status overlay +- `dashcamd` for `.srt` subtitle sidecars (lat/lon per segment) +- `timed.py` for system-clock setting via `unixTimestampMillis` +- `gpsd.py` itself for sunset/sunrise → `IsDaylight` (auto night mode) +- `telemetryd.py` for the `gps` group in the CSV + +So we cannot just stop publishing — only locationd should ignore. + +### Fix + +`selfdrive/locationd/locationd.cc:310-323` — add a `clearpilot_disable_gps` +const at the top of `Localizer::handle_gps` and OR it into the existing +reject condition. With it true, every gpsLocation message falls through to +`determine_gps_mode(current_time)` (openpilot's stock no-GPS path: +`input_fake_gps_observations` once position uncertainty grows past +`SANE_GPS_UNCERTAINTY`, otherwise nothing). `last_gps_msg` never updates, +`is_gps_ok()` returns False, `liveLocationKalman.gpsOK = false`. + +Effect verified by user: drift improved noticeably. The torque controller +is no longer fed contradictory GPS-vs-IMU velocity observations through +the Kalman. + +To re-enable GPS as a Kalman input: flip the `clearpilot_disable_gps` +constant to `false` and rebuild. Self-contained edit. + +--- + +## Calibrationd: still stale, root-cause partially understood + +### What is happening + +`calibrationd` publishes `liveCalibration.valid = sm.all_checks()` on every +cycle, where `sm` polls `cameraOdometry` and non-polls `carState` and +`carParams`. We measured: **120 publishes in 30 seconds, every single one +`valid=False`** with `calStatus=calibrated, calPerc=100, validBlocks=50`. +The body of the calibration is converged — the validity flag is stuck off. + +### Why this matters + +`liveCalibration.valid=False` cascades: + +1. `locationd.cc:715` — `filterInitialized = sm.allAliveAndValid()`. + liveCalibration is in the sub list and not in `ignore_alive` / + `ignore_valid`. So filterInitialized stays False forever. +2. With Kalman uninitialized, `liveLocationKalman` still publishes but the + body fields are empty/default. `liveLocationKalman.status = uninitialized`. +3. `paramsd.py` subscribes to `liveLocationKalman` with `poll='liveLocationKalman'` + and gates its update logic on `sm.all_checks()`. When liveLocationKalman + itself isn't in a sane state, paramsd's `roll`, `angleOffsetDeg`, + `steerRatio`, `stiffnessFactor` either never converge or converge to bad + values. +4. `latcontrol_torque.py:135-136` uses `params.angleOffsetDeg` to compute + `actual_curvature_vm` and `params.roll` for `roll_compensation`. With + `roll=0`, no compensation for a crowned/banked road. With wrong + `angleOffsetDeg`, the closed-loop "actual curvature" measurement is + biased. + +So the latent risk is: even with our GPS fix, the controller is running +**without learned roll compensation and without a learned steering-angle +offset**. Symptom-free on straight, level pavement; biased on banked roads. + +### Why `valid=False` + +Inside calibrationd's SubMaster, `sm.all_checks() = all_alive AND all_freq_ok +AND all_valid`. We measured each: + +- `cameraOdometry`: alive=True, valid=True, freq_ok=True ✓ +- `carState`: alive flickers True/False, valid=True, **freq_ok=False (every cycle)** +- `carParams`: alive=True after first arrival, valid=True, freq_ok=False + but excluded from `all_freq_ok` because `_check_avg_freq` skips services + with `frequency < 0.99` Hz (carParams is 0.02 Hz declared) — so it doesn't + fail the gate. + +The smoking gun is **`carState.freq_ok = False` from inside calibrationd's +`poll='cameraOdometry'` SubMaster**. + +Direct measurement (`/tmp/test_subs.py`): + +| poll arg | carState observed rate | freq_ok | +|---|---:|:-:| +| `None` (all polled) | 97.40 Hz | True | +| `'cameraOdometry'` | **2.24 Hz** | **False** | +| `'carState'` | 98.58 Hz | True | + +carState is published at 100 Hz to a 10MB shared-memory MSGQ queue. From +inside `poll='cameraOdometry'`, our non-blocking `recv_one_or_none(carState)` +returns None ~88% of the time. `/tmp/diag_recv.py`: + +``` +cameraOdometry msgs received: 121 +carState recv calls: 121, hits: 14 (11.6% hit rate) +avg cycle duration: 50.0ms +``` + +So we're calling `recv` 20× per second on carState's queue, and finding +the queue empty 9 out of 10 times — even though carState is being published +at 100 Hz to that same queue with a `conflate=True` socket option. + +### The MSGQ NUM_READERS = 12 hypothesis + +`cereal/messaging/msgq.h:9` defines `#define NUM_READERS 12`. When a 13th +subscriber tries to subscribe to a queue, `msgq.cc:182-197` **invalidates +ALL readers simultaneously** (`*q->read_valids[i] = false` for all i) +to "reset and re-register". On the next read, an invalidated reader's +`msgq_msg_recv` jumps to `msgq_reset_reader` (line 347) and ends up with +`read_pointer == write_pointer` (caught up to current), returning size 0. + +Subscribers to `carState` in our running system include: controlsd, +plannerd, locationd, calibrationd, paramsd, dmonitoringd, frogpilot_process, +telemetryd, statsd. That's already nine. The introspection scripts +(`/tmp/check_*.py`, `/tmp/measure_freq.py`, `/tmp/diag_recv.py`, +`/tmp/cal_view.py`) and any UI-side subscribers add more, can easily +push past 12 and trigger global eviction. Once evicted, a long-running +subscriber stays in the "find empty queue / reset / try again" loop, which +is what we measured. + +This is **partially confirmed** but not proven definitively. The +investigation was paused before instrumenting the queue header to count +slot churn. The smoking-gun would be: print `q->num_readers` from inside +calibrationd at boot vs. during steady state and watch it tick up to 12+. + +### Things to consider for the actual fix + +1. **`7ee923b` already solved this exact problem.** It changed + calibrationd's publish to: + ```python + # was: calibrator.send_data(pm, sm.all_checks()) + # to: calibrator.send_data(pm, calibrator.cal_status == log.LiveCalibrationData.Status.calibrated) + ``` + The commit message documented the exact failure mode (cascade through + locationd uninitialized → paramsd steerRatio≈0 / stiffnessFactor≈0 → + nonsense curvature commands). It was reverted by `47321e3` as part of + "restore driving logic to pre-variable-fps baseline" — but that revert + was about isolating the drift cause, and the calibrationd change here + is *not* in the variable-FPS family. **Re-applying `7ee923b` is + probably the right next move**, narrowly scoped to calibrationd.py. +2. Less attractive alternatives: bump `NUM_READERS`; switch + carState to `poll=None` in calibrationd (more cycles per update, + higher CPU); add `ignore_average_freq=['carState']` to calibrationd's + SubMaster (treats freq glitches as benign, but keeps the cascade for + alive/valid). + +--- + +## Steer-fault alert investigation (separate symptom) + +User saw "Steering Temporarily Unavailable" alerts during a test drive +even though we hadn't touched lateral-control code. Captured in +`realdata/00000081--528e2aa03a--0/rlog`: + +- Faults occur in **brief 50–100 ms pulses** clustered while moving slowly + (`vEgo` 2.7–5.2 m/s ≈ 6–12 mph). +- Each pulse correlates with **large driver wheel torque** (-250, +273, + -187, +119 Nm) — i.e. the user actively turning the wheel during a + parking-lot maneuver. +- `cruise.enabled = False` throughout — openpilot was not engaged. +- The car's MDPS sets `LKA_FAULT` at low speeds when torque is high; that + bit maps directly to `cs_out.steerFaultTemporary` (`carstate.py:259`), + which fires `steerTempUnavailableSilent` regardless of engagement + (`ET.WARNING` displays unconditionally). + +User reports they've "never had this issue before" — implying earlier +ClearPilot revisions either gated the fault on speed or used a different +no-lateral path. **Not confirmed which.** Open follow-up. + +--- + +## Open follow-ups (ordered by likely return) + +1. **Re-apply `7ee923b`** — gate `liveCalibration.valid` on `calStatus`, + not `sm.all_checks()`. Unblocks locationd init → paramsd convergence → + real `params.roll` and `params.angleOffsetDeg` for the torque + controller. Latent benefit beyond what GPS-disable alone gave us. +2. **Investigate the persistent low-speed `steerTempUnavailable` alert.** + Either (a) gate `steerFaultTemporary` on `vEgo > ~8 m/s` in + `carstate.py:259`, or (b) find what the previous fork did — possibly + stopped sending tester CAN messages on park, possibly suppressed the + alert specifically during a park transition window. +3. **Suppress LKAS fault display when shifting drive → park.** The user + reports the car shows an LKAS-fault icon when openpilot keeps publishing + tester-present CAN messages after entering park. Investigation needed + in `selfdrive/car/hyundai/carcontroller.py` and `hyundaicanfd.py` to + gate tester messages on gear ≠ park. +4. **Wheel-torque headroom edit.** User mentioned a community-known edit + that allows slightly higher steering torque on the Hyundai panda safety + model. Research target: panda safety code for HYUNDAI_CANFD safety + model and the `MAX_TORQUE` / per-cycle delta limits. +5. **Single startup `commIssue` event.** Even with all our fixes, controlsd + logs one transient commIssue right after `controlsd.initialized` + (timeout=true after 6s). The `invalid` set at that moment is + downstream services still warming up (liveCalibration, liveLocationKalman, + liveParameters, liveTorqueParameters, frogpilotPlan, longitudinalPlan, + driverMonitoringState). Most should clear once the calibrationd issue + is fixed; remaining ones are normal warmup. +6. **gpsd.py vNED / accuracy fields.** Out of scope for this session + (we disabled GPS in locationd instead), but if GPS is ever re-enabled, + `gpsd.py:216,221-224` need real values: vNED from + `speed × {cos(bearing), sin(bearing), 0}`, and accuracy fields from + actual modem reports rather than hard-coded constants.