clearpilot: initial commit of full source

This commit is contained in:
2026-04-11 06:25:25 +00:00
commit e2a0c1894a
3383 changed files with 834683 additions and 0 deletions

0
selfdrive/thermald/__init__.py Executable file
View File

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
from abc import ABC, abstractmethod
from openpilot.common.realtime import DT_TRML
from openpilot.common.numpy_fast import interp
from openpilot.common.swaglog import cloudlog
from openpilot.selfdrive.controls.lib.pid import PIDController
class BaseFanController(ABC):
@abstractmethod
def update(self, cur_temp: float, ignition: bool) -> int:
pass
class TiciFanController(BaseFanController):
def __init__(self) -> None:
super().__init__()
cloudlog.info("Setting up TICI fan handler")
self.last_ignition = False
self.controller = PIDController(k_p=0, k_i=4e-3, k_f=1, rate=(1 / DT_TRML))
def update(self, cur_temp: float, ignition: bool) -> int:
self.controller.neg_limit = -(100 if ignition else 30)
self.controller.pos_limit = -(30 if ignition else 0)
if ignition != self.last_ignition:
self.controller.reset()
error = 70 - cur_temp
fan_pwr_out = -int(self.controller.update(
error=error,
feedforward=interp(cur_temp, [60.0, 100.0], [0, -100])
))
self.last_ignition = ignition
return fan_pwr_out

View File

@@ -0,0 +1,135 @@
import time
import threading
from openpilot.common.params import Params
from openpilot.system.hardware import HARDWARE
from openpilot.common.swaglog import cloudlog
from openpilot.selfdrive.statsd import statlog
CAR_VOLTAGE_LOW_PASS_K = 0.011 # LPF gain for 45s tau (dt/tau / (dt/tau + 1))
# While driving, a battery charges completely in about 30-60 minutes
CAR_BATTERY_CAPACITY_uWh = 30e6
CAR_CHARGING_RATE_W = 45
VBATT_PAUSE_CHARGING = 11.8 # Lower limit on the LPF car battery voltage
MAX_TIME_OFFROAD_S = 30*3600
MIN_ON_TIME_S = 3600
DELAY_SHUTDOWN_TIME_S = 300 # Wait at least DELAY_SHUTDOWN_TIME_S seconds after offroad_time to shutdown.
VOLTAGE_SHUTDOWN_MIN_OFFROAD_TIME_S = 60
class PowerMonitoring:
def __init__(self):
self.params = Params()
self.last_measurement_time = None # Used for integration delta
self.last_save_time = 0 # Used for saving current value in a param
self.power_used_uWh = 0 # Integrated power usage in uWh since going into offroad
self.next_pulsed_measurement_time = None
self.car_voltage_mV = 12e3 # Low-passed version of peripheralState voltage
self.car_voltage_instant_mV = 12e3 # Last value of peripheralState voltage
self.integration_lock = threading.Lock()
car_battery_capacity_uWh = self.params.get("CarBatteryCapacity")
if car_battery_capacity_uWh is None:
car_battery_capacity_uWh = 0
# Reset capacity if it's low
self.car_battery_capacity_uWh = max((CAR_BATTERY_CAPACITY_uWh / 10), int(car_battery_capacity_uWh))
# FrogPilot variables
device_management = self.params.get_bool("DeviceManagement")
device_shutdown_setting = self.params.get_int("DeviceShutdown") if device_management else 33
# If the toggle is set for < 1 hour, configure by 15 minute increments
self.device_shutdown_time = (device_shutdown_setting - 3) * 3600 if device_shutdown_setting >= 4 else device_shutdown_setting * (60 * 15)
self.low_voltage_shutdown = self.params.get_float("LowVoltageShutdown") if device_management else VBATT_PAUSE_CHARGING
# Calculation tick
def calculate(self, voltage: int | None, ignition: bool):
try:
now = time.monotonic()
# If peripheralState is None, we're probably not in a car, so we don't care
if voltage is None:
with self.integration_lock:
self.last_measurement_time = None
self.next_pulsed_measurement_time = None
self.power_used_uWh = 0
return
# Low-pass battery voltage
self.car_voltage_instant_mV = voltage
self.car_voltage_mV = ((voltage * CAR_VOLTAGE_LOW_PASS_K) + (self.car_voltage_mV * (1 - CAR_VOLTAGE_LOW_PASS_K)))
statlog.gauge("car_voltage", self.car_voltage_mV / 1e3)
# Cap the car battery power and save it in a param every 10-ish seconds
self.car_battery_capacity_uWh = max(self.car_battery_capacity_uWh, 0)
self.car_battery_capacity_uWh = min(self.car_battery_capacity_uWh, CAR_BATTERY_CAPACITY_uWh)
if now - self.last_save_time >= 10:
self.params.put_nonblocking("CarBatteryCapacity", str(int(self.car_battery_capacity_uWh)))
self.last_save_time = now
# First measurement, set integration time
with self.integration_lock:
if self.last_measurement_time is None:
self.last_measurement_time = now
return
if ignition:
# If there is ignition, we integrate the charging rate of the car
with self.integration_lock:
self.power_used_uWh = 0
integration_time_h = (now - self.last_measurement_time) / 3600
if integration_time_h < 0:
raise ValueError(f"Negative integration time: {integration_time_h}h")
self.car_battery_capacity_uWh += (CAR_CHARGING_RATE_W * 1e6 * integration_time_h)
self.last_measurement_time = now
else:
# Get current power draw somehow
current_power = HARDWARE.get_current_power_draw()
# Do the integration
self._perform_integration(now, current_power)
except Exception:
cloudlog.exception("Power monitoring calculation failed")
def _perform_integration(self, t: float, current_power: float) -> None:
with self.integration_lock:
try:
if self.last_measurement_time:
integration_time_h = (t - self.last_measurement_time) / 3600
power_used = (current_power * 1000000) * integration_time_h
if power_used < 0:
raise ValueError(f"Negative power used! Integration time: {integration_time_h} h Current Power: {power_used} uWh")
self.power_used_uWh += power_used
self.car_battery_capacity_uWh -= power_used
self.last_measurement_time = t
except Exception:
cloudlog.exception("Integration failed")
# Get the power usage
def get_power_used(self) -> int:
return int(self.power_used_uWh)
def get_car_battery_capacity(self) -> int:
return int(self.car_battery_capacity_uWh)
# See if we need to shutdown
def should_shutdown(self, ignition: bool, in_car: bool, offroad_timestamp: float | None, started_seen: bool):
if offroad_timestamp is None:
return False
now = time.monotonic()
should_shutdown = False
offroad_time = (now - offroad_timestamp)
low_voltage_shutdown = (self.car_voltage_mV < (self.low_voltage_shutdown * 1e3) and
offroad_time > VOLTAGE_SHUTDOWN_MIN_OFFROAD_TIME_S)
should_shutdown |= offroad_time > self.device_shutdown_time
should_shutdown |= low_voltage_shutdown
should_shutdown |= (self.car_battery_capacity_uWh <= 0)
should_shutdown &= not ignition
should_shutdown &= (not self.params.get_bool("DisablePowerDown"))
should_shutdown &= in_car
should_shutdown &= offroad_time > DELAY_SHUTDOWN_TIME_S
should_shutdown |= self.params.get_bool("ForcePowerDown")
should_shutdown &= started_seen or (now > MIN_ON_TIME_S)
return should_shutdown

View File

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
import unittest
from unittest.mock import Mock, patch
from parameterized import parameterized
from openpilot.selfdrive.thermald.fan_controller import TiciFanController
ALL_CONTROLLERS = [(TiciFanController,)]
def patched_controller(controller_class):
with patch("os.system", new=Mock()):
return controller_class()
class TestFanController(unittest.TestCase):
def wind_up(self, controller, ignition=True):
for _ in range(1000):
controller.update(100, ignition)
def wind_down(self, controller, ignition=False):
for _ in range(1000):
controller.update(10, ignition)
@parameterized.expand(ALL_CONTROLLERS)
def test_hot_onroad(self, controller_class):
controller = patched_controller(controller_class)
self.wind_up(controller)
self.assertGreaterEqual(controller.update(100, True), 70)
@parameterized.expand(ALL_CONTROLLERS)
def test_offroad_limits(self, controller_class):
controller = patched_controller(controller_class)
self.wind_up(controller)
self.assertLessEqual(controller.update(100, False), 30)
@parameterized.expand(ALL_CONTROLLERS)
def test_no_fan_wear(self, controller_class):
controller = patched_controller(controller_class)
self.wind_down(controller)
self.assertEqual(controller.update(10, False), 0)
@parameterized.expand(ALL_CONTROLLERS)
def test_limited(self, controller_class):
controller = patched_controller(controller_class)
self.wind_up(controller, True)
self.assertEqual(controller.update(100, True), 100)
@parameterized.expand(ALL_CONTROLLERS)
def test_windup_speed(self, controller_class):
controller = patched_controller(controller_class)
self.wind_down(controller, True)
for _ in range(10):
controller.update(90, True)
self.assertGreaterEqual(controller.update(90, True), 60)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python3
import unittest
from unittest.mock import patch
from openpilot.common.params import Params
from openpilot.selfdrive.thermald.power_monitoring import PowerMonitoring, CAR_BATTERY_CAPACITY_uWh, \
CAR_CHARGING_RATE_W, VBATT_PAUSE_CHARGING, DELAY_SHUTDOWN_TIME_S
# Create fake time
ssb = 0.
def mock_time_monotonic():
global ssb
ssb += 1.
return ssb
TEST_DURATION_S = 50
GOOD_VOLTAGE = 12 * 1e3
VOLTAGE_BELOW_PAUSE_CHARGING = (VBATT_PAUSE_CHARGING - 1) * 1e3
def pm_patch(name, value, constant=False):
if constant:
return patch(f"openpilot.selfdrive.thermald.power_monitoring.{name}", value)
return patch(f"openpilot.selfdrive.thermald.power_monitoring.{name}", return_value=value)
@patch("time.monotonic", new=mock_time_monotonic)
class TestPowerMonitoring(unittest.TestCase):
def setUp(self):
self.params = Params()
# Test to see that it doesn't do anything when pandaState is None
def test_pandaState_present(self):
pm = PowerMonitoring()
for _ in range(10):
pm.calculate(None, None)
self.assertEqual(pm.get_power_used(), 0)
self.assertEqual(pm.get_car_battery_capacity(), (CAR_BATTERY_CAPACITY_uWh / 10))
# Test to see that it doesn't integrate offroad when ignition is True
def test_offroad_ignition(self):
pm = PowerMonitoring()
for _ in range(10):
pm.calculate(GOOD_VOLTAGE, True)
self.assertEqual(pm.get_power_used(), 0)
# Test to see that it integrates with discharging battery
def test_offroad_integration_discharging(self):
POWER_DRAW = 4
with pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
for _ in range(TEST_DURATION_S + 1):
pm.calculate(GOOD_VOLTAGE, False)
expected_power_usage = ((TEST_DURATION_S/3600) * POWER_DRAW * 1e6)
self.assertLess(abs(pm.get_power_used() - expected_power_usage), 10)
# Test to check positive integration of car_battery_capacity
def test_car_battery_integration_onroad(self):
POWER_DRAW = 4
with pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = 0
for _ in range(TEST_DURATION_S + 1):
pm.calculate(GOOD_VOLTAGE, True)
expected_capacity = ((TEST_DURATION_S/3600) * CAR_CHARGING_RATE_W * 1e6)
self.assertLess(abs(pm.get_car_battery_capacity() - expected_capacity), 10)
# Test to check positive integration upper limit
def test_car_battery_integration_upper_limit(self):
POWER_DRAW = 4
with pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = CAR_BATTERY_CAPACITY_uWh - 1000
for _ in range(TEST_DURATION_S + 1):
pm.calculate(GOOD_VOLTAGE, True)
estimated_capacity = CAR_BATTERY_CAPACITY_uWh + (CAR_CHARGING_RATE_W / 3600 * 1e6)
self.assertLess(abs(pm.get_car_battery_capacity() - estimated_capacity), 10)
# Test to check negative integration of car_battery_capacity
def test_car_battery_integration_offroad(self):
POWER_DRAW = 4
with pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = CAR_BATTERY_CAPACITY_uWh
for _ in range(TEST_DURATION_S + 1):
pm.calculate(GOOD_VOLTAGE, False)
expected_capacity = CAR_BATTERY_CAPACITY_uWh - ((TEST_DURATION_S/3600) * POWER_DRAW * 1e6)
self.assertLess(abs(pm.get_car_battery_capacity() - expected_capacity), 10)
# Test to check negative integration lower limit
def test_car_battery_integration_lower_limit(self):
POWER_DRAW = 4
with pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = 1000
for _ in range(TEST_DURATION_S + 1):
pm.calculate(GOOD_VOLTAGE, False)
estimated_capacity = 0 - ((1/3600) * POWER_DRAW * 1e6)
self.assertLess(abs(pm.get_car_battery_capacity() - estimated_capacity), 10)
# Test to check policy of stopping charging after MAX_TIME_OFFROAD_S
def test_max_time_offroad(self):
MOCKED_MAX_OFFROAD_TIME = 3600
POWER_DRAW = 0 # To stop shutting down for other reasons
with pm_patch("MAX_TIME_OFFROAD_S", MOCKED_MAX_OFFROAD_TIME, constant=True), pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = CAR_BATTERY_CAPACITY_uWh
start_time = ssb
ignition = False
while ssb <= start_time + MOCKED_MAX_OFFROAD_TIME:
pm.calculate(GOOD_VOLTAGE, ignition)
if (ssb - start_time) % 1000 == 0 and ssb < start_time + MOCKED_MAX_OFFROAD_TIME:
self.assertFalse(pm.should_shutdown(ignition, True, start_time, False))
self.assertTrue(pm.should_shutdown(ignition, True, start_time, False))
def test_car_voltage(self):
POWER_DRAW = 0 # To stop shutting down for other reasons
TEST_TIME = 350
VOLTAGE_SHUTDOWN_MIN_OFFROAD_TIME_S = 50
with pm_patch("VOLTAGE_SHUTDOWN_MIN_OFFROAD_TIME_S", VOLTAGE_SHUTDOWN_MIN_OFFROAD_TIME_S, constant=True), \
pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = CAR_BATTERY_CAPACITY_uWh
ignition = False
start_time = ssb
for i in range(TEST_TIME):
pm.calculate(VOLTAGE_BELOW_PAUSE_CHARGING, ignition)
if i % 10 == 0:
self.assertEqual(pm.should_shutdown(ignition, True, start_time, True),
(pm.car_voltage_mV < VBATT_PAUSE_CHARGING * 1e3 and
(ssb - start_time) > VOLTAGE_SHUTDOWN_MIN_OFFROAD_TIME_S and
(ssb - start_time) > DELAY_SHUTDOWN_TIME_S))
self.assertTrue(pm.should_shutdown(ignition, True, start_time, True))
# Test to check policy of not stopping charging when DisablePowerDown is set
def test_disable_power_down(self):
POWER_DRAW = 0 # To stop shutting down for other reasons
TEST_TIME = 100
self.params.put_bool("DisablePowerDown", True)
with pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = CAR_BATTERY_CAPACITY_uWh
ignition = False
for i in range(TEST_TIME):
pm.calculate(VOLTAGE_BELOW_PAUSE_CHARGING, ignition)
if i % 10 == 0:
self.assertFalse(pm.should_shutdown(ignition, True, ssb, False))
self.assertFalse(pm.should_shutdown(ignition, True, ssb, False))
# Test to check policy of not stopping charging when ignition
def test_ignition(self):
POWER_DRAW = 0 # To stop shutting down for other reasons
TEST_TIME = 100
with pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = CAR_BATTERY_CAPACITY_uWh
ignition = True
for i in range(TEST_TIME):
pm.calculate(VOLTAGE_BELOW_PAUSE_CHARGING, ignition)
if i % 10 == 0:
self.assertFalse(pm.should_shutdown(ignition, True, ssb, False))
self.assertFalse(pm.should_shutdown(ignition, True, ssb, False))
# Test to check policy of not stopping charging when harness is not connected
def test_harness_connection(self):
POWER_DRAW = 0 # To stop shutting down for other reasons
TEST_TIME = 100
with pm_patch("HARDWARE.get_current_power_draw", POWER_DRAW):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = CAR_BATTERY_CAPACITY_uWh
ignition = False
for i in range(TEST_TIME):
pm.calculate(VOLTAGE_BELOW_PAUSE_CHARGING, ignition)
if i % 10 == 0:
self.assertFalse(pm.should_shutdown(ignition, False, ssb, False))
self.assertFalse(pm.should_shutdown(ignition, False, ssb, False))
def test_delay_shutdown_time(self):
pm = PowerMonitoring()
pm.car_battery_capacity_uWh = 0
ignition = False
in_car = True
offroad_timestamp = ssb
started_seen = True
pm.calculate(VOLTAGE_BELOW_PAUSE_CHARGING, ignition)
while ssb < offroad_timestamp + DELAY_SHUTDOWN_TIME_S:
self.assertFalse(pm.should_shutdown(ignition, in_car,
offroad_timestamp,
started_seen),
f"Should not shutdown before {DELAY_SHUTDOWN_TIME_S} seconds offroad time")
self.assertTrue(pm.should_shutdown(ignition, in_car,
offroad_timestamp,
started_seen),
f"Should shutdown after {DELAY_SHUTDOWN_TIME_S} seconds offroad time")
if __name__ == "__main__":
unittest.main()

496
selfdrive/thermald/thermald.py Executable file
View File

@@ -0,0 +1,496 @@
#!/usr/bin/env python3
import os
import json
import queue
import threading
import time
from collections import OrderedDict, namedtuple
from pathlib import Path
import psutil
import cereal.messaging as messaging
from cereal import log
from cereal.services import SERVICE_LIST
from openpilot.common.dict_helpers import strip_deprecated_keys
from openpilot.common.filter_simple import FirstOrderFilter
from openpilot.common.params import Params
from openpilot.common.realtime import DT_TRML
from openpilot.selfdrive.controls.lib.alertmanager import set_offroad_alert
from openpilot.system.hardware import HARDWARE, TICI, AGNOS
from openpilot.system.loggerd.config import get_available_bytes, get_available_percent, get_used_bytes
from openpilot.selfdrive.statsd import statlog
from openpilot.common.swaglog import cloudlog
from openpilot.selfdrive.thermald.power_monitoring import PowerMonitoring
from openpilot.selfdrive.thermald.fan_controller import TiciFanController
from openpilot.system.version import terms_version, training_version
ThermalStatus = log.DeviceState.ThermalStatus
NetworkType = log.DeviceState.NetworkType
NetworkStrength = log.DeviceState.NetworkStrength
CURRENT_TAU = 15. # 15s time constant
TEMP_TAU = 5. # 5s time constant
DISCONNECT_TIMEOUT = 5. # wait 5 seconds before going offroad after disconnect so you get an alert
PANDA_STATES_TIMEOUT = round(1000 / SERVICE_LIST['pandaStates'].frequency * 1.5) # 1.5x the expected pandaState frequency
ThermalBand = namedtuple("ThermalBand", ['min_temp', 'max_temp'])
HardwareState = namedtuple("HardwareState", ['network_type', 'network_info', 'network_strength', 'network_stats',
'network_metered', 'nvme_temps', 'modem_temps'])
# List of thermal bands. We will stay within this region as long as we are within the bounds.
# When exiting the bounds, we'll jump to the lower or higher band. Bands are ordered in the dict.
THERMAL_BANDS = OrderedDict({
ThermalStatus.green: ThermalBand(None, 80.0),
ThermalStatus.yellow: ThermalBand(75.0, 96.0),
ThermalStatus.red: ThermalBand(88.0, 107.),
ThermalStatus.danger: ThermalBand(94.0, None),
})
# Override to highest thermal band when offroad and above this temp
OFFROAD_DANGER_TEMP = 75
prev_offroad_states: dict[str, tuple[bool, str | None]] = {}
tz_by_type: dict[str, int] | None = None
def populate_tz_by_type():
global tz_by_type
tz_by_type = {}
for n in os.listdir("/sys/devices/virtual/thermal"):
if not n.startswith("thermal_zone"):
continue
with open(os.path.join("/sys/devices/virtual/thermal", n, "type")) as f:
tz_by_type[f.read().strip()] = int(n.removeprefix("thermal_zone"))
def read_tz(x):
if x is None:
return 0
if isinstance(x, str):
if tz_by_type is None:
populate_tz_by_type()
x = tz_by_type[x]
try:
with open(f"/sys/devices/virtual/thermal/thermal_zone{x}/temp") as f:
return int(f.read())
except FileNotFoundError:
return 0
def read_thermal(thermal_config):
dat = messaging.new_message('deviceState', valid=True)
dat.deviceState.cpuTempC = [read_tz(z) / thermal_config.cpu[1] for z in thermal_config.cpu[0]]
dat.deviceState.gpuTempC = [read_tz(z) / thermal_config.gpu[1] for z in thermal_config.gpu[0]]
dat.deviceState.memoryTempC = read_tz(thermal_config.mem[0]) / thermal_config.mem[1]
dat.deviceState.pmicTempC = [read_tz(z) / thermal_config.pmic[1] for z in thermal_config.pmic[0]]
return dat
def set_offroad_alert_if_changed(offroad_alert: str, show_alert: bool, extra_text: str | None=None):
if prev_offroad_states.get(offroad_alert, None) == (show_alert, extra_text):
return
prev_offroad_states[offroad_alert] = (show_alert, extra_text)
set_offroad_alert(offroad_alert, show_alert, extra_text)
def hw_state_thread(end_event, hw_queue):
"""Handles non critical hardware state, and sends over queue"""
count = 0
prev_hw_state = None
modem_version = None
modem_nv = None
modem_configured = False
modem_restarted = False
modem_missing_count = 0
while not end_event.is_set():
# these are expensive calls. update every 10s
if (count % int(10. / DT_TRML)) == 0:
try:
network_type = HARDWARE.get_network_type()
modem_temps = HARDWARE.get_modem_temperatures()
if len(modem_temps) == 0 and prev_hw_state is not None:
modem_temps = prev_hw_state.modem_temps
# Log modem version once
if AGNOS and ((modem_version is None) or (modem_nv is None)):
modem_version = HARDWARE.get_modem_version()
modem_nv = HARDWARE.get_modem_nv()
if (modem_version is not None) and (modem_nv is not None):
cloudlog.event("modem version", version=modem_version, nv=modem_nv)
else:
if not modem_restarted:
# TODO: we may be able to remove this with a MM update
# ModemManager's probing on startup can fail
# rarely, restart the service to probe again.
modem_missing_count += 1
if modem_missing_count > 3:
modem_restarted = True
cloudlog.event("restarting ModemManager")
os.system("sudo systemctl restart --no-block ModemManager")
tx, rx = HARDWARE.get_modem_data_usage()
hw_state = HardwareState(
network_type=network_type,
network_info=HARDWARE.get_network_info(),
network_strength=HARDWARE.get_network_strength(network_type),
network_stats={'wwanTx': tx, 'wwanRx': rx},
network_metered=HARDWARE.get_network_metered(network_type),
nvme_temps=HARDWARE.get_nvme_temperatures(),
modem_temps=modem_temps,
)
try:
hw_queue.put_nowait(hw_state)
except queue.Full:
pass
# TODO: remove this once the config is in AGNOS
if not modem_configured and len(HARDWARE.get_sim_info().get('sim_id', '')) > 0:
cloudlog.warning("configuring modem")
HARDWARE.configure_modem()
modem_configured = True
prev_hw_state = hw_state
except Exception:
cloudlog.exception("Error getting hardware state")
count += 1
time.sleep(DT_TRML)
def thermald_thread(end_event, hw_queue) -> None:
pm = messaging.PubMaster(['deviceState', 'frogpilotDeviceState'])
sm = messaging.SubMaster(["peripheralState", "gpsLocationExternal", "controlsState", "pandaStates"], poll="pandaStates")
count = 0
onroad_conditions: dict[str, bool] = {
"ignition": False,
}
startup_conditions: dict[str, bool] = {}
startup_conditions_prev: dict[str, bool] = {}
off_ts: float | None = None
started_ts: float | None = None
started_seen = False
startup_blocked_ts: float | None = None
thermal_status = ThermalStatus.yellow
last_hw_state = HardwareState(
network_type=NetworkType.none,
network_info=None,
network_metered=False,
network_strength=NetworkStrength.unknown,
network_stats={'wwanTx': -1, 'wwanRx': -1},
nvme_temps=[],
modem_temps=[],
)
all_temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML, initialized=False)
offroad_temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML, initialized=False)
should_start_prev = False
in_car = False
engaged_prev = False
params = Params()
power_monitor = PowerMonitoring()
HARDWARE.initialize_hardware()
thermal_config = HARDWARE.get_thermal_config()
fan_controller = None
# FrogPilot variables
device_management = params.get_bool("DeviceManagement")
offline_mode = device_management and params.get_bool("OfflineMode")
# For testing a temporary overtemp notice
# set to 20 to show
test_temp_count = 0
while not end_event.is_set():
sm.update(PANDA_STATES_TIMEOUT)
# Run at 2Hz
if sm.frame % round(SERVICE_LIST['pandaStates'].frequency * DT_TRML) != 0:
continue
pandaStates = sm['pandaStates']
peripheralState = sm['peripheralState']
peripheral_panda_present = peripheralState.pandaType != log.PandaState.PandaType.unknown
msg = read_thermal(thermal_config)
msg.deviceState.deviceType = HARDWARE.get_device_type()
if sm.updated['pandaStates'] and len(pandaStates) > 0:
# Set ignition based on any panda connected
onroad_conditions["ignition"] = any(ps.ignitionLine or ps.ignitionCan for ps in pandaStates if ps.pandaType != log.PandaState.PandaType.unknown)
pandaState = pandaStates[0]
in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected
# Setup fan handler on first connect to panda
if fan_controller is None and peripheral_panda_present:
if TICI:
fan_controller = TiciFanController()
elif (time.monotonic() - sm.recv_time['pandaStates']) > DISCONNECT_TIMEOUT:
if onroad_conditions["ignition"]:
onroad_conditions["ignition"] = False
cloudlog.error("panda timed out onroad")
try:
last_hw_state = hw_queue.get_nowait()
except queue.Empty:
pass
fpmsg = messaging.new_message('frogpilotDeviceState')
fpmsg.frogpilotDeviceState.freeSpace = round(get_available_bytes(default=32.0 * (2 ** 30)) / (2 ** 30))
fpmsg.frogpilotDeviceState.usedSpace = round(get_used_bytes(default=0.0 * (2 ** 30)) / (2 ** 30))
pm.send("frogpilotDeviceState", fpmsg)
msg.deviceState.freeSpacePercent = get_available_percent(default=100.0)
msg.deviceState.memoryUsagePercent = int(round(psutil.virtual_memory().percent))
msg.deviceState.gpuUsagePercent = int(round(HARDWARE.get_gpu_usage_percent()))
online_cpu_usage = [int(round(n)) for n in psutil.cpu_percent(percpu=True)]
offline_cpu_usage = [0., ] * (len(msg.deviceState.cpuTempC) - len(online_cpu_usage))
msg.deviceState.cpuUsagePercent = online_cpu_usage + offline_cpu_usage
msg.deviceState.networkType = last_hw_state.network_type
msg.deviceState.networkMetered = last_hw_state.network_metered
msg.deviceState.networkStrength = last_hw_state.network_strength
msg.deviceState.networkStats = last_hw_state.network_stats
if last_hw_state.network_info is not None:
msg.deviceState.networkInfo = last_hw_state.network_info
msg.deviceState.nvmeTempC = last_hw_state.nvme_temps
msg.deviceState.modemTempC = last_hw_state.modem_temps
msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness()
# this subset is only used for offroad
temp_sources = [
msg.deviceState.memoryTempC,
max(msg.deviceState.cpuTempC),
max(msg.deviceState.gpuTempC),
]
offroad_comp_temp = offroad_temp_filter.update(max(temp_sources))
# this drives the thermal status while onroad
temp_sources.append(max(msg.deviceState.pmicTempC))
all_comp_temp = all_temp_filter.update(max(temp_sources))
msg.deviceState.maxTempC = all_comp_temp
if fan_controller is not None:
msg.deviceState.fanSpeedPercentDesired = fan_controller.update(all_comp_temp, onroad_conditions["ignition"])
is_offroad_for_5_min = (started_ts is None) and ((not started_seen) or (off_ts is None) or (time.monotonic() - off_ts > 60 * 5))
if is_offroad_for_5_min and offroad_comp_temp > OFFROAD_DANGER_TEMP:
# if device is offroad and already hot without the extra onroad load,
# we want to cool down first before increasing load
thermal_status = ThermalStatus.danger
else:
current_band = THERMAL_BANDS[thermal_status]
band_idx = list(THERMAL_BANDS.keys()).index(thermal_status)
if current_band.min_temp is not None and all_comp_temp < current_band.min_temp:
thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1]
elif current_band.max_temp is not None and all_comp_temp > current_band.max_temp:
thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1]
# **** starting logic ****
startup_conditions["up_to_date"] = params.get("Offroad_ConnectivityNeeded") is None or params.get_bool("DisableUpdates") or params.get_bool("SnoozeUpdate") or offline_mode
startup_conditions["not_uninstalling"] = not params.get_bool("DoUninstall")
startup_conditions["accepted_terms"] = params.get("HasAcceptedTerms") == terms_version
# with 2% left, we killall, otherwise the phone will take a long time to boot
startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2
startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version
startup_conditions["not_driver_view"] = not params.get_bool("IsDriverViewEnabled")
startup_conditions["not_taking_snapshot"] = not params.get_bool("IsTakingSnapshot")
# must be at an engageable thermal band to go onroad
startup_conditions["device_temp_engageable"] = thermal_status < ThermalStatus.red
# ensure device is fully booted
startup_conditions["device_booted"] = startup_conditions.get("device_booted", False) or HARDWARE.booted()
# if the temperature enters the danger zone, go offroad to cool down
onroad_conditions["device_temp_good"] = thermal_status < ThermalStatus.danger
extra_text = f"{offroad_comp_temp:.1f}C"
show_alert = (not onroad_conditions["device_temp_good"] or not startup_conditions["device_temp_engageable"]) and onroad_conditions["ignition"]
# Test temperature warning
if test_temp_count > 0:
show_alert = True
test_temp_count = test_temp_count - 1
print (test_temp_count)
set_offroad_alert_if_changed("Offroad_TemperatureTooHigh", show_alert, extra_text=extra_text)
# TODO: this should move to TICI.initialize_hardware, but we currently can't import params there
if TICI:
if not os.path.isfile("/persist/comma/living-in-the-moment"):
if not Path("/data/media").is_mount():
set_offroad_alert_if_changed("Offroad_StorageMissing", True)
else:
# check for bad NVMe
try:
with open("/sys/block/nvme0n1/device/model") as f:
model = f.read().strip()
if not model.startswith("Samsung SSD 980") and params.get("Offroad_BadNvme") is None:
set_offroad_alert_if_changed("Offroad_BadNvme", True)
cloudlog.event("Unsupported NVMe", model=model, error=True)
except Exception:
pass
# Handle offroad/onroad transition
should_start = all(onroad_conditions.values())
if started_ts is None:
should_start = should_start and all(startup_conditions.values())
if should_start != should_start_prev or (count == 0):
params.put_bool("IsEngaged", False)
engaged_prev = False
HARDWARE.set_power_save(not should_start)
if sm.updated['controlsState']:
engaged = sm['controlsState'].enabled
if engaged != engaged_prev:
params.put_bool("IsEngaged", engaged)
engaged_prev = engaged
try:
with open('/dev/kmsg', 'w') as kmsg:
kmsg.write(f"<3>[thermald] engaged: {engaged}\n")
except Exception:
pass
if should_start:
off_ts = None
if started_ts is None:
started_ts = time.monotonic()
started_seen = True
if startup_blocked_ts is not None:
cloudlog.event("Startup after block", block_duration=(time.monotonic() - startup_blocked_ts),
startup_conditions=startup_conditions, onroad_conditions=onroad_conditions,
startup_conditions_prev=startup_conditions_prev, error=True)
startup_blocked_ts = None
else:
if onroad_conditions["ignition"] and (startup_conditions != startup_conditions_prev):
cloudlog.event("Startup blocked", startup_conditions=startup_conditions, onroad_conditions=onroad_conditions, error=True)
startup_conditions_prev = startup_conditions.copy()
startup_blocked_ts = time.monotonic()
started_ts = None
if off_ts is None:
off_ts = time.monotonic()
# Offroad power monitoring
voltage = None if peripheralState.pandaType == log.PandaState.PandaType.unknown else peripheralState.voltage
power_monitor.calculate(voltage, onroad_conditions["ignition"])
msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used()
msg.deviceState.carBatteryCapacityUwh = max(0, power_monitor.get_car_battery_capacity())
current_power_draw = HARDWARE.get_current_power_draw()
statlog.sample("power_draw", current_power_draw)
msg.deviceState.powerDrawW = current_power_draw
som_power_draw = HARDWARE.get_som_power_draw()
statlog.sample("som_power_draw", som_power_draw)
msg.deviceState.somPowerDrawW = som_power_draw
# Check if we need to shut down
if power_monitor.should_shutdown(onroad_conditions["ignition"], in_car, off_ts, started_seen):
cloudlog.warning(f"shutting device down, offroad since {off_ts}")
params.put_bool("DoShutdown", True)
msg.deviceState.started = started_ts is not None
msg.deviceState.startedMonoTime = int(1e9*(started_ts or 0))
last_ping = params.get("LastAthenaPingTime")
if last_ping is not None:
msg.deviceState.lastAthenaPingTime = int(last_ping)
msg.deviceState.thermalStatus = thermal_status
pm.send("deviceState", msg)
# Log to statsd
statlog.gauge("free_space_percent", msg.deviceState.freeSpacePercent)
statlog.gauge("gpu_usage_percent", msg.deviceState.gpuUsagePercent)
statlog.gauge("memory_usage_percent", msg.deviceState.memoryUsagePercent)
for i, usage in enumerate(msg.deviceState.cpuUsagePercent):
statlog.gauge(f"cpu{i}_usage_percent", usage)
for i, temp in enumerate(msg.deviceState.cpuTempC):
statlog.gauge(f"cpu{i}_temperature", temp)
for i, temp in enumerate(msg.deviceState.gpuTempC):
statlog.gauge(f"gpu{i}_temperature", temp)
statlog.gauge("memory_temperature", msg.deviceState.memoryTempC)
for i, temp in enumerate(msg.deviceState.pmicTempC):
statlog.gauge(f"pmic{i}_temperature", temp)
for i, temp in enumerate(last_hw_state.nvme_temps):
statlog.gauge(f"nvme_temperature{i}", temp)
for i, temp in enumerate(last_hw_state.modem_temps):
statlog.gauge(f"modem_temperature{i}", temp)
statlog.gauge("fan_speed_percent_desired", msg.deviceState.fanSpeedPercentDesired)
statlog.gauge("screen_brightness_percent", msg.deviceState.screenBrightnessPercent)
# # report to server once every 10 minutes
# rising_edge_started = should_start and not should_start_prev
# if rising_edge_started or (count % int(600. / DT_TRML)) == 0:
# dat = {
# 'count': count,
# 'pandaStates': [strip_deprecated_keys(p.to_dict()) for p in pandaStates],
# 'peripheralState': strip_deprecated_keys(peripheralState.to_dict()),
# 'location': (strip_deprecated_keys(sm["gpsLocationExternal"].to_dict()) if sm.alive["gpsLocationExternal"] else None),
# 'deviceState': strip_deprecated_keys(msg.to_dict())
# }
# cloudlog.event("STATUS_PACKET", **dat)
# # save last one before going onroad
# if rising_edge_started:
# try:
# params.put("LastOffroadStatusPacket", json.dumps(dat))
# except Exception:
# cloudlog.exception("failed to save offroad status")
params.put_bool_nonblocking("NetworkMetered", msg.deviceState.networkMetered)
count += 1
should_start_prev = should_start
def main():
hw_queue = queue.Queue(maxsize=1)
end_event = threading.Event()
threads = [
threading.Thread(target=hw_state_thread, args=(end_event, hw_queue)),
threading.Thread(target=thermald_thread, args=(end_event, hw_queue)),
]
for t in threads:
t.start()
try:
while True:
time.sleep(1)
if not all(t.is_alive() for t in threads):
break
finally:
end_event.set()
for t in threads:
t.join()
if __name__ == "__main__":
main()