397 lines
10 KiB
Plaintext
397 lines
10 KiB
Plaintext
# Copyright 2012 The Chromium OS Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
description "Temporary, quick-hack metrics collection & thermal daemon"
|
|
author "chromium-os-dev@chromium.org"
|
|
|
|
# This is for quickly adding UMA stats that we may need for
|
|
# short-term experiments, when we don't have the time to add
|
|
# stuff to metrics_daemon. That's where it should go in the
|
|
# long term.
|
|
#
|
|
# This is also currently doing a userland thermal loop to allow
|
|
# for quick experimentation. This thermal loop will eventually
|
|
# move to the BIOS once the data from experiments help prove its
|
|
# efficacy.
|
|
|
|
start on started system-services
|
|
stop on stopping system-services
|
|
respawn
|
|
|
|
script
|
|
TEMP_OFFSET=273 # difference between K (reported by EC) and C (used in UMA)

# Thermal loop fields: 1-based column number of each setting within one row
# of the all_steps table. These are passed to get_field as the field index.
CPU_MAX_FREQ_FIELD=1
CPU_MIN_FREQ_FIELD=2
GPU_MAX_FREQ_FIELD=3
CPU_DUTY_CYCLE_FIELD=4
PKG_POWER_LIMIT_FIELD=5
|
|
|
|
# Thermal loop steps
#
# One row per throttling step, least aggressive first. Columns, in order:
#   CPU max freq, CPU min freq, GPU max freq, CPU duty cycle, pkg power limit
# followed by a trailing "# ..." description that is kept as part of the row.
#
# Layout matters: the string starts with a newline right after the opening
# quote, so step N lives on line N + 2 of "$all_steps" (get_step relies on
# this), and the line count below is adjusted by 3 to yield the highest
# valid step index.
all_steps="
  1801000  800000  1150  0     0x180aa00dd8088  # no throttling
  1801000  800000  1150  0     0x180aa00dd8080  # cap pkg to 16W
  1801000  800000  1150  0     0x180aa00dd8078  # cap pkg to 15W
  1801000  800000  1150  0     0x180aa00dd8070  # cap pkg to 14W
  1801000  800000  1150  0     0x180aa00dd8068  # cap pkg to 13W
  1800000  800000  900   0     0x180aa00dd8068  # disable turbo
  1600000  800000  800   0     0x180aa00dd8068  # cap CPU & GPU frequency
  1400000  800000  700   0     0x180aa00dd8068  # cap CPU & GPU frequency
  1200000  800000  600   0     0x180aa00dd8068  # cap CPU & GPU frequency
  1000000  800000  500   0     0x180aa00dd8068  # cap CPU & GPU frequency
  800000   800000  400   0     0x180aa00dd8068  # cap CPU & GPU frequency
  800000   800000  350   0     0x180aa00dd8068  # cap CPU & GPU frequency
  800000   800000  350   0x1c  0x180aa00dd8068  # duty cycle CPU
  800000   800000  350   0x18  0x180aa00dd8068  # duty cycle CPU
"
# Lines printed = leading blank + 14 rows + trailing blank = 16; minus 3
# gives 13, the index of the last step.
max_steps=$(($(echo "$all_steps" | wc -l) - 3))
|
|
|
|
# Print the row of "$all_steps" that describes throttling step $1.
#
# Arguments: $1 - zero-based step index.
# Outputs:   the whole table row (including its trailing "# ..." text) on
#            stdout; an empty line if the index is past the end of the table.
get_step() {
  # Step 0 is on line 2 of the table: awk numbers lines from 1, and line 1 is
  # the blank line that follows the opening quote of all_steps.
  # The row number is handed to awk with -v rather than spliced into the
  # program text.
  out=$(echo "$all_steps" | awk -v row="$(($1 + 2))" 'NR == row')
  echo "$out"
}
|
|
|
|
# Print one whitespace-separated field of a string.
#
# Arguments: $1 - 1-based field number (e.g. one of the *_FIELD constants).
#            $2 - the string to split (e.g. a row returned by get_step).
# Outputs:   the selected field on stdout (empty if there is no such field).
get_field() {
  # The field number is passed with -v instead of being interpolated into
  # the awk program.
  out=$(echo "$2" | awk -v field="$1" '{ print $field }')
  # Unquoted on purpose, as before: collapses the result onto a single line.
  echo $out
}
|
|
|
|
# Print the temperature of sensor 9 in degrees Celsius.
#
# Sensor 9 is the one get_max_skin_temp records as the PECI CPU temperature.
# Outputs: the reading from `ectool temps 9` minus TEMP_OFFSET, or 0 when
#          ectool fails or its output contains no digits (the same
#          "invalid reading" convention get_sensor_temp uses).
get_peci_temp() {
  tempc=0
  if out=$(ectool temps 9); then
    # Keep only the digits of the output, as the original sed filter did.
    tempk=$(printf '%s' "$out" | tr -cd '0-9')
    if [ -n "$tempk" ]; then
      tempc=$((tempk - TEMP_OFFSET))
    fi
  fi
  echo "$tempc"
}
|
|
|
|
# Print the temperature of one EC sensor in degrees Celsius.
#
# Arguments: $1 - sensor id as understood by `ectool temps`.
# Outputs:   the reading minus TEMP_OFFSET on stdout, or 0 when ectool exits
#            non-zero or prints nothing numeric. Callers treat 0 as
#            "no valid reading".
get_sensor_temp() {
  s=$1
  tempc=0
  if out=$(ectool temps "$s"); then
    # Strip everything but the digits from ectool's output.
    tempk=$(printf '%s' "$out" | tr -cd '0-9')
    # Without this guard a digit-less reply would be evaluated as
    # 0 - TEMP_OFFSET and reported as a bogus negative temperature.
    if [ -n "$tempk" ]; then
      tempc=$((tempk - TEMP_OFFSET))
    fi
  fi
  echo "$tempc"
}
|
|
|
|
# Print the ids of the temperature sensors to monitor, space separated.
#
# Known sensor ids:
#   USB C-Object:     1 or 13
#   PCH D-Object:     3
#   Hinge C-Object:   5 or 15
#   Charger D-Object: 7
# Only the USB C-Object and the Charger D-Object are returned.
get_sensor_list() {
  # Pick id 1 when `ectool tempsinfo 1` reports it as the USB C-Object,
  # otherwise fall back to id 13.
  if ectool tempsinfo 1 | grep -q "USB C-Object"; then
    usb_c_object=1
  else
    usb_c_object=13
  fi
  charger_d_object=7
  echo "$usb_c_object $charger_d_object"
}
|
|
|
|
# Program the TMP006 calibration coefficients of the four object sensors by
# calling `ectool tmp006cal` once per sensor with: index, S0, B0, B1, B2.
set_calibration_data() {
  # B coefficients shared by all four sensors.
  B0='-2.94e-5'
  B1='-5.7e-7'
  B2='4.63e-9'

  # Per-sensor S0 coefficient.
  USB_C_S0='2.712e-14'
  PCH_D_S0='9.301e-14'
  HINGE_C_S0='-11.000e-14'
  CHARGER_D_S0='5.141e-14'

  # Note that the sensor numbering is different between the ectool tmp006
  # and temps/tempsinfo commands.
  USB_C="0 $USB_C_S0 $B0 $B1 $B2"
  PCH_D="1 $PCH_D_S0 $B0 $B1 $B2"
  HINGE_C="2 $HINGE_C_S0 $B0 $B1 $B2"
  CHARGER_D="3 $CHARGER_D_S0 $B0 $B1 $B2"

  for i in "$USB_C" "$PCH_D" "$HINGE_C" "$CHARGER_D"; do
    # Add "--" otherwise ectool will barf when trying to parse negative
    # coefficients.
    # $i is left unquoted on purpose so that it splits into the five
    # separate arguments.
    ectool tmp006cal -- $i
  done
}
|
|
|
|
# Results of the most recent get_max_skin_temp call.
max_skin_temp=0
sensor_temperatures=

# Read every sensor given as an argument and publish the results in globals.
#
# Arguments: sensor ids (as accepted by get_sensor_temp).
# Globals written:
#   max_skin_temp       - highest reading among the given sensors; stays 0
#                         when no sensor reports more than 0.
#   sensor_temperatures - "id:temp:" for each given sensor, followed by the
#                         entry for sensor 9; used in log messages.
get_max_skin_temp() {
  sensor_temperatures=
  max_skin_temp=0
  # $* is deliberately unquoted so a space-separated list passed as a single
  # argument is still split into individual ids.
  for i in $*; do
    t=$(get_sensor_temp "$i")
    sensor_temperatures=$sensor_temperatures$i:$t:
    if [ "$t" -gt "$max_skin_temp" ]; then
      max_skin_temp=$t
    fi
  done

  # Record the PECI CPU temperature also. It is logged only; it does not
  # take part in the maximum.
  i=9
  t=$(get_sensor_temp "$i")
  sensor_temperatures=$sensor_temperatures$i:$t:
}
|
|
|
|
# Apply new cpufreq scaling limits to every CPU matched by
# /sys/devices/system/cpu/cpu?/cpufreq.
#
# Arguments: $1 - new scaling_max_freq value.
#            $2 - new scaling_min_freq value.
set_cpu_freq() {
  max_freq=$1
  min_freq=$2
  for cpu in /sys/devices/system/cpu/cpu?/cpufreq; do
    # If the glob matched nothing, $cpu is the literal pattern; skip it
    # instead of failing on the redirections below.
    [ -d "$cpu" ] || continue
    # The write order is kept exactly as it was: first pull both limits down
    # to 800000, then set the new maximum, then the new minimum.
    # NOTE(review): presumably this keeps min <= max at every intermediate
    # write so none of them is rejected - confirm against cpufreq behavior.
    echo 800000 > "$cpu/scaling_min_freq"
    echo 800000 > "$cpu/scaling_max_freq"
    echo "$max_freq" > "$cpu/scaling_max_freq"
    echo "$min_freq" > "$cpu/scaling_min_freq"
  done
}
|
|
|
|
# Set the minimum GPU frequency through the i915 debugfs knob.
#
# Globals written: GPU_MIN_FREQ (450) - also read by set_gpu_max_freq as the
#                  floor for the maximum frequency.
set_gpu_min_freq() {
  GPU_MIN_FREQ=450
  echo "$GPU_MIN_FREQ" > /sys/kernel/debug/dri/0/i915_min_freq
}
|
|
|
|
# Set the maximum GPU frequency through the i915 debugfs knob.
#
# Arguments:    $1 - requested maximum frequency.
# Globals read: GPU_MIN_FREQ (assigned by set_gpu_min_freq). A request below
#               it is raised to GPU_MIN_FREQ so max never drops under min.
set_gpu_max_freq() {
  gpu_max_freq=$1
  if [ "$GPU_MIN_FREQ" -gt "$gpu_max_freq" ]; then
    gpu_max_freq=$GPU_MIN_FREQ
  fi
  echo "$gpu_max_freq" > /sys/kernel/debug/dri/0/i915_max_freq
}
|
|
|
|
# Write the CPU duty-cycle value to MSR 0x19a on CPUs 0 through 3.
#
# Arguments: $1 - value to write (the CPU duty cycle column of a step row).
set_duty_cycle() {
  duty_cycle=$1
  for i in 0 1 2 3; do
    iotools wrmsr "$i" 0x19a "$duty_cycle"
  done
}
|
|
|
|
# Write the package power limit value to MSR 0x610 via CPU 0.
#
# Arguments: $1 - value to write (the pkg power limit column of a step row).
set_pkg_power_limit() {
  pwr_limit=$1
  iotools wrmsr 0 0x610 "$pwr_limit"
}
|
|
|
|
# Send a message to the system log under the tag "temp_metrics".
#
# Arguments: the message; multiple arguments are joined with spaces into a
#            single log entry.
log_message() {
  logger -t temp_metrics "$*"
}
|
|
|
|
# Skin temperature bands used by thermal_loop. A temperature strictly above
# TEMP_THRESHOLD_n selects band n; anything else is band 0. Within a band
# the loop steps up while the temperature is above the band's watermark
# (_WM) and steps down while it is below it.
TEMP_THRESHOLD_1=38
TEMP_THRESHOLD_1_WM=40
TEMP_THRESHOLD_2=45
TEMP_THRESHOLD_2_WM=47
TEMP_THRESHOLD_3=50
TEMP_THRESHOLD_3_WM=50

# Range of step indices (rows of the step table) allowed in each band.
TEMP_THRESHOLD_0_MIN_STEP=0
TEMP_THRESHOLD_0_MAX_STEP=0
TEMP_THRESHOLD_1_MIN_STEP=1
TEMP_THRESHOLD_1_MAX_STEP=5
TEMP_THRESHOLD_2_MIN_STEP=6
TEMP_THRESHOLD_2_MAX_STEP=9
TEMP_THRESHOLD_3_MIN_STEP=10
TEMP_THRESHOLD_3_MAX_STEP=13

# Thermal loop state. The two start out different on purpose-or-not: with
# current_step=1 and new_step=0, the first thermal_loop call always sees a
# change and therefore applies a step.
current_step=1
new_step=0
|
|
|
|
# Run one iteration of the userland thermal loop.
#
# Picks a temperature band from the skin temperature, moves the throttling
# step by at most one within the band (or jumps to the band's nearest limit
# when the current step lies outside it), and, if the step changed, applies
# the settings of the new step.
#
# Arguments:       $1 - current max skin temperature in degrees Celsius.
# Globals read:    TEMP_THRESHOLD_*, *_FIELD, sensor_temperatures.
# Globals written: current_step, new_step (plus scratch variables).
thermal_loop() {
  # Hack to reset turbo activation threshold since BIOS can change it
  # underneath us.
  iotools wrmsr 0 0x64c 0x12

  # Select the band: its watermark and its allowed step range.
  skin_temp=$1
  if [ "$skin_temp" -gt "$TEMP_THRESHOLD_3" ]; then
    temp_watermark=$TEMP_THRESHOLD_3_WM
    min_step=$TEMP_THRESHOLD_3_MIN_STEP
    max_step=$TEMP_THRESHOLD_3_MAX_STEP
  elif [ "$skin_temp" -gt "$TEMP_THRESHOLD_2" ]; then
    temp_watermark=$TEMP_THRESHOLD_2_WM
    min_step=$TEMP_THRESHOLD_2_MIN_STEP
    max_step=$TEMP_THRESHOLD_2_MAX_STEP
  elif [ "$skin_temp" -gt "$TEMP_THRESHOLD_1" ]; then
    temp_watermark=$TEMP_THRESHOLD_1_WM
    min_step=$TEMP_THRESHOLD_1_MIN_STEP
    max_step=$TEMP_THRESHOLD_1_MAX_STEP
  else
    temp_watermark=0
    min_step=$TEMP_THRESHOLD_0_MIN_STEP
    max_step=$TEMP_THRESHOLD_0_MAX_STEP
  fi

  # Above the watermark: throttle one step harder. Below: one step less.
  # When neither inner condition holds, new_step keeps the value left by the
  # previous call.
  if [ "$skin_temp" -gt "$temp_watermark" ]; then
    if [ "$current_step" -ne "$max_step" ]; then
      new_step=$((current_step + 1))
    fi
  elif [ "$skin_temp" -lt "$temp_watermark" ]; then
    if [ "$current_step" -gt "$min_step" ]; then
      new_step=$((current_step - 1))
    fi
  else
    new_step=$current_step
  fi

  # Clamp into the band's step range; this is what makes the step jump when
  # the temperature crosses into a different band.
  if [ "$new_step" -gt "$max_step" ]; then
    new_step=$max_step
  elif [ "$new_step" -lt "$min_step" ]; then
    new_step=$min_step
  fi

  # Nothing to apply if the step did not change.
  if [ "$new_step" -eq "$current_step" ]; then
    return
  fi

  current_step=$new_step
  step=$(get_step "$new_step")

  # $step is unquoted on purpose: the row is logged with its runs of
  # whitespace collapsed to single spaces.
  log_message "Throttling (temps: $sensor_temperatures):" $step

  cpu_max_freq=$(get_field "$CPU_MAX_FREQ_FIELD" "$step")
  cpu_min_freq=$(get_field "$CPU_MIN_FREQ_FIELD" "$step")
  gpu_max_freq=$(get_field "$GPU_MAX_FREQ_FIELD" "$step")
  cpu_duty_cycle=$(get_field "$CPU_DUTY_CYCLE_FIELD" "$step")
  pkg_power_limit=$(get_field "$PKG_POWER_LIMIT_FIELD" "$step")

  set_cpu_freq "$cpu_max_freq" "$cpu_min_freq"
  set_gpu_max_freq "$gpu_max_freq"
  set_duty_cycle "$cpu_duty_cycle"
  set_pkg_power_limit "$pkg_power_limit"
}
|
|
|
|
# Print the current fan speed as reported by `ectool pwmgetfanrpm`, with
# every non-digit character removed. Prints an empty line when the output
# contains no digits.
get_fan_rpm() {
  fan_rpm_digits=$(ectool pwmgetfanrpm | sed 's/[^0-9]//g')
  # Unquoted on purpose, as before: the result always ends up on one line.
  echo $fan_rpm_digits
}
|
|
|
|
# Ask the EC to run the fan at the given speed.
#
# Arguments: $1 - target RPM, passed straight to `ectool pwmsetfanrpm`.
set_fan_rpm() {
  ectool pwmsetfanrpm "$1"
}
|
|
|
|
# Reset all six fan hysteresis thresholds (temp_low1..temp_low6) to 105.
# No fan_loop band is held by its threshold again until fan_loop lowers one.
reset_fan_thresholds() {
  temp_low1=105
  temp_low2=105
  temp_low3=105
  temp_low4=105
  temp_low5=105
  temp_low6=105
}
|
|
|
|
# Fan loop state.
# last_rpm is the RPM most recently requested by fan_loop; the initial 10 is
# not a value fan_loop ever requests, so the first iteration always differs.
last_rpm=10
# Hysteresis thresholds, one per fan speed band; 105 is the reset value
# (same as reset_fan_thresholds).
temp_low1=105
temp_low2=105
temp_low3=105
temp_low4=105
temp_low5=105
temp_low6=105
|
|
|
|
# Run one iteration of the fan control loop.
#
# Maps the skin temperature to a target fan RPM with hysteresis: a band is
# entered when the temperature exceeds its fixed trip point, and once
# entered it is kept until the temperature is no longer above the band's
# lower temp_lowN threshold.
#
# Arguments:       $1 - current max skin temperature in degrees Celsius.
# Globals read:    sensor_temperatures (for the log message).
# Globals written: rpm, cur_rpm, last_rpm, temp_low1..temp_low6.
fan_loop() {
  skin_temp=$1

  if [ "$skin_temp" -gt 48 ] || [ "$skin_temp" -gt "$temp_low1" ]; then
    rpm=9300
    reset_fan_thresholds
    temp_low1=46
  elif [ "$skin_temp" -gt 44 ] || [ "$skin_temp" -gt "$temp_low2" ]; then
    rpm=8000
    reset_fan_thresholds
    temp_low2=43
  elif [ "$skin_temp" -gt 42 ] || [ "$skin_temp" -gt "$temp_low3" ]; then
    rpm=7000
    reset_fan_thresholds
    temp_low3=41
  elif [ "$skin_temp" -gt 40 ] || [ "$skin_temp" -gt "$temp_low4" ]; then
    rpm=5500
    reset_fan_thresholds
    temp_low4=39
  elif [ "$skin_temp" -gt 38 ] || [ "$skin_temp" -gt "$temp_low5" ]; then
    rpm=4000
    reset_fan_thresholds
    temp_low5=34
  elif [ "$skin_temp" -gt 33 ] || [ "$skin_temp" -gt "$temp_low6" ]; then
    rpm=3000
    reset_fan_thresholds
    temp_low6=30
  else
    rpm=0
    reset_fan_thresholds
  fi

  # During S0->S3->S0 transitions, the EC sets the fan RPM to 0. This script
  # isn't aware of such transitions. Read the current fan RPM again to see
  # if it got set to 0. Note that comparing the current fan RPM against last
  # requested RPM won't suffice since the actual fan RPM may not be exactly
  # what was requested.
  cur_rpm=$(get_fan_rpm)
  # Skip the request when the fan is spinning and the target is unchanged,
  # or when the fan is stopped and the target is 0. Brace groups are used
  # for grouping so that no subshell is forked for the tests.
  if { [ "$cur_rpm" -ne 0 ] && [ "$last_rpm" -eq "$rpm" ]; } || \
     { [ "$cur_rpm" -eq 0 ] && [ "$rpm" -eq 0 ]; }; then
    last_rpm=$rpm
    return
  fi

  log_message "Setting fan RPM (temps: $sensor_temperatures): $last_rpm -> $rpm"

  last_rpm=$rpm
  set_fan_rpm "$rpm"
}
|
|
|
|
# Thermal zone 1 is for operating systems where a userland thermal loop
# doesn't exist. Disable it.
if [ -e /sys/class/thermal/thermal_zone1/mode ]; then
  # printf writes the word without a trailing newline, like the former
  # `echo -n`, but without depending on echo's option handling.
  printf '%s' 'disabled' > /sys/class/thermal/thermal_zone1/mode
fi

# Enable the fan in case no other code has enabled it.
ectool fanduty 0

# Get list of sensors to monitor.
sensor_list=$(get_sensor_list)

# Set sensor calibration data.
set_calibration_data

# Set minimum GPU frequency.
set_gpu_min_freq

# loop_count counts 10-second iterations since the last metrics report.
loop_count=0
# ec_fan_loop is 1 while fan control has been handed back to the EC.
ec_fan_loop=0
|
|
|
|
# Main loop: every 10 seconds run the fan and thermal loops; every third
# iteration also report each sensor temperature to UMA.
while true; do
  sleep 10
  loop_count=$((loop_count + 1))

  # Read the max skin temperature.
  # $sensor_list is unquoted on purpose: each id becomes its own argument.
  get_max_skin_temp $sensor_list

  if [ "$max_skin_temp" -eq 0 ]; then
    # No sensor produced a usable reading: hand fan control back to the EC
    # (once) and leave the throttling step untouched.
    if [ "$ec_fan_loop" -eq 0 ]; then
      log_message "Invalid max skin temp. Switching to EC fan loop."
      ectool autofanctrl
      ec_fan_loop=1
      # Restore the initial last_rpm so the next fan_loop call differs from
      # any RPM it can request.
      last_rpm=10
    fi
  else
    # Run the fan loop.
    fan_loop "$max_skin_temp"
    ec_fan_loop=0

    # Run the thermal loop.
    thermal_loop "$max_skin_temp"
  fi

  # Report the metrics once every 30 seconds.
  if [ "$loop_count" -lt 3 ]; then
    continue
  fi
  loop_count=0

  # Use a linear histogram with 1 C buckets starting at 0.
  N_SLOTS=180

  # Each line is expected to look like "<index>: <kelvin>".
  ectool temps all | while read -r line; do
    index=${line%%:*}
    tempk=${line##* }
    # Skip anything that is not purely numeric on both sides; otherwise the
    # printf / arithmetic below would fail on it inside the pipeline.
    case "$index" in
      '' | *[!0-9]*) continue ;;
    esac
    case "$tempk" in
      '' | *[!0-9]*) continue ;;
    esac
    index=$(printf "%02d" "$index")
    tempc=$((tempk - TEMP_OFFSET))
    # ignore values below freezing
    if [ "$tempc" -lt 0 ]; then
      tempc=0
    fi
    metrics_client -e "Platform.Temperature.Sensor$index" "$tempc" "$N_SLOTS"
  done
done
|
|
end script
|