Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add power_envelope & soc_power sensors #116

Merged
merged 1 commit into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions lanserv/mellanox-bf/mlx-bf-base.emu
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ sensor_add 0x30 0 9 0x01 0x01 \
poll 5000 \
file "/run/emu_param/ddr_temp"

# Add the soc_power sensor (sensor number 0x0a, matching the "soc_power"
# record in mlx-bf.sdrs). The reading is taken from /run/emu_param/soc_power
# with a poll setting of 5000 (presumably milliseconds - confirm against the
# ipmi_sim documentation).
# NOTE(review): the positional arguments appear to be
# <mc addr> <lun> <sensor number> <sensor type> <event/reading type code>;
# verify against the ipmi_sim_cmd reference before changing them.
sensor_add 0x30 0 0x0a 0x02 0x01 \
poll 5000 \
file "/run/emu_param/soc_power"

# Add the power_envelope sensor (sensor number 0xb, matching the
# "power_envelope" record in mlx-bf.sdrs). The reading is taken from
# /run/emu_param/power_envelope with a poll setting of 5000 (presumably
# milliseconds - confirm against the ipmi_sim documentation).
# NOTE(review): the positional arguments appear to be
# <mc addr> <lun> <sensor number> <sensor type> <event/reading type code>;
# verify against the ipmi_sim_cmd reference before changing them.
sensor_add 0x30 0 0xb 0x02 0x01 \
poll 5000 \
file "/run/emu_param/power_envelope"

# Add the RTC battery voltage sensor
sensor_add 0x30 0 0xc 0x02 0x01 \
poll 5000 \
Expand Down
136 changes: 136 additions & 0 deletions lanserv/mellanox-bf/mlx-bf.sdrs
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,142 @@ sdr type 1
id_string "ddr_temp"
endsdr

# soc_power: sensor 0x0a (owner 0x30, LUN 0, channel 0), reported in Watts.
# Readings are unsigned in the range 0-255 with a linear conversion using
# m=1, b=0 and zero exponents.
# Only the lower-critical (lc) threshold is enabled in this record: return_lc,
# lc_thrsh_readable, lc_thrsh_settable and the lcgl assert/deassert events are
# true, and lc_fthresh is 5. All other thresholds are disabled and left at 0.
# NOTE(review): sensor_type is "Voltage" although base_unit is "Watts" -
# confirm this is intentional (the matching sensor_add entry uses type 0x02).
sdr type 1
sensor_owner_id 0x30
sensor_owner_lun 0
channel_number 0
sensor_number 0x0a
entity_id system_board
entity_instance 2
sensor_type Voltage
event_reading_type_code 1
init_scanning true
init_events true
init_thresholds true
init_hysteresis false
init_sensor_type true
default_event_gen_on true
default_sensor_scan_on true
sensor_auto_rearm true
sensor_event_msg_ctrl per_state
sensor_threshold_access settable
return_lnc false
return_lc true
return_unc false
return_uc false
deassert_lncgl false
deassert_uncgh false
deassert_lcgl true
deassert_ucgh false
assert_lncgl false
assert_uncgh false
assert_lcgl true
assert_ucgh false
lnc_thrsh_settable false
unc_thrsh_settable false
lc_thrsh_settable true
uc_thrsh_settable false
lnc_thrsh_readable false
unc_thrsh_readable false
lc_thrsh_readable true
uc_thrsh_readable false
analog_data_format unsigned
rate_unit none
modifier_unit none
percentage false
base_unit Watts
modifier_unit_code unspecified
linearization linear
nominal_specified false
nominal_reading 0
m 1
tolerance 0
b 0
accuracy 1
accuracy_exp 0
sensor_direction input
r_exp 0
b_exp 0
sensor_maximum 255
sensor_minimum 0
lc_fthresh 5
lnc_fthresh 0
unc_fthresh 0
uc_fthresh 0
positive_hysteresis 0
negative_hysteresis 0
id_string "soc_power"
endsdr

# power_envelope: sensor 0xb (owner 0x30, LUN 0, channel 0), reported in Watts.
# Readings are unsigned in the range 0-255 with a linear conversion using
# m=1, b=0 and zero exponents.
# The lower-non-critical (lnc) and upper-non-critical (unc) thresholds are
# enabled in this record: their return/readable/settable flags and the
# lncgl/uncgh assert/deassert events are true, with lnc_fthresh 10 and
# unc_fthresh 150. The critical thresholds (lc, uc) are disabled and left at 0.
# NOTE(review): sensor_type is "Voltage" although base_unit is "Watts" -
# confirm this is intentional (the matching sensor_add entry uses type 0x02).
sdr type 1
sensor_owner_id 0x30
sensor_owner_lun 0
channel_number 0
sensor_number 0xb
entity_id system_board
entity_instance 2
sensor_type Voltage
event_reading_type_code 1
init_scanning true
init_events true
init_thresholds true
init_hysteresis false
init_sensor_type true
default_event_gen_on true
default_sensor_scan_on true
sensor_auto_rearm true
sensor_event_msg_ctrl per_state
sensor_threshold_access settable
return_lnc true
return_lc false
return_unc true
return_uc false
deassert_lncgl true
deassert_uncgh true
deassert_lcgl false
deassert_ucgh false
assert_lncgl true
assert_uncgh true
assert_lcgl false
assert_ucgh false
lnc_thrsh_settable true
unc_thrsh_settable true
lc_thrsh_settable false
uc_thrsh_settable false
lnc_thrsh_readable true
unc_thrsh_readable true
lc_thrsh_readable false
uc_thrsh_readable false
analog_data_format unsigned
rate_unit none
modifier_unit none
percentage false
base_unit Watts
modifier_unit_code unspecified
linearization linear
nominal_specified false
nominal_reading 0
m 1
tolerance 0
b 0
accuracy 1
accuracy_exp 0
sensor_direction input
r_exp 0
b_exp 0
sensor_maximum 255
sensor_minimum 0
lc_fthresh 0
lnc_fthresh 10
unc_fthresh 150
uc_fthresh 0
positive_hysteresis 0
negative_hysteresis 0
id_string "power_envelope"
endsdr

sdr type 1
sensor_owner_id 0x30
sensor_owner_lun 0
Expand Down
2 changes: 2 additions & 0 deletions lanserv/mellanox-bf/sdr.30.main
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
last_add_time:i:1605050738

35:d:\23\00\51\01\36\30\00\0c\07\02\77\48\02\01\04\20\04\00\02\02\00\04\00\00\01\00\00\01\02\f0\00\00\00\00\ff\00\00\00\00\00\17\00\00\00\00\00\00\cbrtc_voltage
34:d:\22\00\51\01\39\30\00\0b\07\02\77\48\02\01\c3\10\c3\10\09\09\00\06\00\00\01\00\00\05\01\00\00\00\00\00\ff\00\00\00\96\00\00\0a\00\00\00\00\00\cepower_envelope
33:d:\21\00\51\01\34\30\00\0a\07\02\77\48\02\01\0c\20\0c\00\02\02\00\06\00\00\01\00\00\05\01\00\00\00\00\00\ff\00\00\00\00\00\05\00\00\00\00\00\00\c9soc_power
32:d:\20\00\51\01\39\30\00\09\07\02\7f\48\01\01\85\32\85\32\1b\1b\00\01\00\00\01\00\00\05\01\00\01\50\00\00\ff\00\00\69\5f\00\00\05\00\00\00\00\00\c8ddr_temp
31:d:\1f\00Q\11\190\14\80\00\00\10\00\01\03\00\cedmidecode_info
30:d:\1e\00Q\11\170\13\80\00\00\10\00\01\03\00\ccproduct_name
Expand Down
66 changes: 66 additions & 0 deletions lanserv/mellanox-bf/set_emu_param.sh
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,72 @@ else
remove_sensor "rtc_voltage"
fi

###################################
# load mlxbf-ptm module           #
###################################
# Detect Linux lockdown before attempting to load the driver.
# The active mode in /sys/kernel/security/lockdown is the bracketed word,
# e.g. "none [integrity] confidentiality" or
# "none integrity [confidentiality]". Either of those two modes counts as
# locked down; a missing file or "[none]" leaves the state as "unlock".
LOCKDOWN_STATUS_PATH="/sys/kernel/security/lockdown"
LOCKDOWN_STATUS="unlock"
if [[ -f "$LOCKDOWN_STATUS_PATH" ]]; then
  lockdown_status_string=$(cat "$LOCKDOWN_STATUS_PATH")
  case "$lockdown_status_string" in
    *"[integrity]"* | *"[confidentiality]"*)
      echo "Error: Linux lockdown: system can't load mlxbf_ptm module; soc_power and power_envelope sensors unavailable."
      LOCKDOWN_STATUS="lockdown"
      ;;
  esac
fi

# Load mlxbf_ptm only when it is not already listed by lsmod and the
# system is not locked down.
if ! lsmod | grep -q mlxbf_ptm; then
  if [[ "$LOCKDOWN_STATUS" == "unlock" ]]; then
    echo "mlxbf_ptm module not loaded, loading now."
    modprobe mlxbf_ptm
  fi
fi

###################################
# PTM reading helper              #
###################################
# ptm_reading_to_int VALUE [ALLOW_NEGATIVE]
#
# Validates that VALUE is a plain decimal number ("12", "12.75", and with
# ALLOW_NEGATIVE=yes also "-3.9") and prints its integer part on stdout.
# The fractional part is dropped because it can cause issues in the ipmb.
#
# Arguments: $1 - raw reading text
#            $2 - "yes" to accept a leading '-', anything else rejects it
#                 (default: no)
# Outputs:   the truncated integer, in base 10, on stdout
# Returns:   0 on success, 1 if VALUE is not an acceptable decimal number
ptm_reading_to_int() {
  local value=$1
  local allow_negative=${2:-no}
  local number_re='^(-?)([0-9]+)(\.[0-9]+)?$'
  local sign whole

  [[ "$value" =~ $number_re ]] || return 1
  sign=${BASH_REMATCH[1]}
  whole=${BASH_REMATCH[2]}

  if [[ -n "$sign" && "$allow_negative" != "yes" ]]; then
    return 1
  fi

  # Force base 10. Without the 10# prefix, shell arithmetic treats a leading
  # zero as octal: "08" or "09" abort with "value too great for base" and
  # "010" silently becomes 8. The sign is handled separately because
  # "10#-5" is not accepted by every bash version.
  whole=$((10#$whole))

  # "-0.4" truncates to plain 0, never "-0".
  if [[ -n "$sign" ]] && ((whole != 0)); then
    whole="-$whole"
  fi
  printf '%s\n' "$whole"
}

###################################
# Get SOC power info              #
###################################
SOC_POWER_PATH="/sys/kernel/debug/mlxbf-ptm/monitors/status/total_power"
if [ ! -f "$SOC_POWER_PATH" ]; then
  echo "Error: soc_power file not found try to load the driver with: modprobe mlxbf-ptm"
  remove_sensor "soc_power"
else
  soc_power=$(cat "$SOC_POWER_PATH")
  # Check that soc_power is a non-negative decimal number and keep only its
  # integer part.
  if soc_power_int=$(ptm_reading_to_int "$soc_power"); then
    soc_power=$soc_power_int
    # Publish the value for the emulator via ${EMU_PARAM_DIR}/soc_power.
    echo "$soc_power" > "${EMU_PARAM_DIR}/soc_power"
  else
    echo "Error: soc_power is not a valid number"
    remove_sensor "soc_power"
  fi
fi

###################################
# Get power envelope info         #
###################################
POWER_ENVELOPE_PATH="/sys/kernel/debug/mlxbf-ptm/monitors/status/power_envelope"
if [ ! -f "$POWER_ENVELOPE_PATH" ]; then
  echo "Error: power_envelope file not found"
  remove_sensor "power_envelope"
else
  power_envelope=$(cat "$POWER_ENVELOPE_PATH")
  # Check that power_envelope is a decimal number (a leading '-' is accepted
  # here) and keep only its integer part.
  if power_envelope_int=$(ptm_reading_to_int "$power_envelope" yes); then
    power_envelope=$power_envelope_int
    # Publish the value for the emulator via ${EMU_PARAM_DIR}/power_envelope.
    echo "$power_envelope" > "${EMU_PARAM_DIR}/power_envelope"
  else
    echo "Error: power_envelope is not a valid number"
    remove_sensor "power_envelope"
  fi
fi

###################################
# Get FW info #
###################################
Expand Down