在Driver中以PMU測試RPi 4B效能
在Driver中以PMU測試RPi 4B效能
本文將提供一個進行PMU(Performance Monitor Unit)測試的driver範例程式,以執行矩陣乘法時的CPU cycle count為例。
有關driver編譯,請參考這篇。
所需的函式庫:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/smp.h>
需要用到的Registers
PMUSERENR_EL0
全稱:Performance Monitors USER ENable Register[1]
用途:啟用user read權限。
// Bit flags for PMUSERENR_EL0. enable_cpu_counter() ORs all three together
// and writes the result to the register; disable_cpu_counter() writes 0.
#define ARMV8_PMUSERENR_EN (1 << 0) // EN, bit [0] -> 0b1 [Traps access enable]
#define ARMV8_PMUSERENR_CR (1 << 2) // CR, bit [2] -> 0b1 [Cycle counter read access enable]
#define ARMV8_PMUSERENR_ER (1 << 3) // ER, bit [3] -> 0b1 [Event counter read access enable]
PMCR_EL0
全稱:Performance Monitors Control Register[2]
用途:控制PMU,此處用到開啟及重置event counters。
*MASK為寫入時使用。
// Bit flags for PMCR_EL0. pmu_pmcr_write() ANDs every value with
// ARMV8_PMCR_MASK (0x3f, i.e. bits [5:0]) before writing it to the register,
// so any higher bit passed in is dropped.
#define ARMV8_PMCR_MASK 0x3f
#define ARMV8_PMCR_E (1 << 0) // E, bit [0] -> 0b1 [Enable all event counters, including PMCCNTR_EL0]
#define ARMV8_PMCR_P (1 << 1) // P, bit [1] -> 0b1 [Reset all event counters except PMCCNTR_EL0]
#define ARMV8_PMCR_C (1 << 2) // C, bit [2] -> 0b1 [Reset PMCCNTR_EL0 counter]
PMCNTENSET_EL0
全稱:Performance Monitors CouNT ENable SET register[3]
用途:啟用cycle counter(對應位元寫入1)。此暫存器為write-one-to-set,寫入0不會有任何作用;若要停用,需改對PMCNTENCLR_EL0的對應位元寫入1。
// Values for bit 31 (the cycle-counter bit) of PMCNTENSET_EL0.
// The literals are unsigned: shifting a signed 1 into bit 31 overflows int,
// and the resulting negative value is sign-extended into bits [63:32] when it
// is later cast to u64 for the MSR instruction.
// Note that the DISABLE value is plain 0, i.e. it sets no bit at all.
#define ARMV8_PMCNTENSET_DISABLE (0U << 31) // C, bit[31] -> 0b0 [cycle counter disabled]
#define ARMV8_PMCNTENSET_ENABLE (1U << 31) // C, bit[31] -> 0b1 [cycle counter enabled]
PMINTENSET_EL1
全稱:Performance Monitors INTerrupt ENable SET register[4]
用途:啟用cycle count的溢位中斷請求(對應位元寫入1)。此暫存器為write-one-to-set,寫入0不會有任何作用;若要停用,需改對PMINTENCLR_EL1的對應位元寫入1。
// Value that enable_cpu_counter() writes to PMINTENSET_EL1.
// (0 << 31) evaluates to plain 0, so no bit is set in the written value.
// NOTE(review): the Arm register reference describes PMINTENSET_EL1 as
// write-one-to-set, which would make a write of 0 a no-op; clearing the
// overflow interrupt enable is presumably done via PMINTENCLR_EL1 - confirm.
#define ARMV8_PMINTENSET_DISABLE (0 << 31) // C, bit[31] -> 0b0 [cycle counter overflow interrupt disabled]
PMCCNTR_EL0
全稱:Performance Monitors Cycle CouNT Register[5]
用途:讀取CPU cycle count。
*稍後的步驟將有讀取範例。
啟動PMU counter並取得讀取權限
// Write the PMU control register (PMCR_EL0).
// Only the bits covered by ARMV8_PMCR_MASK are taken from @value; an ISB is
// issued before the register write.
static void pmu_pmcr_write(u32 value)
{
	const u64 pmcr = (u64)(value & ARMV8_PMCR_MASK);

	asm volatile("isb" : : : "memory");
	asm volatile("MSR PMCR_EL0, %0" : : "r"(pmcr));
}
static void enable_cpu_counter(void)
{
// Init & reset PMU control
pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
// Disable PMU cycle counter overflow interrupt
asm volatile("MSR PMINTENSET_EL1, %0" : : "r"((u64)ARMV8_PMINTENSET_DISABLE));
// Enable PMU cycle counter
asm volatile("MSR PMCNTENSET_EL0, %0" : : "r"((u64)ARMV8_PMCNTENSET_ENABLE));
// Enable PMU control
pmu_pmcr_write(ARMV8_PMCR_E);
// Enable PMU user read access
asm volatile("MSR PMUSERENR_EL0, %0" : : "r"((u64)ARMV8_PMUSERENR_EN | ARMV8_PMUSERENR_ER | ARMV8_PMUSERENR_CR));
printk(KERN_INFO "PMU access enabled.");
}
讀取CPU cycle(PMCCNTR_EL0)
使用PMCCNTR_EL0讀取目前的CPU cycle count。
static u32 pmu_pmccntr_read(void)
{
u32 value;
// Read the cycle counter
asm volatile("MRS %0, PMCCNTR_EL0" : "=r"(value));
return value;
}
撰寫測試用work function
這裡以3x3矩陣乘法為例,數字將隨機初始化。
可以替換為任何你想要測試的function。
#define SIZE 3
static void matrix_mul(void)
{
u8 rand_num;
u8 a[SIZE][SIZE], b[SIZE][SIZE], result[SIZE][SIZE];
u8 i, j, k;
// random init matrix A
for (i = 0; i < SIZE; i++)
{
for (j = 0; j < SIZE; j++)
{
get_random_bytes(&rand_num, sizeof(rand_num));
a[i][j] = rand_num % 10; // 0~9
}
}
// random init matrix B
for (i = 0; i < SIZE; i++)
{
for (j = 0; j < SIZE; j++)
{
get_random_bytes(&rand_num, sizeof(rand_num));
b[i][j] = rand_num % 10; // 0~9
}
}
// Init result(all set to 0)
for (i = 0; i < SIZE; i++)
{
for (j = 0; j < SIZE; j++)
{
result[i][j] = 0;
}
}
// Matrix Multiplication
for (i = 0; i < SIZE; i++)
{
for (j = 0; j < SIZE; j++)
{
for (k = 0; k < SIZE; k++)
{
result[i][j] += a[i][k] * b[k][j];
}
}
}
}
將work function所耗的CPU cycle印出
// Run matrix_mul() once and log how many CPU cycles elapsed, taken as the
// difference between two pmu_pmccntr_read() samples.
// NOTE(review): nothing here pins the task to one CPU between the two
// samples - confirm that is acceptable for the measurement.
static void cpu_cycle_test_pmu(void)
{
	u32 start, elapsed;

	// Sample the counter right before the workload
	start = pmu_pmccntr_read();

	// Workload under test
	matrix_mul();

	// Sample again and keep only the delta
	elapsed = pmu_pmccntr_read() - start;

	printk(KERN_INFO "PMU Test - CPU cycle count: %u\n", elapsed);
}
關閉PMU
static void disable_cpu_counter(void)
{
// Disable PMU cycle counter
asm volatile("MSR PMCNTENSET_EL0, %0" : : "r"((u64)ARMV8_PMCNTENSET_DISABLE));
// Disable PMU control
pmu_pmcr_write(~ARMV8_PMCR_E); // '~'=not
// Disable PMU user read access
asm volatile("MSR PMUSERENR_EL0, %0" : : "r"((u64)0)); // all set 0
printk(KERN_INFO "PMU access disabled.");
}
Driver init & exit function
// Module entry point: turn the cycle counter on, then run the cycle-count
// test once. Always reports success (0).
static int __init init_pmu(void)
{
	enable_cpu_counter();
	cpu_cycle_test_pmu();

	return 0;
}
// Module exit point: undo the configuration done at load time by calling
// disable_cpu_counter().
static void __exit exit_pmu(void)
{
	disable_cpu_counter();
}
// Register the load/unload handlers of this module
module_init(init_pmu);
module_exit(exit_pmu);

// Module metadata
MODULE_LICENSE("GPL");
MODULE_AUTHOR("MAPLELEAF3659");
MODULE_DESCRIPTION("Enables PMU CPU cycle counter and test matrix multiplication function");
MODULE_VERSION("1:0.0");
測試
可反覆insmod、rmmod來確認每次的CPU cycle是否不同。
[ 31.624275] PMU access enabled.
[ 31.624313] PMU Test - CPU cycle count: 17440
[ 45.484819] PMU access disabled.
[ 66.832694] PMU access enabled.
[ 66.832742] PMU Test - CPU cycle count: 16462
完整程式碼
GitHub: https://github.com/MAPLELEAF3659/armv8-pmu-cpu-cycle-test
參考
PMUSERENR_EL0, Performance Monitors User Enable Register. (March, 2021). Arm Developer. Retrieved November 15, 2024, from https://developer.arm.com/documentation/ddi0595/2021-03/AArch64-Registers/PMUSERENR-EL0--Performance-Monitors-User-Enable-Register ↩︎
PMCR_EL0, Performance Monitors Control Register. (March, 2021). Arm Developer. Retrieved November 15, 2024, from https://developer.arm.com/documentation/ddi0595/2021-03/AArch64-Registers/PMCR-EL0--Performance-Monitors-Control-Register ↩︎
PMCNTENSET_EL0, Performance Monitors Count Enable Set register. (March, 2021). Arm Developer. Retrieved November 15, 2024, from https://developer.arm.com/documentation/ddi0595/2021-03/AArch64-Registers/PMCNTENSET-EL0--Performance-Monitors-Count-Enable-Set-register ↩︎
PMINTENSET_EL1, Performance Monitors Interrupt Enable Set register. (March, 2021). Arm Developer. Retrieved November 15, 2024, from https://developer.arm.com/documentation/ddi0595/2021-03/AArch64-Registers/PMINTENSET-EL1--Performance-Monitors-Interrupt-Enable-Set-register ↩︎
PMCCNTR_EL0, Performance Monitors Cycle Count Register. (March, 2021). Arm Developer. Retrieved November 15, 2024, from https://developer.arm.com/documentation/ddi0595/2021-03/AArch64-Registers/PMCCNTR-EL0--Performance-Monitors-Cycle-Count-Register ↩︎
- 0
- 0
- 0
- 0
- 0
- 0
Preview: