f-stack/dpdk/examples/vm_power_manager/oob_monitor_x86.c

300 lines
6.6 KiB
C
Raw Normal View History

2019-06-25 11:12:58 +00:00
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2018 Intel Corporation
*/
#include <unistd.h>
#include <fcntl.h>
#include <rte_log.h>
#include "oob_monitor.h"
#include "power_manager.h"
#include "channel_manager.h"
/*
 * Polling-loop control flag: run_branch_monitor() keeps iterating while this
 * is non-zero, and branch_monitor_exit() clears it to request a stop.
 */
static volatile unsigned run_loop = 1;
/*
 * Branch and branch-miss deltas stored by apply_policy() for the last sample
 * in which both counters advanced. run_branch_monitor() prints g_branches.
 */
static uint64_t g_branches, g_branch_misses;
/*
 * Reset to 0 at the start of every apply_policy() call and set to 1 only when
 * that call got as far as computing a ratio and applying the scaling policy.
 */
static int g_active;
/*
 * Request that the branch monitor stops.
 *
 * Only clears the loop flag; run_branch_monitor() notices the change the
 * next time it evaluates its loop condition.
 */
void
branch_monitor_exit(void)
{
	run_loop = 0;
}
/* Number of microseconds between each poll */
#define INTERVAL 100
/*
 * Number of polls that fit in one second at the INTERVAL above.
 * run_branch_monitor() emits its status line once more than this many polls
 * have elapsed since the previous one.
 */
#define PRINT_LOOP_COUNT (1000000/INTERVAL)
/*
 * Offsets into the per-core /dev/cpu/N/msr file of the two event-select
 * registers written by add_core_to_monitor()/remove_core_from_monitor().
 */
#define IA32_PERFEVTSEL0 0x186
#define IA32_PERFEVTSEL1 0x187
/*
 * Offsets of the two counters read by apply_policy(): counter 0 is treated
 * as the branch count and counter 1 as the branch-miss count.
 */
#define IA32_PERFCTR0 0xc1
#define IA32_PERFCTR1 0xc2
/*
 * Values programmed into the event-select registers to start counting.
 * NOTE(review): presumably event codes 0xc4/0xc5 (branches retired /
 * branches mispredicted) with the USR, OS, INT and EN flag bits set --
 * confirm against the Intel SDM performance-monitoring chapter.
 */
#define IA32_PERFEVT_BRANCH_HITS 0x05300c4
#define IA32_PERFEVT_BRANCH_MISS 0x05300c5
static float
apply_policy(int core)
{
struct core_info *ci;
2019-06-26 10:17:41 +00:00
uint64_t counter = 0;
2019-06-25 11:12:58 +00:00
uint64_t branches, branch_misses;
2019-06-26 10:17:41 +00:00
uint64_t last_branches, last_branch_misses;
int64_t hits_diff, miss_diff;
2019-06-25 11:12:58 +00:00
float ratio;
int ret;
2020-06-18 16:55:50 +00:00
int freq_window_idx, up_count = 0, i;
2019-06-25 11:12:58 +00:00
g_active = 0;
ci = get_core_info();
last_branches = ci->cd[core].last_branches;
last_branch_misses = ci->cd[core].last_branch_misses;
ret = pread(ci->cd[core].msr_fd, &counter,
sizeof(counter), IA32_PERFCTR0);
if (ret < 0)
RTE_LOG(ERR, POWER_MANAGER,
"unable to read counter for core %u\n",
core);
branches = counter;
2019-06-26 10:17:41 +00:00
counter = 0;
2019-06-25 11:12:58 +00:00
ret = pread(ci->cd[core].msr_fd, &counter,
sizeof(counter), IA32_PERFCTR1);
if (ret < 0)
RTE_LOG(ERR, POWER_MANAGER,
"unable to read counter for core %u\n",
core);
branch_misses = counter;
ci->cd[core].last_branches = branches;
ci->cd[core].last_branch_misses = branch_misses;
2019-06-26 10:17:41 +00:00
/*
* Intentional right shift to make MSB 0 to avoid
* possible signed overflow or truncation.
*/
branches >>= 1;
last_branches >>= 1;
hits_diff = (int64_t)branches - (int64_t)last_branches;
2019-06-25 11:12:58 +00:00
if (hits_diff <= 0) {
/* Likely a counter overflow condition, skip this round */
return -1.0;
}
2019-06-26 10:17:41 +00:00
/*
* Intentional right shift to make MSB 0 to avoid
* possible signed overflow or truncation.
*/
branch_misses >>= 1;
last_branch_misses >>= 1;
miss_diff = (int64_t)branch_misses - (int64_t)last_branch_misses;
2019-06-25 11:12:58 +00:00
if (miss_diff <= 0) {
/* Likely a counter overflow condition, skip this round */
return -1.0;
}
g_branches = hits_diff;
g_branch_misses = miss_diff;
if (hits_diff < (INTERVAL*100)) {
/* Likely no workload running on this core. Skip. */
return -1.0;
}
ratio = (float)miss_diff * (float)100 / (float)hits_diff;
2020-06-18 16:55:50 +00:00
/*
* Store the last few directions that the ratio indicates
* we should take. If there's on 'up', then we scale up
* quickly. If all indicate 'down', only then do we scale
* down. Each core_details struct has it's own array.
*/
freq_window_idx = ci->cd[core].freq_window_idx;
if (ratio > ci->branch_ratio_threshold)
ci->cd[core].freq_directions[freq_window_idx] = 1;
2019-06-25 11:12:58 +00:00
else
2020-06-18 16:55:50 +00:00
ci->cd[core].freq_directions[freq_window_idx] = 0;
freq_window_idx++;
freq_window_idx = freq_window_idx & (FREQ_WINDOW_SIZE-1);
ci->cd[core].freq_window_idx = freq_window_idx;
up_count = 0;
for (i = 0; i < FREQ_WINDOW_SIZE; i++)
up_count += ci->cd[core].freq_directions[i];
if (up_count == 0) {
if (ci->cd[core].freq_state != FREQ_MIN) {
power_manager_scale_core_min(core);
ci->cd[core].freq_state = FREQ_MIN;
}
} else {
if (ci->cd[core].freq_state != FREQ_MAX) {
power_manager_scale_core_max(core);
ci->cd[core].freq_state = FREQ_MAX;
}
}
2019-06-25 11:12:58 +00:00
g_active = 1;
return ratio;
}
/*
 * Start out-of-band branch monitoring on a core.
 *
 * Opens the core's MSR device read/write, programs the two event-select
 * registers so that counter 0 counts branches and counter 1 counts branch
 * misses, then swaps the descriptor for a read-only one that apply_policy()
 * uses for polling, and marks the core as monitored.
 *
 * The core's state (msr_fd, oob_enabled) is only modified once every step
 * has succeeded, so a failure never leaves a half-configured descriptor
 * behind and never disturbs a core that is already being monitored.
 *
 * core: index of the core to monitor. Indices outside
 *       [0, core_count) are ignored and reported as success, matching the
 *       historical behaviour for indices >= core_count.
 *
 * Returns 0 on success (or when the index is ignored), -1 on failure.
 */
int
add_core_to_monitor(int core)
{
	struct core_info *ci;
	char proc_file[UNIX_PATH_MAX];
	/*
	 * The msr device transfers whole 64-bit registers, so the value
	 * must be exactly 8 bytes wide regardless of the width of 'long'.
	 */
	uint64_t setup;
	int fd, old_fd;

	ci = get_core_info();
	if (core < 0 || core >= ci->core_count)
		return 0;

	snprintf(proc_file, sizeof(proc_file), "/dev/cpu/%d/msr", core);
	fd = open(proc_file, O_RDWR | O_SYNC);
	if (fd < 0) {
		RTE_LOG(ERR, POWER_MANAGER,
				"Error opening MSR file for core %d "
				"(is msr kernel module loaded?)\n",
				core);
		return -1;
	}

	/*
	 * Set up branch counters
	 */
	setup = IA32_PERFEVT_BRANCH_HITS;
	if (pwrite(fd, &setup, sizeof(setup), IA32_PERFEVTSEL0) !=
			(ssize_t)sizeof(setup)) {
		RTE_LOG(ERR, POWER_MANAGER,
				"unable to set counter for core %u\n",
				core);
		close(fd);
		return -1;
	}

	setup = IA32_PERFEVT_BRANCH_MISS;
	if (pwrite(fd, &setup, sizeof(setup), IA32_PERFEVTSEL1) !=
			(ssize_t)sizeof(setup)) {
		RTE_LOG(ERR, POWER_MANAGER,
				"unable to set counter for core %u\n",
				core);
		close(fd);
		return -1;
	}

	/*
	 * Close the file and re-open as read only so
	 * as not to hog the resource
	 */
	close(fd);
	fd = open(proc_file, O_RDONLY);
	if (fd < 0) {
		RTE_LOG(ERR, POWER_MANAGER,
				"Error opening MSR file for core %d "
				"(is msr kernel module loaded?)\n",
				core);
		return -1;
	}

	/*
	 * Publish the new descriptor first, then release the one left over
	 * from a previous add of the same core (0 means "none" in this file).
	 */
	old_fd = ci->cd[core].oob_enabled ? ci->cd[core].msr_fd : 0;
	ci->cd[core].msr_fd = fd;
	ci->cd[core].oob_enabled = 1;
	if (old_fd > 0)
		close(old_fd);

	return 0;
}
/*
 * Stop out-of-band branch monitoring on a core.
 *
 * Opens the core's MSR device read/write, clears both event-select
 * registers so the counters stop, and only then marks the core as no longer
 * monitored and releases the read-only polling descriptor.
 *
 * If the device cannot be opened or a register cannot be cleared, the core
 * is left exactly as it was (still monitored through its existing
 * descriptor) so the polling loop never sees an enabled core with an
 * invalid descriptor.
 *
 * core: index of the core to stop monitoring. Indices outside
 *       [0, core_count) and cores that are not monitored are ignored.
 *
 * Returns 0 on success (or when there was nothing to do), -1 on failure.
 */
int
remove_core_from_monitor(int core)
{
	struct core_info *ci;
	char proc_file[UNIX_PATH_MAX];
	/*
	 * The msr device transfers whole 64-bit registers, so the value
	 * must be exactly 8 bytes wide regardless of the width of 'long'.
	 */
	uint64_t setup = 0x0; /* clear event */
	int rw_fd, poll_fd;

	ci = get_core_info();
	if (core < 0 || core >= ci->core_count)
		return 0;
	if (!ci->cd[core].oob_enabled)
		return 0;

	/*
	 * open the msr file rw so we can disable the counters; the
	 * read-only polling descriptor stays in place until that worked
	 */
	snprintf(proc_file, sizeof(proc_file), "/dev/cpu/%d/msr", core);
	rw_fd = open(proc_file, O_RDWR | O_SYNC);
	if (rw_fd < 0) {
		RTE_LOG(ERR, POWER_MANAGER,
				"Error opening MSR file for core %d "
				"(is msr kernel module loaded?)\n",
				core);
		return -1;
	}

	if (pwrite(rw_fd, &setup, sizeof(setup), IA32_PERFEVTSEL0) !=
			(ssize_t)sizeof(setup)) {
		RTE_LOG(ERR, POWER_MANAGER,
				"unable to set counter for core %u\n",
				core);
		close(rw_fd);
		return -1;
	}

	if (pwrite(rw_fd, &setup, sizeof(setup), IA32_PERFEVTSEL1) !=
			(ssize_t)sizeof(setup)) {
		RTE_LOG(ERR, POWER_MANAGER,
				"unable to set counter for core %u\n",
				core);
		close(rw_fd);
		return -1;
	}

	close(rw_fd);

	/*
	 * Mark the core as disabled before closing the polling descriptor
	 * so it is never advertised as enabled with a closed descriptor.
	 */
	poll_fd = ci->cd[core].msr_fd;
	ci->cd[core].oob_enabled = 0;
	ci->cd[core].msr_fd = 0;
	if (poll_fd > 0)
		close(poll_fd);

	return 0;
}
/*
 * Initialise the branch monitor.
 *
 * This implementation has nothing to prepare and always reports success.
 *
 * Returns 0.
 */
int
branch_monitor_init(void)
{
	const int status = 0;

	return status;
}
/*
 * Polling loop of the branch monitor.
 *
 * Every INTERVAL microseconds, apply_policy() is run on each core that has
 * oob_enabled set. Once more than PRINT_LOOP_COUNT polls have elapsed, the
 * next pass prints, for every core whose policy was actually applied, the
 * core index, its miss ratio, the branch delta, and the number of samples
 * taken since the last printed entry; the line is then terminated and the
 * poll counter restarts.
 *
 * The loop runs until branch_monitor_exit() clears run_loop.
 */
void
run_branch_monitor(void)
{
	struct core_info *ci;
	int print = 0;
	int reads = 0;

	ci = get_core_info();

	while (run_loop) {
		int printed = 0;
		int j;

		usleep(INTERVAL);
		print++;

		for (j = 0; j < ci->core_count; j++) {
			float ratio;

			if (!ci->cd[j].oob_enabled)
				continue;

			ratio = apply_policy(j);
			if ((print > PRINT_LOOP_COUNT) && (g_active)) {
				/*
				 * g_branches is a uint64_t; widen it explicitly
				 * so the conversion matches on every ABI.
				 */
				printf(" %d: %.4f {%llu} {%d}", j,
						ratio,
						(unsigned long long)g_branches,
						reads);
				printed = 1;
				reads = 0;
			} else {
				reads++;
			}
		}

		if (print > PRINT_LOOP_COUNT) {
			if (printed)
				printf("\n");
			print = 0;
		}
	}
}