mirror of https://github.com/F-Stack/f-stack.git
280 lines
8.5 KiB
C
280 lines
8.5 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
|
|
* Copyright(c) 2018 Ericsson AB
|
|
*/
|
|
|
|
#ifndef _DSW_EVDEV_H_
|
|
#define _DSW_EVDEV_H_
|
|
|
|
#include <rte_event_ring.h>
|
|
#include <rte_eventdev.h>
|
|
|
|
#define DSW_PMD_NAME RTE_STR(event_dsw)
|
|
|
|
/* Code changes are required to allow more ports. */
|
|
#define DSW_MAX_PORTS (64)
|
|
#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
|
|
#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
|
|
#define DSW_MAX_PORT_OUT_BUFFER (32)
|
|
|
|
#define DSW_MAX_QUEUES (16)
|
|
|
|
#define DSW_MAX_EVENTS (16384)
|
|
|
|
/* Code changes are required to allow more flows than 32k. */
|
|
#define DSW_MAX_FLOWS_BITS (15)
|
|
#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
|
|
#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
|
|
|
|
/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
|
|
* but the 'dsw' scheduler (more or less) randomly assign flow id to
|
|
* events on parallel queues, to be able to reuse some of the
|
|
* migration mechanism and scheduling logic from
|
|
* RTE_SCHED_TYPE_ATOMIC. By moving one of the parallel "flows" from a
|
|
* particular port, the likely-hood of events being scheduled to this
|
|
* port is reduced, and thus a kind of statistical load balancing is
|
|
* achieved.
|
|
*/
|
|
#define DSW_PARALLEL_FLOWS (1024)
|
|
|
|
/* 'Background tasks' are polling the control rings for *
|
|
* migration-related messages, or flush the output buffer (so
|
|
* buffered events doesn't linger too long). Shouldn't be too low,
|
|
* since the system won't benefit from the 'batching' effects from
|
|
* the output buffer, and shouldn't be too high, since it will make
|
|
* buffered events linger too long in case the port goes idle.
|
|
*/
|
|
#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
|
|
|
|
/* Avoid making small 'loans' from the central in-flight event credit
|
|
* pool, to improve efficiency.
|
|
*/
|
|
#define DSW_MIN_CREDIT_LOAN (64)
|
|
#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
|
|
#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)
|
|
|
|
/* The rings are dimensioned so that all in-flight events can reside
|
|
* on any one of the port rings, to avoid the trouble of having to
|
|
* care about the case where there's no room on the destination port's
|
|
* input ring.
|
|
*/
|
|
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
|
|
|
|
#define DSW_MAX_LOAD (INT16_MAX)
|
|
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
|
|
#define DSW_LOAD_TO_PERCENT(x) ((100*x)/DSW_MAX_LOAD)
|
|
|
|
/* The thought behind keeping the load update interval shorter than
|
|
* the migration interval is that the load from newly migrated flows
|
|
* should 'show up' on the load measurement before new migrations are
|
|
* considered. This is to avoid having too many flows, from too many
|
|
* source ports, to be migrated too quickly to a lightly loaded port -
|
|
* in particular since this might cause the system to oscillate.
|
|
*/
|
|
#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
|
|
#define DSW_OLD_LOAD_WEIGHT (1)
|
|
|
|
/* The minimum time (in us) between two flow migrations. What puts an
|
|
* upper limit on the actual migration rate is primarily the pace in
|
|
* which the ports send and receive control messages, which in turn is
|
|
* largely a function of how much cycles are spent the processing of
|
|
* an event burst.
|
|
*/
|
|
#define DSW_MIGRATION_INTERVAL (1000)
|
|
#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
|
|
#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
|
|
|
|
#define DSW_MAX_EVENTS_RECORDED (128)
|
|
|
|
/* Only one outstanding migration per port is allowed */
|
|
#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS)
|
|
|
|
/* Enough room for paus request/confirm and unpaus request/confirm for
|
|
* all possible senders.
|
|
*/
|
|
#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
|
|
|
|
/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
|
|
* dequeue(), arrange events so that events with the same flow id on
|
|
* the same queue forms a back-to-back "burst", and also so that such
|
|
* bursts of different flow ids, but on the same queue, also come
|
|
* consecutively. All this in an attempt to improve data and
|
|
* instruction cache usage for the application, at the cost of a
|
|
* scheduler overhead increase.
|
|
*/
|
|
|
|
/* #define DSW_SORT_DEQUEUED */
|
|
|
|
struct dsw_queue_flow {
|
|
uint8_t queue_id;
|
|
uint16_t flow_hash;
|
|
};
|
|
|
|
enum dsw_migration_state {
|
|
DSW_MIGRATION_STATE_IDLE,
|
|
DSW_MIGRATION_STATE_PAUSING,
|
|
DSW_MIGRATION_STATE_FORWARDING,
|
|
DSW_MIGRATION_STATE_UNPAUSING
|
|
};
|
|
|
|
struct dsw_port {
|
|
uint16_t id;
|
|
|
|
/* Keeping a pointer here to avoid container_of() calls, which
|
|
* are expensive since they are very frequent and will result
|
|
* in an integer multiplication (since the port id is an index
|
|
* into the dsw_evdev port array).
|
|
*/
|
|
struct dsw_evdev *dsw;
|
|
|
|
uint16_t dequeue_depth;
|
|
uint16_t enqueue_depth;
|
|
|
|
int32_t inflight_credits;
|
|
|
|
int32_t new_event_threshold;
|
|
|
|
uint16_t pending_releases;
|
|
|
|
uint16_t next_parallel_flow_id;
|
|
|
|
uint16_t ops_since_bg_task;
|
|
|
|
/* most recent 'background' processing */
|
|
uint64_t last_bg;
|
|
|
|
/* For port load measurement. */
|
|
uint64_t next_load_update;
|
|
uint64_t load_update_interval;
|
|
uint64_t measurement_start;
|
|
uint64_t busy_start;
|
|
uint64_t busy_cycles;
|
|
uint64_t total_busy_cycles;
|
|
|
|
/* For the ctl interface and flow migration mechanism. */
|
|
uint64_t next_migration;
|
|
uint64_t migration_interval;
|
|
enum dsw_migration_state migration_state;
|
|
|
|
uint64_t migration_start;
|
|
uint64_t migrations;
|
|
uint64_t migration_latency;
|
|
|
|
uint8_t migration_target_port_id;
|
|
struct dsw_queue_flow migration_target_qf;
|
|
uint8_t cfm_cnt;
|
|
|
|
uint16_t paused_flows_len;
|
|
struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];
|
|
|
|
/* In a very contrived worst case all inflight events can be
|
|
* laying around paused here.
|
|
*/
|
|
uint16_t paused_events_len;
|
|
struct rte_event paused_events[DSW_MAX_EVENTS];
|
|
|
|
uint16_t seen_events_len;
|
|
uint16_t seen_events_idx;
|
|
struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];
|
|
|
|
uint64_t new_enqueued;
|
|
uint64_t forward_enqueued;
|
|
uint64_t release_enqueued;
|
|
uint64_t queue_enqueued[DSW_MAX_QUEUES];
|
|
|
|
uint64_t dequeued;
|
|
uint64_t queue_dequeued[DSW_MAX_QUEUES];
|
|
|
|
uint16_t out_buffer_len[DSW_MAX_PORTS];
|
|
struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];
|
|
|
|
uint16_t in_buffer_len;
|
|
uint16_t in_buffer_start;
|
|
/* This buffer may contain events that were read up from the
|
|
* in_ring during the flow migration process.
|
|
*/
|
|
struct rte_event in_buffer[DSW_MAX_EVENTS];
|
|
|
|
struct rte_event_ring *in_ring __rte_cache_aligned;
|
|
|
|
struct rte_ring *ctl_in_ring __rte_cache_aligned;
|
|
|
|
/* Estimate of current port load. */
|
|
rte_atomic16_t load __rte_cache_aligned;
|
|
} __rte_cache_aligned;
|
|
|
|
struct dsw_queue {
|
|
uint8_t schedule_type;
|
|
uint8_t serving_ports[DSW_MAX_PORTS];
|
|
uint16_t num_serving_ports;
|
|
|
|
uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned;
|
|
};
|
|
|
|
struct dsw_evdev {
|
|
struct rte_eventdev_data *data;
|
|
|
|
struct dsw_port ports[DSW_MAX_PORTS];
|
|
uint16_t num_ports;
|
|
struct dsw_queue queues[DSW_MAX_QUEUES];
|
|
uint8_t num_queues;
|
|
int32_t max_inflight;
|
|
|
|
rte_atomic32_t credits_on_loan __rte_cache_aligned;
|
|
};
|
|
|
|
#define DSW_CTL_PAUS_REQ (0)
|
|
#define DSW_CTL_UNPAUS_REQ (1)
|
|
#define DSW_CTL_CFM (2)
|
|
|
|
/* sizeof(struct dsw_ctl_msg) must be equal or less than
|
|
* sizeof(void *), to fit on the control ring.
|
|
*/
|
|
struct dsw_ctl_msg {
|
|
uint8_t type:2;
|
|
uint8_t originating_port_id:6;
|
|
uint8_t queue_id;
|
|
uint16_t flow_hash;
|
|
} __rte_packed;
|
|
|
|
uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
|
|
uint16_t dsw_event_enqueue_burst(void *port,
|
|
const struct rte_event events[],
|
|
uint16_t events_len);
|
|
uint16_t dsw_event_enqueue_new_burst(void *port,
|
|
const struct rte_event events[],
|
|
uint16_t events_len);
|
|
uint16_t dsw_event_enqueue_forward_burst(void *port,
|
|
const struct rte_event events[],
|
|
uint16_t events_len);
|
|
|
|
uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
|
|
uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
|
|
uint16_t num, uint64_t wait);
|
|
|
|
int dsw_xstats_get_names(const struct rte_eventdev *dev,
|
|
enum rte_event_dev_xstats_mode mode,
|
|
uint8_t queue_port_id,
|
|
struct rte_event_dev_xstats_name *xstats_names,
|
|
unsigned int *ids, unsigned int size);
|
|
int dsw_xstats_get(const struct rte_eventdev *dev,
|
|
enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
|
|
const unsigned int ids[], uint64_t values[], unsigned int n);
|
|
uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
|
|
const char *name, unsigned int *id);
|
|
|
|
static inline struct dsw_evdev *
|
|
dsw_pmd_priv(const struct rte_eventdev *eventdev)
|
|
{
|
|
return eventdev->data->dev_private;
|
|
}
|
|
|
|
#define DSW_LOG_DP(level, fmt, args...) \
|
|
RTE_LOG_DP(level, EVENTDEV, "[%s] %s() line %u: " fmt, \
|
|
DSW_PMD_NAME, \
|
|
__func__, __LINE__, ## args)
|
|
|
|
#define DSW_LOG_DP_PORT(level, port_id, fmt, args...) \
|
|
DSW_LOG_DP(level, "<Port %d> " fmt, port_id, ## args)
|
|
|
|
#endif
|