From c8f44affb7244f2ac3e703cab13d55ede27621bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Nov 2011 15:29:55 +0000 Subject: net: introduce and use netdev_features_t for device features sets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: add couple missing conversions in drivers split unexporting netdev_fix_features() implemented %pNF convert sock::sk_route_(no?)caps Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- lib/vsprintf.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 993599e66e5..8e75003d62f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -777,6 +777,18 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } +static +char *netdev_feature_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec) +{ + spec.flags |= SPECIAL | SMALL | ZEROPAD; + if (spec.field_width == -1) + spec.field_width = 2 + 2 * sizeof(netdev_features_t); + spec.base = 16; + + return number(buf, end, *(const netdev_features_t *)addr, spec); +} + int kptr_restrict __read_mostly; /* @@ -824,6 +836,7 @@ int kptr_restrict __read_mostly; * Do not use this feature without some mechanism to verify the * correctness of the format string and va_list arguments. * - 'K' For a kernel pointer that should be hidden from unprivileged users + * - 'NF' For a netdev_features_t * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -896,6 +909,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, has_capability_noaudit(current, CAP_SYSLOG)))) ptr = NULL; break; + case 'N': + switch (fmt[1]) { + case 'F': + return netdev_feature_string(buf, end, ptr, spec); + } + break; } spec.flags |= SMALL; if (spec.field_width == -1) { -- cgit v1.2.3 From 75957ba36c05b979701e9ec64b37819adc12f830 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 28 Nov 2011 16:32:35 +0000 Subject: dql: Dynamic queue limits Implementation of dynamic queue limits (dql). This is a libary which allows a queue limit to be dynamically managed. The goal of dql is to set the queue limit, number of objects to the queue, to be minimized without allowing the queue to be starved. dql would be used with a queue which has these properties: 1) Objects are queued up to some limit which can be expressed as a count of objects. 2) Periodically a completion process executes which retires consumed objects. 3) Starvation occurs when limit has been reached, all queued data has actually been consumed but completion processing has not yet run, so queuing new data is blocked. 4) Minimizing the amount of queued data is desirable. A canonical example of such a queue would be a NIC HW transmit queue. The queue limit is dynamic, it will increase or decrease over time depending on the workload. The queue limit is recalculated each time completion processing is done. Increases occur when the queue is starved and can exponentially increase over successive intervals. Decreases occur when more data is being maintained in the queue than needed to prevent starvation. The number of extra objects, or "slack", is measured over successive intervals, and to avoid hysteresis the limit is only reduced by the miminum slack seen over a configurable time period. dql API provides routines to manage the queue: - dql_init is called to intialize the dql structure - dql_reset is called to reset dynamic values - dql_queued called when objects are being enqueued - dql_avail returns availability in the queue - dql_completed is called when objects have be consumed in the queue Configuration consists of: - max_limit, maximum limit - min_limit, minimum limit - slack_hold_time, time to measure instances of slack before reducing queue limit Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- lib/Kconfig | 3 + lib/Makefile | 2 + lib/dynamic_queue_limits.c | 133 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 lib/dynamic_queue_limits.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 32f3e5ae2be..63b5782732e 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -244,6 +244,9 @@ config CPU_RMAP bool depends on SMP +config DQL + bool + # # Netlink attribute parsing support is select'ed if needed # diff --git a/lib/Makefile b/lib/Makefile index a4da283f5dc..ff00d4dcb7e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o obj-$(CONFIG_CORDIC) += cordic.o +obj-$(CONFIG_DQL) += dynamic_queue_limits.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c new file mode 100644 index 00000000000..3d1bdcdd7db --- /dev/null +++ b/lib/dynamic_queue_limits.c @@ -0,0 +1,133 @@ +/* + * Dynamic byte queue limits. See include/linux/dynamic_queue_limits.h + * + * Copyright (c) 2011, Tom Herbert + */ +#include +#include +#include +#include +#include + +#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0) + +/* Records completed count and recalculates the queue limit */ +void dql_completed(struct dql *dql, unsigned int count) +{ + unsigned int inprogress, prev_inprogress, limit; + unsigned int ovlimit, all_prev_completed, completed; + + /* Can't complete more than what's in queue */ + BUG_ON(count > dql->num_queued - dql->num_completed); + + completed = dql->num_completed + count; + limit = dql->limit; + ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit); + inprogress = dql->num_queued - completed; + prev_inprogress = dql->prev_num_queued - dql->num_completed; + all_prev_completed = POSDIFF(completed, dql->prev_num_queued); + + if ((ovlimit && !inprogress) || + (dql->prev_ovlimit && all_prev_completed)) { + /* + * Queue considered starved if: + * - The queue was over-limit in the last interval, + * and there is no more data in the queue. + * OR + * - The queue was over-limit in the previous interval and + * when enqueuing it was possible that all queued data + * had been consumed. This covers the case when queue + * may have becomes starved between completion processing + * running and next time enqueue was scheduled. + * + * When queue is starved increase the limit by the amount + * of bytes both sent and completed in the last interval, + * plus any previous over-limit. + */ + limit += POSDIFF(completed, dql->prev_num_queued) + + dql->prev_ovlimit; + dql->slack_start_time = jiffies; + dql->lowest_slack = UINT_MAX; + } else if (inprogress && prev_inprogress && !all_prev_completed) { + /* + * Queue was not starved, check if the limit can be decreased. + * A decrease is only considered if the queue has been busy in + * the whole interval (the check above). + * + * If there is slack, the amount of execess data queued above + * the the amount needed to prevent starvation, the queue limit + * can be decreased. To avoid hysteresis we consider the + * minimum amount of slack found over several iterations of the + * completion routine. + */ + unsigned int slack, slack_last_objs; + + /* + * Slack is the maximum of + * - The queue limit plus previous over-limit minus twice + * the number of objects completed. Note that two times + * number of completed bytes is a basis for an upper bound + * of the limit. + * - Portion of objects in the last queuing operation that + * was not part of non-zero previous over-limit. That is + * "round down" by non-overlimit portion of the last + * queueing operation. + */ + slack = POSDIFF(limit + dql->prev_ovlimit, + 2 * (completed - dql->num_completed)); + slack_last_objs = dql->prev_ovlimit ? + POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0; + + slack = max(slack, slack_last_objs); + + if (slack < dql->lowest_slack) + dql->lowest_slack = slack; + + if (time_after(jiffies, + dql->slack_start_time + dql->slack_hold_time)) { + limit = POSDIFF(limit, dql->lowest_slack); + dql->slack_start_time = jiffies; + dql->lowest_slack = UINT_MAX; + } + } + + /* Enforce bounds on limit */ + limit = clamp(limit, dql->min_limit, dql->max_limit); + + if (limit != dql->limit) { + dql->limit = limit; + ovlimit = 0; + } + + dql->adj_limit = limit + completed; + dql->prev_ovlimit = ovlimit; + dql->prev_last_obj_cnt = dql->last_obj_cnt; + dql->num_completed = completed; + dql->prev_num_queued = dql->num_queued; +} +EXPORT_SYMBOL(dql_completed); + +void dql_reset(struct dql *dql) +{ + /* Reset all dynamic values */ + dql->limit = 0; + dql->num_queued = 0; + dql->num_completed = 0; + dql->last_obj_cnt = 0; + dql->prev_num_queued = 0; + dql->prev_last_obj_cnt = 0; + dql->prev_ovlimit = 0; + dql->lowest_slack = UINT_MAX; + dql->slack_start_time = jiffies; +} +EXPORT_SYMBOL(dql_reset); + +int dql_init(struct dql *dql, unsigned hold_time) +{ + dql->max_limit = DQL_MAX_LIMIT; + dql->min_limit = 0; + dql->slack_hold_time = hold_time; + dql_reset(dql); + return 0; +} +EXPORT_SYMBOL(dql_init); -- cgit v1.2.3 From 8af2a218de38f51ea4b4fa48cac1273319ae260c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Dec 2011 06:06:03 +0000 Subject: sch_red: Adaptative RED AQM Adaptative RED AQM for linux, based on paper from Sally FLoyd, Ramakrishna Gummadi, and Scott Shenker, August 2001 : http://icir.org/floyd/papers/adaptiveRed.pdf Goal of Adaptative RED is to make max_p a dynamic value between 1% and 50% to reach the target average queue : (max_th - min_th) / 2 Every 500 ms: if (avg > target and max_p <= 0.5) increase max_p : max_p += alpha; else if (avg < target and max_p >= 0.01) decrease max_p : max_p *= beta; target :[min_th + 0.4*(min_th - max_th), min_th + 0.6*(min_th - max_th)]. alpha : min(0.01, max_p / 4) beta : 0.9 max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa) Changes against our RED implementation are : max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32 fixed point number, to allow full range described in Adatative paper. To deliver a random number, we now use a reciprocal divide (thats really a multiply), but this operation is done once per marked/droped packet when in RED_BETWEEN_TRESH window, so added cost (compared to previous AND operation) is near zero. dump operation gives current max_p value in a new TCA_RED_MAX_P attribute. Example on a 10Mbit link : tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \ limit 400000 min 30000 max 90000 avpkt 1000 \ burst 55 ecn adaptative bandwidth 10Mbit # tc -s -d qdisc show dev eth3 ... qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn adaptative ewma 5 max_p=0.113335 Scell_log 15 Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0) rate 9749Kbit 831pps backlog 72056b 16p requeues 0 marked 1357 early 35 pdrop 0 other 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- lib/reciprocal_div.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index 6a3bd48fa2a..75510e94f7d 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,5 +1,6 @@ #include #include +#include u32 reciprocal_value(u32 k) { @@ -7,3 +8,4 @@ u32 reciprocal_value(u32 k) do_div(val, k); return (u32)val; } +EXPORT_SYMBOL(reciprocal_value); -- cgit v1.2.3