From 0ba7a3ba6608de6e0c0bfe3009a63d7e0b7ab0ce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:28:47 -0300 Subject: [PATCH 01/10] [CCID3] Avoid unsigned integer overflows in usecs_div Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 7bf3b3a91e9..ae0500c79d0 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -43,12 +43,22 @@ #include "ccid3.h" /* - * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. + * Reason for maths here is to avoid 32 bit overflow when a is big. + * With this we get close to the limit. */ static inline u32 usecs_div(const u32 a, const u32 b) { - const u32 tmp = a * (USEC_PER_SEC / 10); - return b > 20 ? tmp / (b / 10) : tmp; + const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 : + a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 : + a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 : + a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 : + a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 : + a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 : + a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 : + a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 : + 100000; + const u32 tmp = a * (USEC_PER_SEC / div); + return (b >= 2 * div) ? tmp / (b / div) : tmp; } static int ccid3_debug; From 507d37cf269ebbd1b32bcc435fe577e411f73151 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:30:07 -0300 Subject: [PATCH 02/10] [CCID] Only call the HC insert_options methods when requested Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 2 ++ net/dccp/ccids/ccid3.c | 5 ++++- net/dccp/options.c | 11 ++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 007c290f74d..5e0af0d08a9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -433,6 +433,8 @@ struct dccp_sock { struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; enum dccp_role dccps_role:2; + __u8 dccps_hc_rx_insert_options:1; + __u8 dccps_hc_tx_insert_options:1; }; static inline struct dccp_sock *dccp_sk(const struct sock *sk) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index ae0500c79d0..37fd9eb8daa 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -358,10 +358,12 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, } /* Can we send? if so add options and add to packet history */ - if (rc == 0) + if (rc == 0) { + dp->dccps_hc_tx_insert_options = 1; new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + } out: return rc; } @@ -811,6 +813,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_pinv = ~0; else hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); } diff --git a/net/dccp/options.c b/net/dccp/options.c index 382c5894acb..7ad2f4266ff 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -505,13 +505,18 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1)) dccp_insert_option_ack_vector(sk, skb); - if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } - ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + if (dp->dccps_hc_rx_insert_options) { + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); + dp->dccps_hc_rx_insert_options = 0; + } + if (dp->dccps_hc_tx_insert_options) { + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + dp->dccps_hc_tx_insert_options = 0; + } /* XXX: insert other options when appropriate */ From 27ae543e6f116df66e2b19ff0a3aa1053e4784d8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:31:07 -0300 Subject: [PATCH 03/10] [CCID3] Calculate ccid3hcrx_x_recv using usecs_div Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 37fd9eb8daa..63f897394a1 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -779,11 +779,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) case TFRC_RSTATE_DATA: { const u32 delta = timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_feedback); - - hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * - USEC_PER_SEC); - if (likely(delta > 1)) - hcrx->ccid3hcrx_x_recv /= delta; + hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, + delta); } break; default: From 1c14ac0ae8eb62cbb40af1e31b156994c7d7d3d5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:32:01 -0300 Subject: [PATCH 04/10] [DCCP] Give precedence to the biggest ELAPSED_TIME We can get this value in an TIMESTAMP_ECHO and/or in an ELAPSED_TIME option, if receiving both give precendence to the biggest one. In my tests they are very close if not equal at all times, so we may well think about removing the code in CCID3 that inserts this option and leaving this to the core, and perhaps even use just TIMESTAMP_ECHO including the elapsed time. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/options.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/net/dccp/options.c b/net/dccp/options.c index 7ad2f4266ff..34b230a0087 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -72,6 +72,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; unsigned char *value; + u32 elapsed_time; memset(opt_recv, 0, sizeof(*opt_recv)); @@ -159,18 +160,18 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); - if (len > 4) { - if (len == 6) - opt_recv->dccpor_elapsed_time = - ntohs(*(u16 *)(value + 4)); - else - opt_recv->dccpor_elapsed_time = - ntohl(*(u32 *)(value + 4)); - dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", - debug_prefix, - opt_recv->dccpor_elapsed_time); - } + if (len == 4) + break; + + if (len == 6) + elapsed_time = ntohs(*(u16 *)(value + 4)); + else + elapsed_time = ntohl(*(u32 *)(value + 4)); + + /* Give precedence to the biggest ELAPSED_TIME */ + if (elapsed_time > opt_recv->dccpor_elapsed_time) + opt_recv->dccpor_elapsed_time = elapsed_time; break; case DCCPO_ELAPSED_TIME: if (len != 2 && len != 4) @@ -180,14 +181,15 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) continue; if (len == 2) - opt_recv->dccpor_elapsed_time = - ntohs(*(u16 *)value); + elapsed_time = ntohs(*(u16 *)value); else - opt_recv->dccpor_elapsed_time = - ntohl(*(u32 *)value); + elapsed_time = ntohl(*(u32 *)value); + + if (elapsed_time > opt_recv->dccpor_elapsed_time) + opt_recv->dccpor_elapsed_time = elapsed_time; dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, - opt_recv->dccpor_elapsed_time); + elapsed_time); break; /* * From draft-ietf-dccp-spec-11.txt: From 1a28599a2c2781dd6af72f4f84175e2db74d3bb1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:32:56 -0300 Subject: [PATCH 05/10] [CCID3] Use ELAPSED_TIME in the HC TX RTT estimation Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 63f897394a1..86c109e2f2c 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -483,7 +483,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) opt_recv = &hctx->ccid3hctx_options_received; - t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; x_recv = opt_recv->ccid3or_receive_rate; pinv = opt_recv->ccid3or_loss_event_rate; @@ -509,8 +509,12 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Update RTT */ r_sample = timeval_now_delta(&packet->dccphtx_tstamp); - /* FIXME: */ - // r_sample -= usecs_to_jiffies(t_elapsed * 10); + if (unlikely(r_sample <= t_elapsed)) + LIMIT_NETDEBUG(KERN_WARNING + "%s: r_sample=%uus, t_elapsed=%uus\n", + __FUNCTION__, r_sample, t_elapsed); + else + r_sample -= t_elapsed; /* Update RTT estimate by * If (No feedback recv) From b3a3077d963fc54a25be26e2e84fe9f4327c1e12 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:34:10 -0300 Subject: [PATCH 06/10] [CCID3] Make the ccid3hcrx_rtt calc look more like the ccid3hctx_rtt one Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 86c109e2f2c..0804b3e435c 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -962,7 +962,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; - u32 p_prev; + u32 p_prev, r_sample, t_elapsed; int ins; if (hcrx == NULL) @@ -982,9 +982,23 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) break; p_prev = hcrx->ccid3hcrx_rtt; do_gettimeofday(&now); - hcrx->ccid3hcrx_rtt = timeval_usecs(&now) - - (opt_recv->dccpor_timestamp_echo - - opt_recv->dccpor_elapsed_time) * 10; + timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); + r_sample = timeval_usecs(&now); + t_elapsed = opt_recv->dccpor_elapsed_time * 10; + + if (unlikely(r_sample <= t_elapsed)) + LIMIT_NETDEBUG(KERN_WARNING + "%s: r_sample=%uus, t_elapsed=%uus\n", + __FUNCTION__, r_sample, t_elapsed); + else + r_sample -= t_elapsed; + + if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) + hcrx->ccid3hcrx_rtt = r_sample; + else + hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + + r_sample / 10; + if (p_prev != hcrx->ccid3hcrx_rtt) ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", dccp_role(sk), hcrx->ccid3hcrx_rtt, From 954ee31f366fabe53fb450482789258fe552f40b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:37:05 -0300 Subject: [PATCH 07/10] [CCID3] Initialize more fields in ccid3_hc_rx_init The initialization of ccid3hcrx_rtt to 5ms is just a bandaid, I'll continue auditing the CCID3 HC rx codebase to fix this properly, probably I'll add a feedback timer as suggested in the CCID3 draft. Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 0804b3e435c..145aafafe4e 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1100,11 +1100,9 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - /* - * XXX this seems to be paranoid, need to think more about this, for - * now start with something different than zero. -acme - */ - hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5; + do_gettimeofday(&hcrx->ccid3hcrx_tstamp_last_ack); + hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; + hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ return 0; } From b0e567806d16586629468c824dfb2e71155df7da Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:38:35 -0300 Subject: [PATCH 08/10] [DCCP] Introduce dccp_timestamp To start the timestamps with 0.0ms, easing the integer maths in the CCIDs, this probably will be reworked to use the to be introduced struct timeval_offset infrastructure out of skb_get_timestamp, etc. Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 1 + net/dccp/ccids/ccid3.c | 27 ++++++++--------- net/dccp/ccids/ccid3.h | 2 +- net/dccp/ccids/lib/packet_history.h | 3 +- net/dccp/dccp.h | 16 +++------- net/dccp/input.c | 4 +-- net/dccp/ipv4.c | 1 + net/dccp/minisocks.c | 1 + net/dccp/options.c | 45 ++++++++++++++++++++++------- 9 files changed, 61 insertions(+), 39 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 5e0af0d08a9..8bf4bacb505 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -432,6 +432,7 @@ struct dccp_sock { struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; + struct timeval dccps_epoch; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 145aafafe4e..348e6fb262c 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -169,7 +169,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk) } else { struct timeval now; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, @@ -317,7 +317,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); } - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: @@ -382,7 +382,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) return; } - do_gettimeofday(&now); + dccp_timestamp(sk, &now); /* check if we have sent a data packet */ if (len > 0) { @@ -461,6 +461,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; + struct timeval now; unsigned long next_tmout; u32 t_elapsed; u32 pinv; @@ -508,7 +509,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update RTT */ - r_sample = timeval_now_delta(&packet->dccphtx_tstamp); + dccp_timestamp(sk, &now); + r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); if (unlikely(r_sample <= t_elapsed)) LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, t_elapsed=%uus\n", @@ -774,7 +776,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: @@ -903,10 +905,9 @@ found: if (rtt == 0) rtt = 1; - delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); - x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; - if (likely(delta > 1)) - x_recv /= delta; + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); + x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); @@ -981,7 +982,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (opt_recv->dccpor_timestamp_echo == 0) break; p_prev = hcrx->ccid3hcrx_rtt; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); r_sample = timeval_usecs(&now); t_elapsed = opt_recv->dccpor_elapsed_time * 10; @@ -1013,7 +1014,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " @@ -1045,7 +1046,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (ins != 0) break; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= hcrx->ccid3hcrx_rtt) { hcrx->ccid3hcrx_tstamp_last_ack = now; @@ -1100,7 +1101,7 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - do_gettimeofday(&hcrx->ccid3hcrx_tstamp_last_ack); + dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ return 0; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index ee8cbace663..58be6125b69 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -115,7 +115,7 @@ struct ccid3_hc_rx_sock { u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; - unsigned long ccid3hcrx_rtt; + u32 ccid3hcrx_rtt; u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index fb90a91aa93..b375ebdb7dc 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -134,6 +134,7 @@ static inline struct dccp_tx_hist_entry * static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const unsigned int __nocast prio) @@ -148,7 +149,7 @@ static inline struct dccp_rx_hist_entry * entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - do_gettimeofday(&(entry->dccphrx_tstamp)); + dccp_timestamp(sk, &entry->dccphrx_tstamp); } return entry; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 33456c0d593..95c4630b3b1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -426,10 +426,13 @@ extern struct dccp_ackpkts * dccp_ackpkts_alloc(unsigned int len, const unsigned int __nocast priority); extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); -extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); +extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); + static inline suseconds_t timeval_usecs(const struct timeval *tv) { return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; @@ -468,17 +471,6 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } -/* - * Returns the difference in usecs between timeval - * passed in and current time - */ -static inline suseconds_t timeval_now_delta(const struct timeval *tv) -{ - struct timeval now; - do_gettimeofday(&now); - return timeval_delta(&now, tv); -} - #ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); diff --git a/net/dccp/input.c b/net/dccp/input.c index ef29cef1daf..c60bc3433f5 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -170,7 +170,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dp->dccps_options.dccpo_send_ack_vector) { struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " @@ -498,7 +498,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * DCCP_ACKPKTS_STATE_ECN_MARKED */ if (dp->dccps_options.dccpo_send_ack_vector) { - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) goto discard; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3fc75dbee4b..fee9a8c3777 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1243,6 +1243,7 @@ static int dccp_v4_init_sock(struct sock *sk) static int dccp_ctl_socket_init = 1; dccp_options_init(&dp->dccps_options); + do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { dp->dccps_hc_rx_ackpkts = diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index ce5dff4ac22..18461bc04cb 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -96,6 +96,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_hc_rx_ackpkts = NULL; newdp->dccps_role = DCCP_ROLE_SERVER; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { newdp->dccps_hc_rx_ackpkts = diff --git a/net/dccp/options.c b/net/dccp/options.c index 34b230a0087..d4c4242d8dd 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -140,7 +140,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - do_gettimeofday(&dp->dccps_timestamp_time); + dccp_timestamp(sk, &dp->dccps_timestamp_time); dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, @@ -361,9 +361,13 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; + struct timeval now; + u32 elapsed_time; unsigned char *to, *from; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; + if (elapsed_time != 0) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); @@ -428,13 +432,29 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) (unsigned long long) ap->dccpap_ack_ackno); } +void dccp_timestamp(const struct sock *sk, struct timeval *tv) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + do_gettimeofday(tv); + tv->tv_sec -= dp->dccps_epoch.tv_sec; + tv->tv_usec -= dp->dccps_epoch.tv_usec; + + while (tv->tv_usec < 0) { + tv->tv_sec--; + tv->tv_usec += USEC_PER_SEC; + } +} + +EXPORT_SYMBOL_GPL(dccp_timestamp); + void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { struct timeval tv; u32 now; - do_gettimeofday(&tv); - now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; + dccp_timestamp(sk, &tv); + now = timeval_usecs(&tv) / 10; /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -452,13 +472,17 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; #endif + struct timeval now; u32 tstamp_echo; - const u32 elapsed_time = - timeval_now_delta(&dp->dccps_timestamp_time) / 10; - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 6 + elapsed_time_len; + u32 elapsed_time; + int len, elapsed_time_len; unsigned char *to; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + len = 6 + elapsed_time_len; + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " "timestamp echo!\n"); @@ -623,7 +647,8 @@ static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, /* * Implements the draft-ietf-dccp-spec-11.txt Appendix A */ -int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) +int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state) { /* * Check at the right places if the buffer is full, if it is, tell the @@ -704,7 +729,7 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) } ap->dccpap_buf_ackno = ackno; - do_gettimeofday(&ap->dccpap_time); + dccp_timestamp(sk, &ap->dccpap_time); out: dccp_pr_debug(""); dccp_ackpkts_print(ap); From 59725dc2a2e86a03bbf97976ede3bdc6f95bba92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:40:58 -0300 Subject: [PATCH 09/10] [CCID3] Introduce ccid3_hc_[rt]x_sk() for overal consistency Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 70 +++++++++++++++++------------------------- net/dccp/ccids/ccid3.h | 12 +++++--- 2 files changed, 37 insertions(+), 45 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 348e6fb262c..ea30012dd19 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -112,8 +112,7 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", @@ -154,8 +153,7 @@ static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) */ static void ccid3_hc_tx_update_x(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); /* To avoid large error in calcX */ if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { @@ -184,9 +182,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk) static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - struct dccp_sock *dp = dccp_sk(sk); unsigned long next_tmout = 0; - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -284,7 +281,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, int len) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct dccp_tx_hist_entry *new_packet; struct timeval now; long delay; @@ -370,8 +367,8 @@ out: static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; BUG_ON(hctx == NULL); @@ -457,8 +454,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; struct timeval now; @@ -609,8 +606,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) @@ -624,8 +620,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, unsigned char *value) { int rc = 0; - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; if (hctx == NULL) @@ -688,11 +684,11 @@ static int ccid3_hc_tx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), - gfp_any()); - if (hctx == NULL) + dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + if (dp->dccps_hc_tx_ccid_private == NULL) return -ENOMEM; + hctx = ccid3_hc_tx_sk(sk); memset(hctx, 0, sizeof(*hctx)); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && @@ -714,7 +710,7 @@ static int ccid3_hc_tx_init(struct sock *sk) static void ccid3_hc_tx_exit(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); BUG_ON(hctx == NULL); @@ -756,8 +752,7 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", @@ -769,8 +764,8 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, static void ccid3_hc_rx_send_feedback(struct sock *sk) { + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; struct dccp_rx_hist_entry *packet; struct timeval now; @@ -822,9 +817,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); u32 x_recv, pinv; - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) @@ -853,8 +847,7 @@ static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; struct timeval tstamp = { 0, }; @@ -926,8 +919,7 @@ found: static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); if (seq_loss != DCCP_MAX_SEQNO + 1 && list_empty(&hcrx->ccid3hcrx_li_hist)) { @@ -945,8 +937,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) static void ccid3_hc_rx_detect_loss(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); u8 win_loss; const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, @@ -957,8 +948,7 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; @@ -972,7 +962,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); - opt_recv = &dp->dccps_options_received; + opt_recv = &dccp_sk(sk)->dccps_options_received; switch (DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_ACK: @@ -1085,11 +1075,11 @@ static int ccid3_hc_rx_init(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), - gfp_any()); - if (hcrx == NULL) + dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); + if (dp->dccps_hc_rx_ccid_private == NULL) return -ENOMEM; + hcrx = ccid3_hc_rx_sk(sk); memset(hcrx, 0, sizeof(*hcrx)); if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && @@ -1109,8 +1099,8 @@ static int ccid3_hc_rx_init(struct sock *sk) static void ccid3_hc_rx_exit(struct sock *sk) { + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); @@ -1131,8 +1121,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { - const struct dccp_sock *dp = dccp_sk(sk); - const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); if (hcrx == NULL) return; @@ -1144,8 +1133,7 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - const struct dccp_sock *dp = dccp_sk(sk); - const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); if (hctx == NULL) return; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 58be6125b69..d16f00d784f 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -128,10 +128,14 @@ struct ccid3_hc_rx_sock { u32 ccid3hcrx_x_recv; }; -#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) +static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_tx_ccid_private; +} -#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ - ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) +static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_rx_ccid_private; +} #endif /* _DCCP_CCID3_H_ */ From cb7b593c2c808b32a1ea188599713c434b95f849 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Sep 2005 13:35:42 -0700 Subject: [PATCH 10/10] [IPV4] fib_trie: fix proc interface Create one iterator for walking over FIB trie, and use it for all the /proc functions. Add a /proc/net/route output for backwards compatibility with old applications. Make initialization of fib_trie same as fib_hash so no #ifdef is needed in af_inet.c Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=5209 Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 13 - net/ipv4/fib_trie.c | 830 +++++++++++++++++++++----------------------- 2 files changed, 398 insertions(+), 445 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bf147f8db39..a9d84f93442 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1248,11 +1248,6 @@ module_init(inet_init); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -#ifdef CONFIG_IP_FIB_TRIE -extern int fib_stat_proc_init(void); -extern void fib_stat_proc_exit(void); -#endif - static int __init ipv4_proc_init(void) { int rc = 0; @@ -1265,19 +1260,11 @@ static int __init ipv4_proc_init(void) goto out_udp; if (fib_proc_init()) goto out_fib; -#ifdef CONFIG_IP_FIB_TRIE - if (fib_stat_proc_init()) - goto out_fib_stat; -#endif if (ip_misc_proc_init()) goto out_misc; out: return rc; out_misc: -#ifdef CONFIG_IP_FIB_TRIE - fib_stat_proc_exit(); -out_fib_stat: -#endif fib_proc_exit(); out_fib: udp4_proc_exit(); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b2dea4e5da7..1b63b482416 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -43,7 +43,7 @@ * 2 of the License, or (at your option) any later version. */ -#define VERSION "0.402" +#define VERSION "0.403" #include #include @@ -164,7 +164,6 @@ static struct node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); -static void trie_dump_seq(struct seq_file *seq, struct trie *t); static kmem_cache_t *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; @@ -1971,378 +1970,137 @@ struct fib_table * __init fib_hash_init(int id) return tb; } -/* Trie dump functions */ - -static void putspace_seq(struct seq_file *seq, int n) -{ - while (n--) - seq_printf(seq, " "); -} - -static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) -{ - while (bits--) - seq_printf(seq, "%s", (v & (1<: "); - seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); - - if (IS_LEAF(n)) { - struct leaf *l = (struct leaf *)n; - struct fib_alias *fa; - int i; - - seq_printf(seq, "key=%d.%d.%d.%d\n", - n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); - - for (i = 32; i >= 0; i--) - if (find_leaf_info(&l->list, i)) { - struct list_head *fa_head = get_fa_head(l, i); - - if (!fa_head) - continue; - - if (list_empty(fa_head)) - continue; - - putspace_seq(seq, indent+2); - seq_printf(seq, "{/%d...dumping}\n", i); - - list_for_each_entry_rcu(fa, fa_head, fa_list) { - putspace_seq(seq, indent+2); - if (fa->fa_info == NULL) { - seq_printf(seq, "Error fa_info=NULL\n"); - continue; - } - if (fa->fa_info->fib_nh == NULL) { - seq_printf(seq, "Error _fib_nh=NULL\n"); - continue; - } - - seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", - fa->fa_type, - fa->fa_scope, - fa->fa_tos); - } - } - } else { - struct tnode *tn = (struct tnode *)n; - int plen = ((struct tnode *)n)->pos; - t_key prf = MASK_PFX(n->key, plen); - - seq_printf(seq, "key=%d.%d.%d.%d/%d\n", - prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); - - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos)); - printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); - seq_printf(seq, "}\n"); - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{pos=%d", tn->pos); - seq_printf(seq, " (skip=%d bits)", tn->pos - pend); - seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits)); - putspace_seq(seq, indent); seq_printf(seq, "| "); - seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children); - } -} - -static void trie_dump_seq(struct seq_file *seq, struct trie *t) -{ - struct node *n; - int cindex = 0; - int indent = 1; - int pend = 0; - int depth = 0; - struct tnode *tn; - - rcu_read_lock(); - n = rcu_dereference(t->trie); - seq_printf(seq, "------ trie_dump of t=%p ------\n", t); - - if (!n) { - seq_printf(seq, "------ trie is empty\n"); - - rcu_read_unlock(); - return; - } - - printnode_seq(seq, indent, n, pend, cindex, 0); - - if (!IS_TNODE(n)) { - rcu_read_unlock(); - return; - } - - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); - indent += 3; - depth++; - - while (tn && cindex < (1 << tn->bits)) { - struct node *child = rcu_dereference(tn->child[cindex]); - if (!child) - cindex++; - else { - /* Got a child */ - printnode_seq(seq, indent, child, pend, - cindex, tn->bits); - - if (IS_LEAF(child)) - cindex++; - - else { - /* - * New tnode. Decend one level - */ - - depth++; - n = child; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - putspace_seq(seq, indent); - seq_printf(seq, "\\--\n"); - indent += 3; - cindex = 0; - } - } - - /* - * Test if we are done - */ - - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ - - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - break; - } - - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - cindex++; - tn = NODE_PARENT(tn); - pend = tn->pos + tn->bits; - indent -= 3; - depth--; - } - } - rcu_read_unlock(); -} - -static struct trie_stat *trie_stat_new(void) -{ - struct trie_stat *s; - int i; - - s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); - if (!s) - return NULL; - - s->totdepth = 0; - s->maxdepth = 0; - s->tnodes = 0; - s->leaves = 0; - s->nullpointers = 0; - - for (i = 0; i < MAX_CHILDS; i++) - s->nodesizes[i] = 0; - - return s; -} - -static struct trie_stat *trie_collect_stats(struct trie *t) -{ - struct node *n; - struct trie_stat *s = trie_stat_new(); - int cindex = 0; - int pend = 0; - int depth = 0; - - if (!s) - return NULL; - - rcu_read_lock(); - n = rcu_dereference(t->trie); - - if (!n) - return s; - - if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - s->nodesizes[tn->bits]++; - depth++; - - while (tn && cindex < (1 << tn->bits)) { - struct node *ch = rcu_dereference(tn->child[cindex]); - if (ch) { - - /* Got a child */ - - if (IS_LEAF(tn->child[cindex])) { - cindex++; - - /* stats */ - if (depth > s->maxdepth) - s->maxdepth = depth; - s->totdepth += depth; - s->leaves++; - } else { - /* - * New tnode. Decend one level - */ - - s->tnodes++; - s->nodesizes[tn->bits]++; - depth++; - - n = ch; - tn = (struct tnode *)n; - pend = tn->pos+tn->bits; - - cindex = 0; - } - } else { - cindex++; - s->nullpointers++; - } - - /* - * Test if we are done - */ - - while (cindex >= (1 << tn->bits)) { - /* - * Move upwards and test for root - * pop off all traversed nodes - */ - - if (NODE_PARENT(tn) == NULL) { - tn = NULL; - n = NULL; - break; - } - - cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); - tn = NODE_PARENT(tn); - cindex++; - n = (struct node *)tn; - pend = tn->pos+tn->bits; - depth--; - } - } - } - - rcu_read_unlock(); - return s; -} - #ifdef CONFIG_PROC_FS +/* Depth first Trie walk iterator */ +struct fib_trie_iter { + struct tnode *tnode; + struct trie *trie; + unsigned index; + unsigned depth; +}; -static struct fib_alias *fib_triestat_get_first(struct seq_file *seq) +static struct node *fib_trie_get_next(struct fib_trie_iter *iter) { + struct tnode *tn = iter->tnode; + unsigned cindex = iter->index; + struct tnode *p; + + pr_debug("get_next iter={node=%p index=%d depth=%d}\n", + iter->tnode, iter->index, iter->depth); +rescan: + while (cindex < (1<bits)) { + struct node *n = tnode_get_child(tn, cindex); + + if (n) { + if (IS_LEAF(n)) { + iter->tnode = tn; + iter->index = cindex + 1; + } else { + /* push down one level */ + iter->tnode = (struct tnode *) n; + iter->index = 0; + ++iter->depth; + } + return n; + } + + ++cindex; + } + + /* Current node exhausted, pop back up */ + p = NODE_PARENT(tn); + if (p) { + cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; + tn = p; + --iter->depth; + goto rescan; + } + + /* got root? */ return NULL; } -static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) +static struct node *fib_trie_get_first(struct fib_trie_iter *iter, + struct trie *t) { + struct node *n = rcu_dereference(t->trie); + + if (n && IS_TNODE(n)) { + iter->tnode = (struct tnode *) n; + iter->trie = t; + iter->index = 0; + iter->depth = 0; + return n; + } return NULL; } -static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) +static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - if (!ip_fib_main_table) - return NULL; + struct node *n; + struct fib_trie_iter iter; - if (*pos) - return fib_triestat_get_next(seq); - else - return SEQ_START_TOKEN; -} + memset(s, 0, sizeof(*s)); -static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - if (v == SEQ_START_TOKEN) - return fib_triestat_get_first(seq); - else - return fib_triestat_get_next(seq); -} - -static void fib_triestat_seq_stop(struct seq_file *seq, void *v) -{ + rcu_read_lock(); + for (n = fib_trie_get_first(&iter, t); n; + n = fib_trie_get_next(&iter)) { + if (IS_LEAF(n)) { + s->leaves++; + s->totdepth += iter.depth; + if (iter.depth > s->maxdepth) + s->maxdepth = iter.depth; + } else { + const struct tnode *tn = (const struct tnode *) n; + int i; + s->tnodes++; + s->nodesizes[tn->bits]++; + for (i = 0; i < (1<bits); i++) + if (!tn->child[i]) + s->nullpointers++; + } + } + rcu_read_unlock(); } /* * This outputs /proc/net/fib_triestats - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. */ - -static void collect_and_show(struct trie *t, struct seq_file *seq) +static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) { - int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ - int i, max, pointers; - struct trie_stat *stat; - int avdepth; + unsigned i, max, pointers, bytes, avdepth; - stat = trie_collect_stats(t); + if (stat->leaves) + avdepth = stat->totdepth*100 / stat->leaves; + else + avdepth = 0; - bytes = 0; - seq_printf(seq, "trie=%p\n", t); + seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); + seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); - if (stat) { - if (stat->leaves) - avdepth = stat->totdepth*100 / stat->leaves; - else - avdepth = 0; - seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100); - seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); + seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - seq_printf(seq, "Leaves: %d\n", stat->leaves); - bytes += sizeof(struct leaf) * stat->leaves; - seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); - bytes += sizeof(struct tnode) * stat->tnodes; + bytes = sizeof(struct leaf) * stat->leaves; + seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes); + bytes += sizeof(struct tnode) * stat->tnodes; - max = MAX_CHILDS-1; + max = MAX_CHILDS-1; + while (max >= 0 && stat->nodesizes[max] == 0) + max--; - while (max >= 0 && stat->nodesizes[max] == 0) - max--; - pointers = 0; + pointers = 0; + for (i = 1; i <= max; i++) + if (stat->nodesizes[i] != 0) { + seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); + pointers += (1<nodesizes[i]; + } + seq_putc(seq, '\n'); + seq_printf(seq, "\tPointers: %d\n", pointers); - for (i = 1; i <= max; i++) - if (stat->nodesizes[i] != 0) { - seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); - pointers += (1<nodesizes[i]; - } - seq_printf(seq, "\n"); - seq_printf(seq, "Pointers: %d\n", pointers); - bytes += sizeof(struct node *) * pointers; - seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); - seq_printf(seq, "Total size: %d kB\n", bytes / 1024); - - kfree(stat); - } + bytes += sizeof(struct node *) * pointers; + seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); + seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024); #ifdef CONFIG_IP_FIB_TRIE_STATS seq_printf(seq, "Counters:\n---------\n"); @@ -2360,169 +2118,377 @@ static void collect_and_show(struct trie *t, struct seq_file *seq) static int fib_triestat_seq_show(struct seq_file *seq, void *v) { - char bf[128]; + struct trie_stat *stat; - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct leaf), sizeof(struct tnode)); - if (trie_local) - collect_and_show(trie_local, seq); + stat = kmalloc(sizeof(*stat), GFP_KERNEL); + if (!stat) + return -ENOMEM; - if (trie_main) - collect_and_show(trie_main, seq); - } else { - snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400); + seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", + sizeof(struct leaf), sizeof(struct tnode)); - seq_printf(seq, "%-127s\n", bf); + if (trie_local) { + seq_printf(seq, "Local:\n"); + trie_collect_stats(trie_local, stat); + trie_show_stats(seq, stat); } + + if (trie_main) { + seq_printf(seq, "Main:\n"); + trie_collect_stats(trie_main, stat); + trie_show_stats(seq, stat); + } + kfree(stat); + return 0; } -static struct seq_operations fib_triestat_seq_ops = { - .start = fib_triestat_seq_start, - .next = fib_triestat_seq_next, - .stop = fib_triestat_seq_stop, - .show = fib_triestat_seq_show, -}; - static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int rc = -ENOMEM; - - rc = seq_open(file, &fib_triestat_seq_ops); - if (rc) - goto out_kfree; - - seq = file->private_data; -out: - return rc; -out_kfree: - goto out; + return single_open(file, fib_triestat_seq_show, NULL); } -static struct file_operations fib_triestat_seq_fops = { +static struct file_operations fib_triestat_fops = { .owner = THIS_MODULE, .open = fib_triestat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = single_release, }; -int __init fib_stat_proc_init(void) +static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, + loff_t pos) { - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops)) - return -ENOMEM; - return 0; -} + loff_t idx = 0; + struct node *n; -void __init fib_stat_proc_exit(void) -{ - proc_net_remove("fib_triestat"); -} + for (n = fib_trie_get_first(iter, trie_local); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } -static struct fib_alias *fib_trie_get_first(struct seq_file *seq) -{ - return NULL; -} - -static struct fib_alias *fib_trie_get_next(struct seq_file *seq) -{ + for (n = fib_trie_get_first(iter, trie_main); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } return NULL; } static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) { - if (!ip_fib_main_table) - return NULL; - - if (*pos) - return fib_trie_get_next(seq); - else + rcu_read_lock(); + if (*pos == 0) return SEQ_START_TOKEN; + return fib_trie_get_idx(seq->private, *pos - 1); } static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct fib_trie_iter *iter = seq->private; + void *l = v; + ++*pos; if (v == SEQ_START_TOKEN) - return fib_trie_get_first(seq); - else - return fib_trie_get_next(seq); + return fib_trie_get_idx(iter, 0); + v = fib_trie_get_next(iter); + BUG_ON(v == l); + if (v) + return v; + + /* continue scan in next trie */ + if (iter->trie == trie_local) + return fib_trie_get_first(iter, trie_main); + + return NULL; } static void fib_trie_seq_stop(struct seq_file *seq, void *v) { + rcu_read_unlock(); } -/* - * This outputs /proc/net/fib_trie. - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. - */ +static void seq_indent(struct seq_file *seq, int n) +{ + while (n-- > 0) seq_puts(seq, " "); +} +static inline const char *rtn_scope(enum rt_scope_t s) +{ + static char buf[32]; + + switch(s) { + case RT_SCOPE_UNIVERSE: return "universe"; + case RT_SCOPE_SITE: return "site"; + case RT_SCOPE_LINK: return "link"; + case RT_SCOPE_HOST: return "host"; + case RT_SCOPE_NOWHERE: return "nowhere"; + default: + snprintf(buf, sizeof(buf), "scope=%d", s); + return buf; + } +} + +static const char *rtn_type_names[__RTN_MAX] = { + [RTN_UNSPEC] = "UNSPEC", + [RTN_UNICAST] = "UNICAST", + [RTN_LOCAL] = "LOCAL", + [RTN_BROADCAST] = "BROADCAST", + [RTN_ANYCAST] = "ANYCAST", + [RTN_MULTICAST] = "MULTICAST", + [RTN_BLACKHOLE] = "BLACKHOLE", + [RTN_UNREACHABLE] = "UNREACHABLE", + [RTN_PROHIBIT] = "PROHIBIT", + [RTN_THROW] = "THROW", + [RTN_NAT] = "NAT", + [RTN_XRESOLVE] = "XRESOLVE", +}; + +static inline const char *rtn_type(unsigned t) +{ + static char buf[32]; + + if (t < __RTN_MAX && rtn_type_names[t]) + return rtn_type_names[t]; + snprintf(buf, sizeof(buf), "type %d", t); + return buf; +} + +/* Pretty print the trie */ static int fib_trie_seq_show(struct seq_file *seq, void *v) { - char bf[128]; + const struct fib_trie_iter *iter = seq->private; + struct node *n = v; - if (v == SEQ_START_TOKEN) { - if (trie_local) - trie_dump_seq(seq, trie_local); + if (v == SEQ_START_TOKEN) + return 0; - if (trie_main) - trie_dump_seq(seq, trie_main); + if (IS_TNODE(n)) { + struct tnode *tn = (struct tnode *) n; + t_key prf = ntohl(MASK_PFX(tn->key, tn->pos)); + + if (!NODE_PARENT(n)) { + if (iter->trie == trie_local) + seq_puts(seq, ":\n"); + else + seq_puts(seq, "
:\n"); + } else { + seq_indent(seq, iter->depth-1); + seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", + NIPQUAD(prf), tn->pos); + } } else { - snprintf(bf, sizeof(bf), - "*\t%08X\t%08X", 200, 400); - seq_printf(seq, "%-127s\n", bf); + struct leaf *l = (struct leaf *) n; + int i; + u32 val = ntohl(l->key); + + seq_indent(seq, iter->depth); + seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); + for (i = 32; i >= 0; i--) { + struct leaf_info *li = find_leaf_info(&l->list, i); + if (li) { + struct fib_alias *fa; + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + seq_indent(seq, iter->depth+1); + seq_printf(seq, " /%d %s %s", i, + rtn_scope(fa->fa_scope), + rtn_type(fa->fa_type)); + if (fa->fa_tos) + seq_printf(seq, "tos =%d\n", + fa->fa_tos); + seq_putc(seq, '\n'); + } + } + } } return 0; } static struct seq_operations fib_trie_seq_ops = { - .start = fib_trie_seq_start, - .next = fib_trie_seq_next, - .stop = fib_trie_seq_stop, - .show = fib_trie_seq_show, + .start = fib_trie_seq_start, + .next = fib_trie_seq_next, + .stop = fib_trie_seq_stop, + .show = fib_trie_seq_show, }; static int fib_trie_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; + struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); + + if (!s) + goto out; rc = seq_open(file, &fib_trie_seq_ops); if (rc) goto out_kfree; - seq = file->private_data; + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); out: return rc; out_kfree: + kfree(s); goto out; } -static struct file_operations fib_trie_seq_fops = { - .owner = THIS_MODULE, - .open = fib_trie_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release= seq_release_private, +static struct file_operations fib_trie_fops = { + .owner = THIS_MODULE, + .open = fib_trie_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi) +{ + static unsigned type2flags[RTN_MAX + 1] = { + [7] = RTF_REJECT, [8] = RTF_REJECT, + }; + unsigned flags = type2flags[type]; + + if (fi && fi->fib_nh->nh_gw) + flags |= RTF_GATEWAY; + if (mask == 0xFFFFFFFF) + flags |= RTF_HOST; + flags |= RTF_UP; + return flags; +} + +/* + * This outputs /proc/net/route. + * The format of the file is not supposed to be changed + * and needs to be same as fib_hash output to avoid breaking + * legacy utilities + */ +static int fib_route_seq_show(struct seq_file *seq, void *v) +{ + struct leaf *l = v; + int i; + char bf[128]; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " + "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" + "\tWindow\tIRTT"); + return 0; + } + + if (IS_TNODE(l)) + return 0; + + for (i=32; i>=0; i--) { + struct leaf_info *li = find_leaf_info(&l->list, i); + struct fib_alias *fa; + u32 mask, prefix; + + if (!li) + continue; + + mask = inet_make_mask(li->plen); + prefix = htonl(l->key); + + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + const struct fib_info *fi = rcu_dereference(fa->fa_info); + unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); + + if (fa->fa_type == RTN_BROADCAST + || fa->fa_type == RTN_MULTICAST) + continue; + + if (fi) + snprintf(bf, sizeof(bf), + "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", + prefix, + fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_priority, + mask, + (fi->fib_advmss ? fi->fib_advmss + 40 : 0), + fi->fib_window, + fi->fib_rtt >> 3); + else + snprintf(bf, sizeof(bf), + "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + prefix, 0, flags, 0, 0, 0, + mask, 0, 0, 0); + + seq_printf(seq, "%-127s\n", bf); + } + } + + return 0; +} + +static struct seq_operations fib_route_seq_ops = { + .start = fib_trie_seq_start, + .next = fib_trie_seq_next, + .stop = fib_trie_seq_stop, + .show = fib_route_seq_show, +}; + +static int fib_route_seq_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int rc = -ENOMEM; + struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); + + if (!s) + goto out; + + rc = seq_open(file, &fib_route_seq_ops); + if (rc) + goto out_kfree; + + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); +out: + return rc; +out_kfree: + kfree(s); + goto out; +} + +static struct file_operations fib_route_fops = { + .owner = THIS_MODULE, + .open = fib_route_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, }; int __init fib_proc_init(void) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops)) - return -ENOMEM; + if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + goto out1; + + if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + goto out2; + + if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + goto out3; + return 0; + +out3: + proc_net_remove("fib_triestat"); +out2: + proc_net_remove("fib_trie"); +out1: + return -ENOMEM; } void __init fib_proc_exit(void) { proc_net_remove("fib_trie"); + proc_net_remove("fib_triestat"); + proc_net_remove("route"); } #endif /* CONFIG_PROC_FS */