shortcut-fe: rework netfilter conntrack notification

The original patch overrode the nf_conntrack_un/register_notifier API, which
breaks other modules that rely on that API. Rework the notification APIs to play
nicely with others. Also avoid touching the fullcone NAT code.

Co-Authored-By: AmadeusGhost <42570690+AmadeusGhost@users.noreply.github.com>
Co-Authored-By: quarkysg <35649562+quarkysg@users.noreply.github.com>
This commit is contained in:
Lienol 2020-11-21 17:34:56 +08:00
parent 080041e660
commit 0fc053768a
6 changed files with 176 additions and 189 deletions

View File

@ -88,4 +88,4 @@ endef
$(eval $(call KernelPackage,$(PKG_NAME)))
$(eval $(call KernelPackage,$(PKG_NAME)-noload))
$(eval $(call BuildPackage,fast-classifier-example))
#$(eval $(call BuildPackage,fast-classifier-example))

View File

@ -1807,10 +1807,12 @@ static int __init fast_classifier_init(void)
goto exit3;
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
/*
* Register a notifier hook to get fast notifications of expired connections.
*/
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
result = nf_conntrack_register_chain_notifier(&init_net, &fast_classifier_conntrack_notifier);
#else
result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier);
if (result < 0) {
DEBUG_ERROR("can't register nf notifier hook: %d\n", result);
@ -1824,7 +1826,7 @@ static int __init fast_classifier_init(void)
DEBUG_ERROR("failed to register genl family: %d\n", result);
goto exit5;
}
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
result = genl_register_family_with_ops_groups(&fast_classifier_gnl_family,
fast_classifier_gnl_ops,
fast_classifier_genl_mcgrp);
@ -1877,7 +1879,11 @@ exit6:
exit5:
#ifdef CONFIG_NF_CONNTRACK_EVENTS
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
nf_conntrack_unregister_chain_notifier(&init_net, &fast_classifier_conntrack_notifier);
#else
nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier);
#endif
exit4:
#endif
@ -1945,8 +1951,11 @@ static void __exit fast_classifier_exit(void)
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
nf_conntrack_unregister_chain_notifier(&init_net, &fast_classifier_conntrack_notifier);
#else
nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier);
#endif
#endif
nf_unregister_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));

View File

@ -15,7 +15,7 @@ include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
PKG_NAME:=shortcut-fe
PKG_RELEASE:=1
PKG_RELEASE:=2
include $(INCLUDE_DIR)/package.mk
@ -38,8 +38,6 @@ Shortcut is an in-Linux-kernel IP packet forwarding engine.
endef
define KernelPackage/shortcut-fe/install
$(INSTALL_DIR) $(1)/etc/init.d
$(INSTALL_BIN) ./files/etc/init.d/shortcut-fe $(1)/etc/init.d
$(INSTALL_DIR) $(1)/usr/bin
$(INSTALL_BIN) ./files/usr/bin/sfe_dump $(1)/usr/bin
endef

View File

@ -1,48 +0,0 @@
#!/bin/sh /etc/rc.common
#
# Copyright (c) 2014-2015 The Linux Foundation. All rights reserved.
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all copies.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# The SFE connection manager has a lower priority, so it must be started after
# the other connection managers in order to detect whether a higher-priority one is present.
START=72
# Print "1" when /sys/kernel/debug/ecm exists as a directory, otherwise "0".
# The result is reported on stdout so callers can capture it with $(have_cm).
have_cm() {
	local found=0

	[ -d "/sys/kernel/debug/ecm" ] && found=1
	echo "$found"
}
# Load the shortcut-fe connection manager modules for the running kernel.
#
# Steps:
#   1. Remove shortcut_fe_drv if it is currently loaded.
#   2. Insert shortcut-fe-cm.ko if the file exists and the module is not loaded.
#   3. Insert fast-classifier.ko if the file exists and the module is not loaded.
#
# A missing .ko file or an already-loaded module is skipped and is NOT an
# error. Returns 0 unless an insmod that was actually attempted failed, in
# which case the status of the last failing insmod is returned.
load_sfe_cm() {
	local kernel_version
	local module_dir
	local rc=0

	kernel_version=$(uname -r)
	module_dir="/lib/modules/$kernel_version"

	# shortcut-fe-drv.ko is not needed because no other connection manager is enabled
	if [ -d "/sys/module/shortcut_fe_drv" ]; then
		rmmod shortcut_fe_drv
	fi

	if [ -e "$module_dir/shortcut-fe-cm.ko" ] && [ ! -d /sys/module/shortcut_fe_cm ]; then
		insmod "$module_dir/shortcut-fe-cm.ko" || rc=$?
	fi

	if [ -e "$module_dir/fast-classifier.ko" ] && [ ! -d /sys/module/fast_classifier ]; then
		insmod "$module_dir/fast-classifier.ko" || rc=$?
	fi

	return "$rc"
}
# Init-script start action: load the SFE connection manager modules unless
# have_cm prints "1".
start() {
	if [ "$(have_cm)" != "1" ]; then
		load_sfe_cm
	fi
}
# Init-script stop action: unload whichever SFE connection manager modules are
# currently loaded.
#
# A module that is not loaded is skipped and is NOT an error. Returns 0 unless
# an rmmod that was actually attempted failed, in which case the status of the
# last failing rmmod is returned.
stop() {
	local rc=0

	if [ -d /sys/module/shortcut_fe_cm ]; then
		rmmod shortcut_fe_cm || rc=$?
	fi

	if [ -d /sys/module/fast_classifier ]; then
		rmmod fast_classifier || rc=$?
	fi

	return "$rc"
}

View File

@ -1049,7 +1049,7 @@ static int __init sfe_cm_init(void)
*/
#ifdef CONFIG_NF_CONNTRACK_EVENTS
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
(void)nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier);
(void)nf_conntrack_register_chain_notifier(&init_net, &sfe_cm_conntrack_notifier);
#else
result = nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier);
if (result < 0) {
@ -1123,8 +1123,11 @@ static void __exit sfe_cm_exit(void)
sfe_ipv6_destroy_all_rules_for_dev(NULL);
#ifdef CONFIG_NF_CONNTRACK_EVENTS
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
nf_conntrack_unregister_chain_notifier(&init_net, &sfe_cm_conntrack_notifier);
#else
nf_conntrack_unregister_notifier(&init_net, &sfe_cm_conntrack_notifier);
#endif
#endif
nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));

View File

@ -1,134 +1,58 @@
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -52,6 +52,9 @@ struct br_ip_list {
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
+extern void br_dev_update_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *nlstats);
+
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list);
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -825,6 +825,9 @@
__u8 decrypted:1;
@@ -826,6 +826,10 @@ struct sk_buff {
#endif
__u8 gro_skip:1;
+#ifdef CONFIG_SHORTCUT_FE
+ __u8 fast_forwarded:1;
+#endif
+
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -51,6 +51,8 @@
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
+extern void br_dev_update_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *nlstats);
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
#endif
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -17,6 +17,9 @@ struct timer_list {
unsigned long expires;
@@ -18,6 +18,10 @@ struct timer_list {
void (*function)(struct timer_list *);
u32 flags;
+#ifdef CONFIG_SHORTCUT_FE
+ unsigned long cust_data;
+#endif
+
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -473,3 +473,6 @@ config HAVE_CBPF_JIT
# Extended BPF JIT (eBPF)
config HAVE_EBPF_JIT
bool
+
+config SHORTCUT_FE
+ bool "Enables kernel network stack path for Shortcut Forwarding Engine"
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3192,8 +3192,17 @@ static int xmit_one(struct sk_buff *skb,
unsigned int len;
int rc;
+#ifdef CONFIG_SHORTCUT_FE
+ /* If this skb has been fast forwarded then we don't want it to
+ * go to any taps (by definition we're trying to bypass them).
+ */
+ if (!skb->fast_forwarded) {
+#endif
if (dev_nit_active(dev))
dev_queue_xmit_nit(skb, dev);
+#ifdef CONFIG_SHORTCUT_FE
+ }
+#endif
#ifdef CONFIG_ETHERNET_PACKET_MANGLE
if (!dev->eth_mangle_tx ||
@@ -4684,6 +4693,11 @@ void netdev_rx_handler_unregister(struct
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+#ifdef CONFIG_SHORTCUT_FE
+int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly;
+EXPORT_SYMBOL_GPL(athrs_fast_nat_recv);
+#endif
+
/*
* Limit the use of PFMEMALLOC reserves to those protocols that implement
* the special handling of PFMEMALLOC skbs.
@@ -4733,6 +4747,9 @@ static int __netif_receive_skb_core(stru
bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
+#ifdef CONFIG_SHORTCUT_FE
+ int (*fast_recv)(struct sk_buff *skb);
+#endif
net_timestamp_check(!netdev_tstamp_prequeue, skb);
@@ -4773,6 +4790,16 @@ another_round:
goto out;
}
+#ifdef CONFIG_SHORTCUT_FE
+ fast_recv = rcu_dereference(athrs_fast_nat_recv);
+ if (fast_recv) {
+ if (fast_recv(skb)) {
+ ret = NET_RX_SUCCESS;
+ goto out;
+ }
+ }
+#endif
+
if (skb_skip_tc_classify(skb))
goto skip_classify;
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -33,11 +33,17 @@
/* Do not check the TCP window for incoming packets */
static int nf_ct_tcp_no_window_check __read_mostly = 1;
+#ifdef CONFIG_SHORTCUT_FE
+EXPORT_SYMBOL_GPL(nf_ct_tcp_no_window_check);
+#endif
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
static int nf_ct_tcp_be_liberal __read_mostly = 0;
+#ifdef CONFIG_SHORTCUT_FE
+EXPORT_SYMBOL_GPL(nf_ct_tcp_be_liberal);
+#endif
/* If it is set to zero, we disable picking up already established
connections. */
#endif
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -75,6 +75,8 @@ struct nf_ct_event {
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
extern int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb);
extern int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb);
+extern int nf_conntrack_register_chain_notifier(struct net *net, struct notifier_block *nb);
+extern int nf_conntrack_unregister_chain_notifier(struct net *net, struct notifier_block *nb);
#else
struct nf_ct_event_notifier {
int (*fcn)(unsigned int events, struct nf_ct_event *item);
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -757,3 +757,26 @@
return p->flags & flag;
@@ -746,6 +746,28 @@ void br_port_flags_change(struct net_bri
br_recalculate_neigh_suppress_enabled(br);
}
EXPORT_SYMBOL_GPL(br_port_flag_is_set);
+
+/* Update bridge statistics for bridge packets processed by offload engines */
+void br_dev_update_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *nlstats)
+{
@ -150,26 +74,105 @@
+ u64_stats_update_end(&stats->syncp);
+}
+EXPORT_SYMBOL_GPL(br_dev_update_stats);
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -158,6 +158,14 @@
If unsure, say `N'.
+config NF_CONNTRACK_CHAIN_EVENTS
+ bool "Register multiple callbacks to ct events"
+ depends on NF_CONNTRACK_EVENTS
+ help
+ Support multiple registrations.
+
+ If unsure, say `N'.
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
{
struct net_bridge_port *p;
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3189,8 +3189,17 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
unsigned int len;
int rc;
+#ifdef CONFIG_SHORTCUT_FE
+ /* If this skb has been fast forwarded then we don't want it to
+ * go to any taps (by definition we're trying to bypass them).
+ */
+ if (!skb->fast_forwarded) {
+#endif
if (dev_nit_active(dev))
dev_queue_xmit_nit(skb, dev);
+#ifdef CONFIG_SHORTCUT_FE
+ }
+#endif
#ifdef CONFIG_ETHERNET_PACKET_MANGLE
if (!dev->eth_mangle_tx ||
@@ -4683,6 +4691,11 @@ void netdev_rx_handler_unregister(struct
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+#ifdef CONFIG_SHORTCUT_FE
+int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly;
+EXPORT_SYMBOL_GPL(athrs_fast_nat_recv);
+#endif
+
config NF_CONNTRACK_TIMESTAMP
bool 'Connection tracking timestamping'
depends on NETFILTER_ADVANCED
/*
* Limit the use of PFMEMALLOC reserves to those protocols that implement
* the special handling of PFMEMALLOC skbs.
@@ -4733,6 +4746,10 @@ static int __netif_receive_skb_core(stru
int ret = NET_RX_DROP;
__be16 type;
+#ifdef CONFIG_SHORTCUT_FE
+ int (*fast_recv)(struct sk_buff *skb);
+#endif
+
net_timestamp_check(!netdev_tstamp_prequeue, skb);
trace_netif_receive_skb(skb);
@@ -4772,6 +4789,16 @@ another_round:
goto out;
}
+#ifdef CONFIG_SHORTCUT_FE
+ fast_recv = rcu_dereference(athrs_fast_nat_recv);
+ if (fast_recv) {
+ if (fast_recv(skb)) {
+ ret = NET_RX_SUCCESS;
+ goto out;
+ }
+ }
+#endif
+
if (skb_skip_tc_classify(skb))
goto skip_classify;
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -473,3 +473,6 @@ config HAVE_CBPF_JIT
# Extended BPF JIT (eBPF)
config HAVE_EBPF_JIT
bool
+
+config SHORTCUT_FE
+ bool "Enables kernel network stack path for Shortcut Forwarding Engine"
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -34,11 +34,19 @@
/* Do not check the TCP window for incoming packets */
static int nf_ct_tcp_no_window_check __read_mostly = 1;
+#ifdef CONFIG_SHORTCUT_FE
+EXPORT_SYMBOL_GPL(nf_ct_tcp_no_window_check);
+#endif
+
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
static int nf_ct_tcp_be_liberal __read_mostly = 0;
+#ifdef CONFIG_SHORTCUT_FE
+EXPORT_SYMBOL_GPL(nf_ct_tcp_be_liberal);
+#endif
+
/* If it is set to zero, we disable picking up already established
connections. */
static int nf_ct_tcp_loose __read_mostly = 1;
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -127,7 +130,11 @@ int nf_conntrack_eventmask_report(unsign
@@ -162,7 +162,11 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
@ -181,14 +184,13 @@
goto out_unlock;
e = nf_ct_ecache_find(ct);
@@ -146,7 +153,15 @@ int nf_conntrack_eventmask_report(unsign
@@ -181,7 +185,14 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
if (!((eventmask | missed) & e->ctmask))
goto out_unlock;
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ ret = atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
+ eventmask | missed, &item);
+
+ if (notify)
+ ret = notify->fcn(eventmask | missed, &item);
+#else
@ -197,7 +199,7 @@
if (unlikely(ret < 0 || missed)) {
spin_lock_bh(&ct->lock);
if (ret < 0) {
@@ -186,7 +201,11 @@ void nf_ct_deliver_cached_events(struct
@@ -263,7 +274,11 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
@ -209,7 +211,7 @@
goto out_unlock;
e = nf_ct_ecache_find(ct);
@@ -210,7 +229,16 @@ void nf_ct_deliver_cached_events(struct
@@ -287,7 +302,15 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
item.portid = 0;
item.report = 0;
@ -217,7 +219,6 @@
+ ret = atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
+ events | missed,
+ &item);
+
+ if (notify != NULL)
+ ret = notify->fcn(events | missed, &item);
+#else
@ -225,4 +226,28 @@
+#endif
if (likely(ret == 0 && !missed))
goto out_unlock;
goto out_unlock;
@@ -340,6 +363,11 @@ int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb)
{
return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
}
+int nf_conntrack_register_chain_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_chain_notifier);
#else
int nf_conntrack_register_notifier(struct net *net,
struct nf_ct_event_notifier *new)
@@ -369,6 +397,11 @@ int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb);
}
+int nf_conntrack_unregister_chain_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_chain_notifier);
#else
void nf_conntrack_unregister_notifier(struct net *net,
struct nf_ct_event_notifier *new)