nixpkgs/pkgs/os-specific/linux/kernel/ubuntu-fan-4.patch

617 lines
18 KiB
Diff
Raw Normal View History

From f3c956096902669c3529cb01d40deb0c759ed94f Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <jay.vosburgh@canonical.com>
Date: Wed, 1 Apr 2015 16:11:09 -0700
Subject: [PATCH] UBUNTU: SAUCE: fan: Proof of concept implementation (v2)
Modification to ipip tunnel driver to accept a new netlink option,
IFLA_IPTUN_FAN_UNDERLAY, which provides a /16 network prefix and enables
TX side destination address remapping for traffic entering the tunnel
(to be encapsulated).
For an overlay (inner) address Y.A.B.C, the transformation is F.G.A.B,
where "F" and "G" are the first two octets of the underlay network (the
network portion of a /16), "A" and "B" are the low order two octets of the
underlay network host (the host portion of a /16), and "Y" is a configured
first octet of the overlay network.
E.g., underlay host 10.88.3.4 with an overlay of 99 would host overlay
subnet 99.3.4.0/24. An overlay network datagram from 99.3.4.5 to 99.6.7.8
would be directed to underlay host 10.88.6.7, which hosts overlay network
99.6.7.0/24.
Includes net.fan.version sysctl as a sentinel for availability of the
fan functionality.
NOTE: this requires an updated iproute2 to facilitate configuration of
the fan.
BugLink: http://bugs.launchpad.net/bugs/1439706
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
[apw@canonical.com: move IFLA_IPTUN_FAN_UNDERLAY up to avoid clashing
with future feature additions.]
Signed-off-by: Andy Whitcroft <apw@canonical.com>
---
include/net/ip_tunnels.h | 6 +++
include/uapi/linux/if_tunnel.h | 4 ++
net/ipv4/ipip.c | 112 +++++++++++++++++++++++++++++++++++++++--
3 files changed, 117 insertions(+), 5 deletions(-)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 25a59eb..d7eada2 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -51,6 +51,11 @@ struct ip_tunnel_dst {
__be32 saddr;
};
+/* Underlay address prefix for ipip fan mode */
+struct ip_tunnel_fan {
+ u32 underlay;
+};
+
struct ip_tunnel {
struct ip_tunnel __rcu *next;
struct hlist_node hash_node;
@@ -82,6 +87,7 @@ struct ip_tunnel {
#endif
struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
unsigned int prl_count; /* # of entries in PRL */
+ struct ip_tunnel_fan fan;
int ip_tnl_net_id;
struct gro_cells gro_cells;
};
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index bd3cc11..8f7d269 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -57,6 +57,10 @@ enum {
IFLA_IPTUN_ENCAP_FLAGS,
IFLA_IPTUN_ENCAP_SPORT,
IFLA_IPTUN_ENCAP_DPORT,
+
+ __IFLA_IPTUN_VENDOR_BREAK, /* Ensure new entries do not hit the below. */
+ IFLA_IPTUN_FAN_UNDERLAY=32,
+
__IFLA_IPTUN_MAX,
};
#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 40403114..e3c27cd 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -209,13 +209,38 @@ drop:
}
/*
+ * Determine fan tunnel endpoint to send packet to, based on the inner IP
+ * address. For an overlay (inner) address Y.A.B.C, the transformation is
+ * F.G.A.B, where "F" and "G" are the first two octets of the underlay
+ * network (the network portion of a /16), "A" and "B" are the low order
+ * two octets of the underlay network host (the host portion of a /16),
+ * and "Y" is a configured first octet of the overlay network.
+ *
+ * The outer header is built in @iph: it starts as a copy of
+ * tunnel->parms.iph and only its daddr is replaced. The inner destination
+ * is read through ip_hdr(skb); the arithmetic is done in host byte order
+ * and the result is converted back with htonl().
+ *
+ * E.g., underlay host 10.88.3.4 with an overlay of 99 would host overlay
+ * subnet 99.3.4.0/24. An overlay network datagram from 99.3.4.5 to
+ * 99.6.7.8, would be directed to underlay host 10.88.6.7, which hosts
+ * overlay network 99.6.7.0/24.
+ */
+static void ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
+{
+ u32 daddr;
+
+ *iph = tunnel->parms.iph;
+
+ daddr = ntohl(ip_hdr(skb)->daddr);
+ iph->daddr = htonl((tunnel->fan.underlay & 0xffff0000) |
+ ((daddr >> 8) & 0x0000ffff));
+}
+
+/*
* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function.
*/
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- const struct iphdr *tiph = &tunnel->parms.iph;
+ const struct iphdr *tiph;
+ struct iphdr fiph;
if (unlikely(skb->protocol != htons(ETH_P_IP)))
goto tx_error;
@@ -224,6 +249,13 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
+ if (tunnel->fan.underlay) {
+ ipip_build_fan_iphdr(tunnel, skb, &fiph);
+ tiph = &fiph;
+ } else {
+ tiph = &tunnel->parms.iph;
+ }
+
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
@@ -377,21 +409,44 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
return ret;
}
+static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
+ struct ip_tunnel_parm *parms)
+{
+ u32 net = t->fan.underlay;
+
+ if (!data[IFLA_IPTUN_FAN_UNDERLAY])
+ goto err_check;
+
+ net = ntohl(nla_get_be32(data[IFLA_IPTUN_FAN_UNDERLAY])) & 0xffff0000;
+
+err_check:
+ if (parms->iph.daddr && net)
+ return -EINVAL;
+
+ t->fan.underlay = net;
+
+ return 0;
+}
+
static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err;
if (ipip_netlink_encap_parms(data, &ipencap)) {
- struct ip_tunnel *t = netdev_priv(dev);
- int err = ip_tunnel_encap_setup(t, &ipencap);
+ err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
ipip_netlink_parms(data, &p);
+ err = ipip_netlink_fan(data, t, &p);
+ if (err < 0)
+ return err;
return ip_tunnel_newlink(dev, tb, &p);
}
@@ -400,16 +455,20 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err;
if (ipip_netlink_encap_parms(data, &ipencap)) {
- struct ip_tunnel *t = netdev_priv(dev);
- int err = ip_tunnel_encap_setup(t, &ipencap);
+ err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
ipip_netlink_parms(data, &p);
+ err = ipip_netlink_fan(data, t, &p);
+ if (err < 0)
+ return err;
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
@@ -441,6 +500,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_IPTUN_FAN_UNDERLAY */
+ nla_total_size(4) +
0;
}
@@ -468,6 +529,11 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
tunnel->encap.flags))
goto nla_put_failure;
+ if (tunnel->fan.underlay)
+ if (nla_put_be32(skb, IFLA_IPTUN_FAN_UNDERLAY,
+ htonl(tunnel->fan.underlay)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -485,6 +551,9 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
+
+ [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
+ [IFLA_IPTUN_FAN_UNDERLAY] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
@@ -524,6 +593,23 @@ static struct pernet_operations ipip_net_ops = {
.size = sizeof(struct ip_tunnel_net),
};
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *ipip_fan_header;
+static unsigned int ipip_fan_version = 1;
+
+static struct ctl_table ipip_fan_sysctls[] = {
+ {
+ .procname = "version", /* registered under "net/fan" by ipip_init() */
+ .data = &ipip_fan_version, /* sentinel: lets userspace detect that fan support is present */
+ .maxlen = sizeof(ipip_fan_version),
+ .mode = 0444, /* read-only */
+ .proc_handler = proc_dointvec,
+ },
+ {}, /* empty entry terminates the table */
+};
+
+#endif /* CONFIG_SYSCTL */
+
static int __init ipip_init(void)
{
int err;
@@ -542,9 +628,22 @@ static int __init ipip_init(void)
if (err < 0)
goto rtnl_link_failed;
+#ifdef CONFIG_SYSCTL
+ ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
+ ipip_fan_sysctls);
+ if (!ipip_fan_header) {
+ err = -ENOMEM;
+ goto sysctl_failed;
+ }
+#endif /* CONFIG_SYSCTL */
+
out:
return err;
+#ifdef CONFIG_SYSCTL
+sysctl_failed:
+ rtnl_link_unregister(&ipip_link_ops);
+#endif /* CONFIG_SYSCTL */
rtnl_link_failed:
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm_tunnel_failed:
@@ -554,6 +653,9 @@ xfrm_tunnel_failed:
static void __exit ipip_fini(void)
{
+#ifdef CONFIG_SYSCTL
+ unregister_net_sysctl_table(ipip_fan_header);
+#endif /* CONFIG_SYSCTL */
rtnl_link_unregister(&ipip_link_ops);
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
pr_info("%s: can't deregister tunnel\n", __func__);
--
2.4.1
From 4ea8011656dfdd76e7a2391bdad47c06f85a9d02 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Tue, 21 Jul 2015 16:52:10 +0100
Subject: [PATCH] UBUNTU: SAUCE: fan: tunnel multiple mapping mode (v3)
Switch to a single tunnel for all mappings. This removes the limit on
how many mappings each tunnel can handle, and therefore on how many Fan
slices each local address may hold.
NOTE: This introduces a new kernel netlink interface which needs updated
iproute2 support.
BugLink: http://bugs.launchpad.net/bugs/1470091
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Acked-by: Tim Gardner <tim.gardner@canonical.com>
Acked-by: Brad Figg <brad.figg@canonical.com>
Signed-off-by: Brad Figg <brad.figg@canonical.com>
---
include/net/ip_tunnels.h | 14 ++++-
include/uapi/linux/if_tunnel.h | 20 ++++++-
net/ipv4/ip_tunnel.c | 7 ++-
net/ipv4/ipip.c | 120 +++++++++++++++++++++++++++++++++--------
4 files changed, 133 insertions(+), 28 deletions(-)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index d7eada2..2f7bc8c 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -51,9 +51,18 @@ struct ip_tunnel_dst {
__be32 saddr;
};
-/* Underlay address prefix for ipip fan mode */
+/* A fan overlay /8 (250.0.0.0/8, for example) maps to exactly one /16
+ * underlay (10.88.0.0/16, for example). Multiple local addresses within
+ * the /16 may be used, but a particular overlay may not span
+ * multiple underlay subnets.
+ *
+ * We store one underlay, indexed by the overlay's high order octet.
+ * Each slot holds the underlay network address in host byte order; a
+ * zero slot means that overlay has no mapping. FAN_OVERLAY_CNT provides
+ * one slot for every possible value of that octet.
+ */
+#define FAN_OVERLAY_CNT 256
+
struct ip_tunnel_fan {
- u32 underlay;
+/* NOTE(review): the author left "u32 __rcu *map;" commented out here; only the fixed array below is used -- confirm whether an RCU-managed map is still planned. */
+ u32 map[FAN_OVERLAY_CNT]; /* underlay network per overlay high octet, host byte order; 0 = unmapped */
};
struct ip_tunnel {
@@ -104,6 +113,7 @@ struct ip_tunnel {
#define TUNNEL_OAM __cpu_to_be16(0x0200)
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
+#define TUNNEL_FAN __cpu_to_be16(0x4000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 8f7d269..9625934 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -58,8 +58,8 @@ enum {
IFLA_IPTUN_ENCAP_SPORT,
IFLA_IPTUN_ENCAP_DPORT,
- __IFLA_IPTUN_VENDOR_BREAK, /* Ensure new entries do not hit the below. */
- IFLA_IPTUN_FAN_UNDERLAY=32,
+ __IFLA_IPTUN_VENDOR_BREAK, /* Ensure new entries do not hit the below. */
+ IFLA_IPTUN_FAN_MAP = 33,
__IFLA_IPTUN_MAX,
};
@@ -135,4 +135,20 @@ enum {
};
#define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1)
+
+enum { /* attribute types carried inside the nested IFLA_IPTUN_FAN_MAP attribute */
+ IFLA_FAN_UNSPEC,
+ IFLA_FAN_MAPPING, /* payload is one struct ip_tunnel_fan_map */
+ __IFLA_FAN_MAX,
+};
+
+#define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1) /* highest defined IFLA_FAN_* attribute type */
+
+struct ip_tunnel_fan_map { /* one overlay -> underlay mapping, as exchanged over netlink */
+ __be32 underlay; /* underlay network address, network byte order */
+ __be32 overlay; /* overlay network address, network byte order */
+ __u16 underlay_prefix; /* prefix length; the ipip driver accepts 0 or 16 and reports 16 */
+ __u16 overlay_prefix; /* prefix length; the ipip driver accepts 0 or 8 and reports 8 */
+};
+
#endif /* _UAPI_IF_TUNNEL_H_ */
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d3e4479..60bd10f 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1078,6 +1078,11 @@ out:
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
+static int ip_tunnel_is_fan(struct ip_tunnel *tunnel)
+{
+ return tunnel->parms.i_flags & TUNNEL_FAN; /* nonzero iff the TUNNEL_FAN bit is set; used to let changelink on the fallback device succeed */
+}
+
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p)
{
@@ -1087,7 +1092,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
if (dev == itn->fb_tunnel_dev)
- return -EINVAL;
+ return ip_tunnel_is_fan(tunnel) ? 0 : -EINVAL;
t = ip_tunnel_find(itn, p, dev->type);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e3c27cd..d6ebc66 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -107,6 +107,7 @@
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
+#include <linux/inetdevice.h>
#include <net/sock.h>
#include <net/ip.h>
@@ -208,6 +209,11 @@ drop:
return 0;
}
+static int ipip_tunnel_is_fan(struct ip_tunnel *tunnel)
+{
+ return tunnel->parms.i_flags & TUNNEL_FAN; /* nonzero iff TUNNEL_FAN is set, which ipip_fan_set_map() does once a map request passes validation */
+}
+
/*
* Determine fan tunnel endpoint to send packet to, based on the inner IP
* address. For an overlay (inner) address Y.A.B.C, the transformation is
@@ -221,15 +227,20 @@ drop:
* 99.6.7.8, would be directed to underlay host 10.88.6.7, which hosts
* overlay network 99.6.7.0/24.
*/
-static void ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
+static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
{
- u32 daddr;
-
- *iph = tunnel->parms.iph;
+ unsigned int overlay;
+ u32 daddr, underlay;
daddr = ntohl(ip_hdr(skb)->daddr);
- iph->daddr = htonl((tunnel->fan.underlay & 0xffff0000) |
- ((daddr >> 8) & 0x0000ffff));
+ overlay = daddr >> 24; /* high octet of the inner destination selects the map slot */
+ underlay = tunnel->fan.map[overlay]; /* host byte order, 0 when no mapping is configured */
+ if (!underlay)
+ return -EINVAL; /* ipip_tunnel_xmit() turns a nonzero return into tx_error */
+
+ *iph = tunnel->parms.iph; /* *iph is written only once a mapping was found */
+ iph->daddr = htonl(underlay | ((daddr >> 8) & 0x0000ffff)); /* F.G from the map, A.B from the inner address */
+ return 0;
}
/*
@@ -249,8 +260,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
- if (tunnel->fan.underlay) {
- ipip_build_fan_iphdr(tunnel, skb, &fiph);
+ if (ipip_tunnel_is_fan(tunnel)) {
+ if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
+ goto tx_error;
tiph = &fiph;
} else {
tiph = &tunnel->parms.iph;
@@ -409,21 +421,65 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
return ret;
}
+static void ipip_fan_free_map(struct ip_tunnel *t)
+{
+ memset(&t->fan.map, 0, sizeof(t->fan.map)); /* zero every slot, i.e. drop all mappings; despite the name nothing is freed */
+}
+/* Validate one mapping received from userspace and record it in t->fan.map.
+ *
+ * Prefix lengths must be 0 or 16 (underlay) and 0 or 8 (overlay), neither
+ * address may have bits set outside its prefix, and a non-zero underlay
+ * requires a non-zero overlay. A request with both addresses zero clears
+ * every mapping. Any request that passes validation also sets TUNNEL_FAN
+ * in t->parms.i_flags.
+ *
+ * Returns 0 on success or -EINVAL if the request is rejected.
+ */
+static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map)
+{
+ u32 overlay, overlay_mask, underlay, underlay_mask;
+
+ if ((map->underlay_prefix && map->underlay_prefix != 16) ||
+ (map->overlay_prefix && map->overlay_prefix != 8))
+ return -EINVAL;
+
+ overlay = ntohl(map->overlay); /* addresses and masks are handled in host byte order below */
+ overlay_mask = ntohl(inet_make_mask(map->overlay_prefix));
+
+ underlay = ntohl(map->underlay);
+ underlay_mask = ntohl(inet_make_mask(map->underlay_prefix));
+
+ if ((overlay & ~overlay_mask) || (underlay & ~underlay_mask)) /* bits set outside the prefix */
+ return -EINVAL;
+
+ if (!(overlay & overlay_mask) && (underlay & underlay_mask)) /* underlay given without an overlay */
+ return -EINVAL;
+
+ t->parms.i_flags |= TUNNEL_FAN;
+
+ /* Special case: overlay 0 and underlay 0 clears all mappings */
+ if (!overlay && !underlay) {
+ ipip_fan_free_map(t);
+ return 0;
+ }
+
+ overlay >>= (32 - map->overlay_prefix); /* NOTE(review): a zero overlay_prefix looks unable to reach this shift (it forces overlay == 0, then underlay == 0, then the return above), assuming inet_make_mask(0) is 0 -- confirm */
+ t->fan.map[overlay] = underlay; /* storing 0 here leaves the slot unmapped for the transmit path */
+
+ return 0;
+}
+
+
+/*
+ * Apply the fan mappings carried in the nested IFLA_IPTUN_FAN_MAP attribute.
+ *
+ * Does nothing and returns 0 when the attribute is absent. Fan mode is
+ * refused with -EINVAL when the tunnel parameters name a fixed destination
+ * (parms->iph.daddr). Each nested attribute is handed to ipip_fan_set_map()
+ * as one struct ip_tunnel_fan_map; an attribute too short to hold that
+ * structure is rejected with -EINVAL instead of being read past its end.
+ *
+ * Processing stops at the first failure, so mappings that preceded the
+ * failing one in the same request stay applied.
+ *
+ * Returns 0 on success or a negative errno.
+ */
static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
struct ip_tunnel_parm *parms)
{
- u32 net = t->fan.underlay;
-
- if (!data[IFLA_IPTUN_FAN_UNDERLAY])
- goto err_check;
+ struct ip_tunnel_fan_map *map;
+ struct nlattr *attr;
+ int rem, rv;
- net = ntohl(nla_get_be32(data[IFLA_IPTUN_FAN_UNDERLAY])) & 0xffff0000;
+ if (!data[IFLA_IPTUN_FAN_MAP])
+ return 0;
-err_check:
- if (parms->iph.daddr && net)
+ if (parms->iph.daddr)
return -EINVAL;
- t->fan.underlay = net;
+ nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
+ /*
+ * ipip_policy only marks IFLA_IPTUN_FAN_MAP as NLA_NESTED and this
+ * function does not parse the inner attributes, so their payload
+ * size has to be checked here before it is read as a struct.
+ */
+ if (nla_len(attr) < sizeof(*map))
+ return -EINVAL;
+
+ map = nla_data(attr);
+ rv = ipip_fan_set_map(t, map);
+ if (rv)
+ return rv;
+ }
return 0;
}
@@ -500,8 +556,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) +
- /* IFLA_IPTUN_FAN_UNDERLAY */
- nla_total_size(4) +
+ /* IFLA_IPTUN_FAN_MAP */
+ nla_total_size(sizeof(struct ip_tunnel_fan_map)) * 256 +
0;
}
@@ -529,10 +585,28 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
tunnel->encap.flags))
goto nla_put_failure;
- if (tunnel->fan.underlay)
- if (nla_put_be32(skb, IFLA_IPTUN_FAN_UNDERLAY,
- htonl(tunnel->fan.underlay)))
+ if (tunnel->parms.i_flags & TUNNEL_FAN) {
+ struct nlattr *fan_nest;
+ int i;
+
+ fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
+ if (!fan_nest)
goto nla_put_failure;
+ for (i = 0; i < 256; i++) {
+ if (tunnel->fan.map[i]) {
+ struct ip_tunnel_fan_map map;
+
+ map.underlay = htonl(tunnel->fan.map[i]);
+ map.underlay_prefix = 16;
+ map.overlay = htonl(i << 24);
+ map.overlay_prefix = 8;
+ if (nla_put(skb, IFLA_FAN_MAPPING,
+ sizeof(map), &map))
+ goto nla_put_failure;
+ }
+ }
+ nla_nest_end(skb, fan_nest);
+ }
return 0;
@@ -553,7 +627,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
[__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
- [IFLA_IPTUN_FAN_UNDERLAY] = { .type = NLA_U32 },
+ [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
};
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
@@ -595,7 +669,7 @@ static struct pernet_operations ipip_net_ops = {
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ipip_fan_header;
-static unsigned int ipip_fan_version = 1;
+static unsigned int ipip_fan_version = 3;
static struct ctl_table ipip_fan_sysctls[] = {
{
--
2.4.1