Containers: Use systemd-nspawn's --network-veth flag
Note that this causes the name of the host-side interface to change from c-<name> to ve-<name>.
This commit is contained in:
parent
810680bcae
commit
6f7aaf10a5
@ -213,8 +213,8 @@ $ ping -c1 10.233.4.2
|
||||
<para>Networking is implemented using a pair of virtual Ethernet
|
||||
devices. The network interface in the container is called
|
||||
<literal>eth0</literal>, while the matching interface in the host is
|
||||
called <literal>c-<replaceable>container-name</replaceable></literal>
|
||||
(e.g., <literal>c-foo</literal>). The container has its own network
|
||||
called <literal>ve-<replaceable>container-name</replaceable></literal>
|
||||
(e.g., <literal>ve-foo</literal>). The container has its own network
|
||||
namespace and the <literal>CAP_NET_ADMIN</literal> capability, so it
|
||||
can perform arbitrary network configuration such as setting up
|
||||
firewall rules, without affecting or having access to the host’s
|
||||
@ -228,11 +228,11 @@ on the host:
|
||||
|
||||
<programlisting>
|
||||
networking.nat.enable = true;
|
||||
networking.nat.internalInterfaces = ["c-+"];
|
||||
networking.nat.internalInterfaces = ["ve-+"];
|
||||
networking.nat.externalInterface = "eth0";
|
||||
</programlisting>
|
||||
where <literal>eth0</literal> should be replaced with the desired
|
||||
external interface. Note that <literal>c-+</literal> is a wildcard
|
||||
external interface. Note that <literal>ve-+</literal> is a wildcard
|
||||
that matches all container interfaces.</para>
|
||||
|
||||
</section>
|
||||
|
@ -4,6 +4,28 @@
|
||||
|
||||
<title>Release notes</title>
|
||||
|
||||
<!--==================================================================-->
|
||||
|
||||
<section xml:id="sec-release-14.10">
|
||||
|
||||
<title>Release 14.10 (“Caterpillar”, 2014/10/??)</title>
|
||||
|
||||
<para>When upgrading from a previous release, please be aware of the
|
||||
following incompatible changes:
|
||||
|
||||
<itemizedlist>
|
||||
|
||||
<listitem><para>The host side of a container virtual Ethernet pair
|
||||
is now called <literal>ve-<replaceable>container-name</replaceable></literal>
|
||||
rather than <literal>c-<replaceable>container-name</replaceable></literal>.</para></listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
|
||||
</section>
|
||||
|
||||
|
||||
<!--==================================================================-->
|
||||
|
||||
<section xml:id="sec-release-14.04">
|
||||
|
@ -34,9 +34,8 @@ let
|
||||
|
||||
# Ignore peth* devices; on Xen, they're renamed physical
|
||||
# Ethernet cards used for bridging. Likewise for vif* and tap*
|
||||
# (Xen) and virbr* and vnet* (libvirt) and c-* and ctmp-* (NixOS
|
||||
# containers).
|
||||
denyinterfaces ${toString ignoredInterfaces} lo peth* vif* tap* tun* virbr* vnet* vboxnet* c-* ctmp-*
|
||||
# (Xen) and virbr* and vnet* (libvirt).
|
||||
denyinterfaces ${toString ignoredInterfaces} lo peth* vif* tap* tun* virbr* vnet* vboxnet*
|
||||
|
||||
${config.networking.dhcpcd.extraConfig}
|
||||
'';
|
||||
|
@ -4,16 +4,6 @@ with lib;
|
||||
|
||||
let
|
||||
|
||||
runInNetns = pkgs.stdenv.mkDerivation {
|
||||
name = "run-in-netns";
|
||||
unpackPhase = "true";
|
||||
buildPhase = ''
|
||||
mkdir -p $out/bin
|
||||
gcc ${./run-in-netns.c} -o $out/bin/run-in-netns
|
||||
'';
|
||||
installPhase = "true";
|
||||
};
|
||||
|
||||
nixos-container = pkgs.substituteAll {
|
||||
name = "nixos-container";
|
||||
dir = "bin";
|
||||
@ -23,6 +13,28 @@ let
|
||||
inherit (pkgs) socat;
|
||||
};
|
||||
|
||||
# The container's init script, a small wrapper around the regular
|
||||
# NixOS stage-2 init script.
|
||||
containerInit = pkgs.writeScript "container-init"
|
||||
''
|
||||
#! ${pkgs.stdenv.shell} -e
|
||||
|
||||
# Initialise the container side of the veth pair.
|
||||
if [ "$PRIVATE_NETWORK" = 1 ]; then
|
||||
ip link set host0 name eth0
|
||||
ip link set dev eth0 up
|
||||
if [ -n "$HOST_ADDRESS" ]; then
|
||||
ip route add $HOST_ADDRESS dev eth0
|
||||
ip route add default via $HOST_ADDRESS
|
||||
fi
|
||||
if [ -n "$LOCAL_ADDRESS" ]; then
|
||||
ip addr add $LOCAL_ADDRESS dev eth0
|
||||
fi
|
||||
fi
|
||||
|
||||
exec "$1"
|
||||
'';
|
||||
|
||||
system = config.nixpkgs.system;
|
||||
|
||||
in
|
||||
@ -70,7 +82,7 @@ in
|
||||
Whether to give the container its own private virtual
|
||||
Ethernet interface. The interface is called
|
||||
<literal>eth0</literal>, and is hooked up to the interface
|
||||
<literal>c-<replaceable>container-name</replaceable></literal>
|
||||
<literal>ve-<replaceable>container-name</replaceable></literal>
|
||||
on the host. If this option is not set, then the
|
||||
container shares the network interfaces of the host,
|
||||
and can bind to any port on any interface.
|
||||
@ -176,39 +188,8 @@ in
|
||||
"/nix/var/nix/profiles/per-container/$INSTANCE" \
|
||||
"/nix/var/nix/gcroots/per-container/$INSTANCE"
|
||||
|
||||
if [ -f "/etc/containers/$INSTANCE.conf" ]; then
|
||||
. "/etc/containers/$INSTANCE.conf"
|
||||
fi
|
||||
|
||||
# Cleanup from last time.
|
||||
ifaceHost=c-$INSTANCE
|
||||
ifaceCont=ctmp-$INSTANCE
|
||||
ns=net-$INSTANCE
|
||||
ip netns del $ns 2> /dev/null || true
|
||||
ip link del $ifaceHost 2> /dev/null || true
|
||||
ip link del $ifaceCont 2> /dev/null || true
|
||||
|
||||
if [ "$PRIVATE_NETWORK" = 1 ]; then
|
||||
# Create a pair of virtual ethernet devices. On the host,
|
||||
# we get ‘c-<container-name>’, and on the guest, we get
|
||||
# ‘eth0’.
|
||||
ip link add $ifaceHost type veth peer name $ifaceCont
|
||||
ip netns add $ns
|
||||
ip link set $ifaceCont netns $ns
|
||||
ip netns exec $ns ip link set $ifaceCont name eth0
|
||||
ip netns exec $ns ip link set dev eth0 up
|
||||
ip link set dev $ifaceHost up
|
||||
if [ -n "$HOST_ADDRESS" ]; then
|
||||
ip addr add $HOST_ADDRESS dev $ifaceHost
|
||||
ip netns exec $ns ip route add $HOST_ADDRESS dev eth0
|
||||
ip netns exec $ns ip route add default via $HOST_ADDRESS
|
||||
fi
|
||||
if [ -n "$LOCAL_ADDRESS" ]; then
|
||||
ip netns exec $ns ip addr add $LOCAL_ADDRESS dev eth0
|
||||
ip route add $LOCAL_ADDRESS dev $ifaceHost
|
||||
fi
|
||||
runInNetNs="${runInNetns}/bin/run-in-netns $ns"
|
||||
extraFlags="--capability=CAP_NET_ADMIN"
|
||||
extraFlags="--network-veth"
|
||||
fi
|
||||
|
||||
# If the host is 64-bit and the container is 32-bit, add a
|
||||
@ -219,7 +200,7 @@ in
|
||||
fi
|
||||
''}
|
||||
|
||||
exec $runInNetNs ${config.systemd.package}/bin/systemd-nspawn \
|
||||
exec ${config.systemd.package}/bin/systemd-nspawn \
|
||||
--keep-unit \
|
||||
-M "$INSTANCE" -D "$root" $extraFlags \
|
||||
--bind-ro=/nix/store \
|
||||
@ -227,7 +208,11 @@ in
|
||||
--bind-ro=/nix/var/nix/daemon-socket \
|
||||
--bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
|
||||
--bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
|
||||
"''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
|
||||
--setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
|
||||
--setenv HOST_ADDRESS="$HOST_ADDRESS" \
|
||||
--setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
|
||||
--setenv PATH="$PATH" \
|
||||
${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
|
||||
'';
|
||||
|
||||
postStart =
|
||||
@ -237,6 +222,17 @@ in
|
||||
# until the start timeout expires if systemd-nspawn exits.
|
||||
read x < $root/var/lib/startup-done
|
||||
rm -f $root/var/lib/startup-done
|
||||
|
||||
if [ "$PRIVATE_NETWORK" = 1 ]; then
|
||||
ifaceHost=ve-$INSTANCE
|
||||
ip link set dev $ifaceHost up
|
||||
if [ -n "$HOST_ADDRESS" ]; then
|
||||
ip addr add $HOST_ADDRESS dev $ifaceHost
|
||||
fi
|
||||
if [ -n "$LOCAL_ADDRESS" ]; then
|
||||
ip route add $LOCAL_ADDRESS dev $ifaceHost
|
||||
fi
|
||||
fi
|
||||
'';
|
||||
|
||||
preStop =
|
||||
@ -251,14 +247,13 @@ in
|
||||
''
|
||||
#! ${pkgs.stdenv.shell} -e
|
||||
SYSTEM_PATH=/nix/var/nix/profiles/system
|
||||
if [ -f "/etc/containers/$INSTANCE.conf" ]; then
|
||||
. "/etc/containers/$INSTANCE.conf"
|
||||
fi
|
||||
echo $SYSTEM_PATH/bin/switch-to-configuration test | \
|
||||
${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket -
|
||||
'';
|
||||
|
||||
serviceConfig.SyslogIdentifier = "container %i";
|
||||
|
||||
serviceConfig.EnvironmentFile = "-/etc/containers/%i.conf";
|
||||
};
|
||||
|
||||
# Generate a configuration file in /etc/containers for each
|
||||
@ -288,6 +283,8 @@ in
|
||||
${cfg.localAddress} ${name}.containers
|
||||
'') config.containers);
|
||||
|
||||
networking.dhcpcd.denyInterfaces = [ "ve-*" ];
|
||||
|
||||
environment.systemPackages = [ nixos-container ];
|
||||
|
||||
};
|
||||
|
@ -1,50 +0,0 @@
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mount.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/limits.h>
|
||||
|
||||
/*
 * run-in-netns: join a named network namespace, then run a command in it.
 *
 * Usage: run-in-netns <namespace> <program> [args...]
 *
 *   argv[1]   name of the namespace; resolved as /run/netns/<namespace>
 *   argv[2..] program path and its arguments, passed to execv() as-is
 *
 * The namespace file is lazily unmounted and unlinked once the process has
 * joined it, so the name does not outlive this invocation.
 *
 * Does not return on success (the process image is replaced by execv()).
 * Returns 1 after printing a diagnostic to stderr on any failure.
 */
int main(int argc, char * * argv)
{
    /* argv[0] may be NULL when argc == 0; never hand NULL to "%s". */
    const char * prog = (argc > 0 && argv[0]) ? argv[0] : "run-in-netns";

    if (argc < 3) {
        fprintf(stderr, "%s: missing arguments\n", prog);
        return 1;
    }

    char nsPath[PATH_MAX];

    /* Bounded formatting: argv[1] is caller-controlled and must not be
       allowed to overflow nsPath. */
    int len = snprintf(nsPath, sizeof nsPath, "/run/netns/%s", argv[1]);
    if (len < 0 || (size_t) len >= sizeof nsPath) {
        fprintf(stderr, "%s: network namespace name too long\n", prog);
        return 1;
    }

    int fd = open(nsPath, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "%s: opening network namespace: %s\n", prog, strerror(errno));
        return 1;
    }

    if (setns(fd, CLONE_NEWNET) == -1) {
        fprintf(stderr, "%s: setting network namespace: %s\n", prog, strerror(errno));
        close(fd);
        return 1;
    }

    /* The descriptor is no longer needed once the namespace has been
       joined; close it so it is not inherited by the executed program. */
    close(fd);

    /* Best effort: the result of the lazy unmount is deliberately not
       checked; a failure to remove the file is reported by unlink(). */
    umount2(nsPath, MNT_DETACH);
    if (unlink(nsPath) == -1) {
        fprintf(stderr, "%s: unlinking network namespace: %s\n", prog, strerror(errno));
        return 1;
    }

    /* FIXME: Remount /sys so that /sys/class/net reflects the
       interfaces visible in the network namespace.  This requires
       bind-mounting /sys/fs/cgroup etc. */

    execv(argv[2], argv + 2);

    /* Only reached if execv() failed. */
    fprintf(stderr, "%s: running command: %s\n", prog, strerror(errno));
    return 1;
}
|
Loading…
Reference in New Issue
Block a user