declarative containers: additional veths

With these changes, a container can have more than one veth-pair. This allows, for example, having LAN and DMZ as bridges on the host and adding dedicated containers for proxies, the IPv4 firewall and the IPv6 firewall. Or having one bridge for normal WAN, one bridge for administration and one bridge for customer-internal communication, so that web-server containers can be reached from outside via HTTP, from the management network via SSH, and can talk to their database via the customer network.
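As an illustration only (not part of this commit; the bridge names and addresses are invented), a host configuration along these lines would give one container three veth-pairs, using the options introduced below:

{ config, pkgs, ... }:
{
  networking.bridges.br-dmz.interfaces = [ ];
  networking.bridges.br-mgmt.interfaces = [ ];

  containers.webserver = {
    autoStart = true;
    privateNetwork = true;
    # primary veth-pair, attached to the DMZ bridge
    hostBridge = "br-dmz";
    localAddress = "192.168.10.2/24";
    extraVeths = {
      # second veth-pair, bridged into the management network
      mgmt = { hostBridge = "br-mgmt"; localAddress = "192.168.20.2/24"; };
      # third veth-pair, plain routed link between host and container
      db = { hostAddress = "192.168.30.1"; localAddress = "192.168.30.2"; };
    };
    config = {
      networking.firewall.allowedTCPPorts = [ 80 ];
    };
  };
}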

The scripts that set up the containers are now rendered once per container instead of from a single shared template. They now contain per-container code to configure the extra veth interfaces. The default template without extra-veth support is still rendered for the imperative containers.
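Structurally, the unit scripts become functions of the per-container settings instead of fixed strings. A simplified sketch of the pattern (not the literal module code; it assumes lib and the evaluated config.containers are in scope, as they are in the module below):

let
  startScript = cfg: ''
    # ... common container setup elided ...
    extraFlags+=" ${lib.concatStringsSep " "
      (lib.mapAttrsToList (name: _: "--network-veth-extra=${name}")
                          (cfg."extraVeths" or { }))}"
  '';
in
{
  # imperative containers keep one generic template, rendered without extra veths
  templateScript = startScript { };
  # every declarative container gets its own rendering of the same function
  perContainerScripts = lib.mapAttrs (_: startScript) config.containers;
}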

A test is also included to check that extra veths can be placed into host bridges or reached via routing.
Arnold Krille 2016-05-07 00:04:28 +02:00
parent 263fff0ab8
commit 9045a8e24c
3 changed files with 386 additions and 181 deletions


@@ -6,35 +6,212 @@ let
# The container's init script, a small wrapper around the regular
# NixOS stage-2 init script.
containerInit = pkgs.writeScript "container-init"
containerInit = (cfg:
let
renderExtraVeth = (name: cfg:
''
echo "Bringing ${name} up"
ip link set dev ${name} up
${optionalString (cfg . "localAddress" or null != null) ''
echo "Setting ip for ${name}"
ip addr add ${cfg . "localAddress"} dev ${name}
''}
${optionalString (cfg . "localAddress6" or null != null) ''
echo "Setting ip6 for ${name}"
ip -6 addr add ${cfg . "localAddress6"} dev ${name}
''}
${optionalString (cfg . "hostAddress" or null != null) ''
echo "Setting route to host for ${name}"
ip route add ${cfg . "hostAddress"} dev ${name}
''}
${optionalString (cfg . "hostAddress6" or null != null) ''
echo "Setting route6 to host for ${name}"
ip -6 route add ${cfg . "hostAddress6"} dev ${name}
''}
''
);
in
pkgs.writeScript "container-init"
''
#! ${pkgs.stdenv.shell} -e
# Initialise the container side of the veth pair.
if [ "$PRIVATE_NETWORK" = 1 ]; then
ip link set host0 name eth0
ip link set dev eth0 up
if [ -n "$LOCAL_ADDRESS" ]; then
ip addr add $LOCAL_ADDRESS dev eth0
fi
if [ -n "$LOCAL_ADDRESS6" ]; then
ip -6 addr add $LOCAL_ADDRESS6 dev eth0
fi
if [ -n "$HOST_ADDRESS" ]; then
ip route add $HOST_ADDRESS dev eth0
ip route add default via $HOST_ADDRESS
fi
if [ -n "$HOST_ADDRESS6" ]; then
ip -6 route add $HOST_ADDRESS6 dev eth0
ip -6 route add default via $HOST_ADDRESS6
fi
${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg . "extraVeths" or {})}
ip a
ip r
fi
# Start the regular stage 1 script.
exec "$1"
''
);
nspawnExtraVethArgs = (name: cfg: "--network-veth-extra=${name}");
startScript = (cfg:
''
#! ${pkgs.stdenv.shell} -e
mkdir -p -m 0755 "$root/etc" "$root/var/lib"
mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
if ! [ -e "$root/etc/os-release" ]; then
touch "$root/etc/os-release"
fi
if ! [ -e "$root/etc/machine-id" ]; then
touch "$root/etc/machine-id"
fi
mkdir -p -m 0755 \
"/nix/var/nix/profiles/per-container/$INSTANCE" \
"/nix/var/nix/gcroots/per-container/$INSTANCE"
cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"
# Initialise the container side of the veth pair.
if [ "$PRIVATE_NETWORK" = 1 ]; then
ip link set host0 name eth0
ip link set dev eth0 up
if [ -n "$LOCAL_ADDRESS" ]; then
ip addr add $LOCAL_ADDRESS dev eth0
fi
if [ -n "$LOCAL_ADDRESS6" ]; then
ip -6 addr add $LOCAL_ADDRESS6 dev eth0
fi
if [ -n "$HOST_ADDRESS" ]; then
ip route add $HOST_ADDRESS dev eth0
ip route add default via $HOST_ADDRESS
fi
if [ -n "$HOST_ADDRESS6" ]; then
ip -6 route add $HOST_ADDRESS6 dev eth0
ip -6 route add default via $HOST_ADDRESS6
extraFlags+=" --network-veth"
if [ -n "$HOST_BRIDGE" ]; then
extraFlags+=" --network-bridge=$HOST_BRIDGE"
fi
fi
# Start the regular stage 1 script.
exec "$1"
'';
${if cfg . "extraVeths" or null != null then
''extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs cfg . "extraVeths" or {})}"''
else
''# No extra veth pairs to create''
}
for iface in $INTERFACES; do
extraFlags+=" --network-interface=$iface"
done
for iface in $MACVLANS; do
extraFlags+=" --network-macvlan=$iface"
done
# If the host is 64-bit and the container is 32-bit, add a
# --personality flag.
${optionalString (config.nixpkgs.system == "x86_64-linux") ''
if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
extraFlags+=" --personality=x86"
fi
''}
# Run systemd-nspawn without startup notification (we'll
# wait for the container systemd to signal readiness).
EXIT_ON_REBOOT=1 \
exec ${config.systemd.package}/bin/systemd-nspawn \
--keep-unit \
-M "$INSTANCE" -D "$root" $extraFlags \
$EXTRA_NSPAWN_FLAGS \
--notify-ready=yes \
--bind-ro=/nix/store \
--bind-ro=/nix/var/nix/db \
--bind-ro=/nix/var/nix/daemon-socket \
--bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
--bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
--setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
--setenv HOST_BRIDGE="$HOST_BRIDGE" \
--setenv HOST_ADDRESS="$HOST_ADDRESS" \
--setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
--setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
--setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
--setenv PATH="$PATH" \
${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
''
);
preStartScript = (cfg:
''
# Clean up existing machined registration and interfaces.
machinectl terminate "$INSTANCE" 2> /dev/null || true
if [ "$PRIVATE_NETWORK" = 1 ]; then
ip link del dev "ve-$INSTANCE" 2> /dev/null || true
ip link del dev "vb-$INSTANCE" 2> /dev/null || true
fi
${concatStringsSep "\n" (
mapAttrsToList (name: cfg:
''ip link del dev ${name} 2> /dev/null || true ''
) cfg . "extraVeths" or {}
)}
''
);
postStartScript = (cfg:
let
ipcall = (cfg: ipcmd: variable: attribute:
if cfg.${attribute} or null == null then
''
if [ -n "${variable}" ]; then
${ipcmd} add ${variable} dev $ifaceHost
fi
''
else
''${ipcmd} add ${cfg.${attribute}} dev $ifaceHost''
);
renderExtraVeth = (name: cfg:
if cfg . "hostBridge" or null != null then
''
# Add ${name} to bridge ${cfg.hostBridge}
ip link set dev ${name} master ${cfg.hostBridge} up
''
else
''
# Set IPs and routes for ${name}
${optionalString (cfg . "hostAddress" or null != null) ''
ip addr add ${cfg . "hostAddress"} dev ${name}
''}
${optionalString (cfg . "hostAddress6" or null != null) ''
ip -6 addr add ${cfg . "hostAddress6"} dev ${name}
''}
${optionalString (cfg . "localAddress" or null != null) ''
ip route add ${cfg . "localAddress"} dev ${name}
''}
${optionalString (cfg . "localAddress6" or null != null) ''
ip -6 route add ${cfg . "localAddress6"} dev ${name}
''}
''
);
in
''
if [ "$PRIVATE_NETWORK" = 1 ]; then
if [ -z "$HOST_BRIDGE" ]; then
ifaceHost=ve-$INSTANCE
ip link set dev $ifaceHost up
${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
fi
${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg . "extraVeths" or {})}
fi
# Get the leader PID so that we can signal it in
# preStop. We can't use machinectl there because D-Bus
# might be shutting down. FIXME: in systemd 219 we can
# just signal systemd-nspawn to do a clean shutdown.
machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
''
);
system = config.nixpkgs.system;
@@ -73,6 +250,63 @@ let
mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);
networkOptions = {
hostBridge = mkOption {
type = types.nullOr types.string;
default = null;
example = "br0";
description = ''
Put the host-side of the veth-pair into the named bridge.
Only one of hostAddress* or hostBridge can be given.
'';
};
hostAddress = mkOption {
type = types.nullOr types.str;
default = null;
example = "10.231.136.1";
description = ''
The IPv4 address assigned to the host interface.
(Not used when hostBridge is set.)
'';
};
hostAddress6 = mkOption {
type = types.nullOr types.string;
default = null;
example = "fc00::1";
description = ''
The IPv6 address assigned to the host interface.
(Not used when hostBridge is set.)
'';
};
localAddress = mkOption {
type = types.nullOr types.str;
default = null;
example = "10.231.136.2";
description = ''
The IPv4 address assigned to the interface in the container.
If a hostBridge is used, this should be given with netmask to access
the whole network. Otherwise the default netmask is /32 and routing is
set up from localAddress to hostAddress and back.
'';
};
localAddress6 = mkOption {
type = types.nullOr types.string;
default = null;
example = "fc00::2";
description = ''
The IPv6 address assigned to the interface in the container.
If a hostBridge is used, this should be given with netmask to access
the whole network. Otherwise the default netmask is /128 and routing is
set up from localAddress6 to hostAddress6 and back.
'';
};
};
in
{
@@ -133,56 +367,6 @@ in
'';
};
hostBridge = mkOption {
type = types.nullOr types.string;
default = null;
example = "br0";
description = ''
Put the host-side of the veth-pair into the named bridge.
Only one of hostAddress* or hostBridge can be given.
'';
};
hostAddress = mkOption {
type = types.nullOr types.str;
default = null;
example = "10.231.136.1";
description = ''
The IPv4 address assigned to the host interface.
(Not used when hostBridge is set.)
'';
};
hostAddress6 = mkOption {
type = types.nullOr types.string;
default = null;
example = "fc00::1";
description = ''
The IPv6 address assigned to the host interface.
(Not used when hostBridge is set.)
'';
};
localAddress = mkOption {
type = types.nullOr types.str;
default = null;
example = "10.231.136.2";
description = ''
The IPv4 address assigned to <literal>eth0</literal>
in the container.
'';
};
localAddress6 = mkOption {
type = types.nullOr types.string;
default = null;
example = "fc00::2";
description = ''
The IPv6 address assigned to <literal>eth0</literal>
in the container.
'';
};
interfaces = mkOption {
type = types.listOf types.string;
default = [];
@@ -192,6 +376,15 @@ in
'';
};
extraVeths = mkOption {
type = types.attrsOf types.optionSet;
default = {};
options = networkOptions;
description = ''
Extra veth-pairs to be created for the container
'';
};
autoStart = mkOption {
type = types.bool;
default = false;
@@ -214,7 +407,7 @@ in
'';
};
};
} // networkOptions;
config = mkMerge
[ (mkIf options.config.isDefined {
@@ -272,108 +465,11 @@ in
environment.INSTANCE = "%i";
environment.root = "/var/lib/containers/%i";
preStart =
''
# Clean up existing machined registration and interfaces.
machinectl terminate "$INSTANCE" 2> /dev/null || true
preStart = preStartScript {};
if [ "$PRIVATE_NETWORK" = 1 ]; then
ip link del dev "ve-$INSTANCE" 2> /dev/null || true
ip link del dev "vb-$INSTANCE" 2> /dev/null || true
fi
'';
script = startScript {};
script =
''
mkdir -p -m 0755 "$root/etc" "$root/var/lib"
mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
if ! [ -e "$root/etc/os-release" ]; then
touch "$root/etc/os-release"
fi
if ! [ -e "$root/etc/machine-id" ]; then
touch "$root/etc/machine-id"
fi
mkdir -p -m 0755 \
"/nix/var/nix/profiles/per-container/$INSTANCE" \
"/nix/var/nix/gcroots/per-container/$INSTANCE"
cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"
if [ "$PRIVATE_NETWORK" = 1 ]; then
extraFlags+=" --network-veth"
if [ -n "$HOST_BRIDGE" ]; then
extraFlags+=" --network-bridge=$HOST_BRIDGE"
fi
fi
for iface in $INTERFACES; do
extraFlags+=" --network-interface=$iface"
done
for iface in $MACVLANS; do
extraFlags+=" --network-macvlan=$iface"
done
# If the host is 64-bit and the container is 32-bit, add a
# --personality flag.
${optionalString (config.nixpkgs.system == "x86_64-linux") ''
if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
extraFlags+=" --personality=x86"
fi
''}
# Run systemd-nspawn without startup notification (we'll
# wait for the container systemd to signal readiness).
EXIT_ON_REBOOT=1 \
exec ${config.systemd.package}/bin/systemd-nspawn \
--keep-unit \
-M "$INSTANCE" -D "$root" $extraFlags \
$EXTRA_NSPAWN_FLAGS \
--notify-ready=yes \
--bind-ro=/nix/store \
--bind-ro=/nix/var/nix/db \
--bind-ro=/nix/var/nix/daemon-socket \
--bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
--bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
--setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
--setenv HOST_BRIDGE="$HOST_BRIDGE" \
--setenv HOST_ADDRESS="$HOST_ADDRESS" \
--setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
--setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
--setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
--setenv PATH="$PATH" \
${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
'';
postStart =
''
if [ "$PRIVATE_NETWORK" = 1 ]; then
if [ -z "$HOST_BRIDGE" ]; then
ifaceHost=ve-$INSTANCE
ip link set dev $ifaceHost up
if [ -n "$HOST_ADDRESS" ]; then
ip addr add $HOST_ADDRESS dev $ifaceHost
fi
if [ -n "$HOST_ADDRESS6" ]; then
ip -6 addr add $HOST_ADDRESS6 dev $ifaceHost
fi
if [ -n "$LOCAL_ADDRESS" ]; then
ip route add $LOCAL_ADDRESS dev $ifaceHost
fi
if [ -n "$LOCAL_ADDRESS6" ]; then
ip -6 route add $LOCAL_ADDRESS6 dev $ifaceHost
fi
fi
fi
# Get the leader PID so that we can signal it in
# preStop. We can't use machinectl there because D-Bus
# might be shutting down. FIXME: in systemd 219 we can
# just signal systemd-nspawn to do a clean shutdown.
machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
'';
postStart = postStartScript {};
preStop =
''
@@ -425,15 +521,20 @@ in
[{ name = "container@"; value = unit; }]
# declarative containers
++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (
unit // {
preStart = preStartScript cfg;
script = startScript cfg;
postStart = postStartScript cfg;
} // (
if cfg.autoStart then
unit // {
{
wantedBy = [ "multi-user.target" ];
wants = [ "network.target" ];
after = [ "network.target" ];
restartTriggers = [ cfg.path ];
reloadIfChanged = true;
}
else null
else {})
)) config.containers)
));
@@ -462,11 +563,11 @@ in
LOCAL_ADDRESS6=${cfg.localAddress6}
''}
''}
INTERFACES="${toString cfg.interfaces}"
${optionalString cfg.autoStart ''
AUTO_START=1
''}
EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts}"
INTERFACES="${toString cfg.interfaces}"
${optionalString cfg.autoStart ''
AUTO_START=1
''}
EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts}"
'';
}) config.containers;


@@ -218,6 +218,7 @@ in rec {
tests.containers-ipv6 = callTest tests/containers-ipv6.nix {};
tests.containers-bridge = callTest tests/containers-bridge.nix {};
tests.containers-imperative = callTest tests/containers-imperative.nix {};
tests.containers-extra_veth = callTest tests/containers-extra_veth.nix {};
tests.docker = hydraJob (import tests/docker.nix { system = "x86_64-linux"; });
tests.dockerRegistry = hydraJob (import tests/docker-registry.nix { system = "x86_64-linux"; });
tests.dnscrypt-proxy = callTest tests/dnscrypt-proxy.nix { system = "x86_64-linux"; };


@@ -0,0 +1,103 @@
# Test for NixOS' container support.
import ./make-test.nix ({ pkgs, ...} : {
name = "containers-bridge";
meta = with pkgs.stdenv.lib.maintainers; {
maintainers = [ aristid aszlig eelco chaoflow ];
};
machine =
{ config, pkgs, ... }:
{ imports = [ ../modules/installer/cd-dvd/channel.nix ];
virtualisation.writableStore = true;
virtualisation.memorySize = 768;
virtualisation.vlans = [];
networking.bridges = {
br0 = {
interfaces = [];
};
br1 = { interfaces = []; };
};
networking.interfaces = {
br0 = {
ip4 = [{ address = "192.168.0.1"; prefixLength = 24; }];
ip6 = [{ address = "fc00::1"; prefixLength = 7; }];
};
br1 = {
ip4 = [{ address = "192.168.1.1"; prefixLength = 24; }];
};
};
containers.webserver =
{
autoStart = true;
privateNetwork = true;
hostBridge = "br0";
localAddress = "192.168.0.100/24";
localAddress6 = "fc00::2/7";
extraVeths = {
veth1 = { hostBridge = "br1"; localAddress = "192.168.1.100/24"; };
veth2 = { hostAddress = "192.168.2.1"; localAddress = "192.168.2.100"; };
};
config =
{
networking.firewall.allowedTCPPorts = [ 80 ];
networking.firewall.allowPing = true;
};
};
virtualisation.pathsInNixDB = [ pkgs.stdenv ];
};
testScript =
''
$machine->waitForUnit("default.target");
$machine->succeed("nixos-container list") =~ /webserver/ or die;
# Status of the webserver container.
$machine->succeed("nixos-container status webserver") =~ /up/ or die;
# Debug
#$machine->succeed("nixos-container run webserver -- ip link >&2");
# Ensure that the veths are inside the container
$machine->succeed("nixos-container run webserver -- ip link show veth1") =~ /state UP/ or die;
$machine->succeed("nixos-container run webserver -- ip link show veth2") =~ /state UP/ or die;
# Debug
#$machine->succeed("ip link >&2");
# Ensure the presence of the extra veths
$machine->succeed("ip link show veth1") =~ /state UP/ or die;
$machine->succeed("ip link show veth2") =~ /state UP/ or die;
# Ensure the veth1 is part of br1 on the host
$machine->succeed("ip link show veth1") =~ /master br1/ or die;
# Debug
#$machine->succeed("ip -4 a >&2");
#$machine->succeed("ip -4 r >&2");
#$machine->succeed("nixos-container run webserver -- ip link >&2");
#$machine->succeed("nixos-container run webserver -- ip -4 a >&2");
#$machine->succeed("nixos-container run webserver -- ip -4 r >&2");
# Ping on main veth
$machine->succeed("ping -n -c 1 192.168.0.100");
$machine->succeed("ping6 -n -c 1 fc00::2");
# Ping on the first extra veth
$machine->succeed("ping -n -c 1 192.168.1.100 >&2");
# Ping on the second extra veth
$machine->succeed("ping -n -c 1 192.168.2.100 >&2");
# Stop the container.
$machine->succeed("nixos-container stop webserver");
$machine->fail("ping -n -c 1 192.168.1.100 >&2");
$machine->fail("ping -n -c 1 192.168.2.100 >&2");
# Destroying a declarative container should fail.
$machine->fail("nixos-container destroy webserver");
'';
})