nixos/kubernetes: docker -> containerd

also, nixos/containerd: module init
This commit is contained in:
Johan Thomsen 2021-02-25 16:00:59 +01:00 committed by zowoq
parent 7da62867be
commit 7b5c38e973
12 changed files with 129 additions and 185 deletions

View File

@ -788,6 +788,15 @@ self: super:
and use Maturin as their build tool. and use Maturin as their build tool.
</para> </para>
</listitem> </listitem>
<listitem>
<para>
Kubernetes has <link xlink:href="https://kubernetes.io/blog/2020/12/02/dont-panic-kubernetes-and-docker/">deprecated docker</link> as a container runtime.
As a consequence, the Kubernetes module now supports configuring custom remote container runtimes and enables containerd by default.
Note that containerd is stricter than docker about the OCI compliance of container images.
As an example, images with CMD or ENTRYPOINT defined as strings (not lists) will fail on containerd, even though they work fine on docker.
Please test your setup and container images with containerd prior to upgrading.
</para>
</listitem>
</itemizedlist> </itemizedlist>
</section> </section>
</section> </section>

View File

@ -1053,6 +1053,7 @@
./testing/service-runner.nix ./testing/service-runner.nix
./virtualisation/anbox.nix ./virtualisation/anbox.nix
./virtualisation/container-config.nix ./virtualisation/container-config.nix
./virtualisation/containerd.nix
./virtualisation/containers.nix ./virtualisation/containers.nix
./virtualisation/nixos-containers.nix ./virtualisation/nixos-containers.nix
./virtualisation/oci-containers.nix ./virtualisation/oci-containers.nix

View File

@ -260,7 +260,6 @@ in
account token issuer. The issuer will sign issued ID tokens with this account token issuer. The issuer will sign issued ID tokens with this
private key. private key.
''; '';
default = top.serviceAccountSigningKeyFile;
type = path; type = path;
}; };
@ -272,7 +271,6 @@ in
different files. If unspecified, --tls-private-key-file is used. different files. If unspecified, --tls-private-key-file is used.
Must be specified when --service-account-signing-key is provided Must be specified when --service-account-signing-key is provided
''; '';
default = top.serviceAccountKeyFile;
type = path; type = path;
}; };

View File

@ -5,6 +5,29 @@ with lib;
let let
cfg = config.services.kubernetes; cfg = config.services.kubernetes;
defaultContainerdConfigFile = pkgs.writeText "containerd.toml" ''
version = 2
root = "/var/lib/containerd/daemon"
state = "/var/run/containerd/daemon"
oom_score = 0
[grpc]
address = "/var/run/containerd/containerd.sock"
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "pause:latest"
[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "/opt/cni/bin"
max_conf_num = 0
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes."io.containerd.runc.v2".options]
SystemdCgroup = true
'';
mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON { mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON {
apiVersion = "v1"; apiVersion = "v1";
kind = "Config"; kind = "Config";
@ -222,14 +245,9 @@ in {
}) })
(mkIf cfg.kubelet.enable { (mkIf cfg.kubelet.enable {
virtualisation.docker = { virtualisation.containerd = {
enable = mkDefault true; enable = mkDefault true;
configFile = mkDefault defaultContainerdConfigFile;
# kubernetes needs access to logs
logDriver = mkDefault "json-file";
# iptables must be disabled for kubernetes
extraOptions = "--iptables=false --ip-masq=false";
}; };
}) })
@ -269,7 +287,6 @@ in {
users.users.kubernetes = { users.users.kubernetes = {
uid = config.ids.uids.kubernetes; uid = config.ids.uids.kubernetes;
description = "Kubernetes user"; description = "Kubernetes user";
extraGroups = [ "docker" ];
group = "kubernetes"; group = "kubernetes";
home = cfg.dataDir; home = cfg.dataDir;
createHome = true; createHome = true;

View File

@ -8,16 +8,6 @@ let
# we want flannel to use kubernetes itself as configuration backend, not direct etcd # we want flannel to use kubernetes itself as configuration backend, not direct etcd
storageBackend = "kubernetes"; storageBackend = "kubernetes";
# needed for flannel to pass options to docker
mkDockerOpts = pkgs.runCommand "mk-docker-opts" {
buildInputs = [ pkgs.makeWrapper ];
} ''
mkdir -p $out
# bashInteractive needed for `compgen`
makeWrapper ${pkgs.bashInteractive}/bin/bash $out/mk-docker-opts --add-flags "${pkgs.kubernetes}/bin/mk-docker-opts.sh"
'';
in in
{ {
###### interface ###### interface
@ -43,43 +33,17 @@ in
cniVersion = "0.3.1"; cniVersion = "0.3.1";
delegate = { delegate = {
isDefaultGateway = true; isDefaultGateway = true;
bridge = "docker0"; bridge = "mynet";
}; };
}]; }];
}; };
systemd.services.mk-docker-opts = {
description = "Pre-Docker Actions";
path = with pkgs; [ gawk gnugrep ];
script = ''
${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker
systemctl restart docker
'';
serviceConfig.Type = "oneshot";
};
systemd.paths.flannel-subnet-env = {
wantedBy = [ "flannel.service" ];
pathConfig = {
PathModified = "/run/flannel/subnet.env";
Unit = "mk-docker-opts.service";
};
};
systemd.services.docker = {
environment.DOCKER_OPTS = "-b none";
serviceConfig.EnvironmentFile = "-/run/flannel/docker";
};
# read environment variables generated by mk-docker-opts
virtualisation.docker.extraOptions = "$DOCKER_OPTS";
networking = { networking = {
firewall.allowedUDPPorts = [ firewall.allowedUDPPorts = [
8285 # flannel udp 8285 # flannel udp
8472 # flannel vxlan 8472 # flannel vxlan
]; ];
dhcpcd.denyInterfaces = [ "docker*" "flannel*" ]; dhcpcd.denyInterfaces = [ "mynet*" "flannel*" ];
}; };
services.kubernetes.pki.certs = { services.kubernetes.pki.certs = {

View File

@ -23,7 +23,7 @@ let
name = "pause"; name = "pause";
tag = "latest"; tag = "latest";
contents = top.package.pause; contents = top.package.pause;
config.Cmd = "/bin/pause"; config.Cmd = ["/bin/pause"];
}; };
kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig; kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
@ -134,7 +134,7 @@ in
containerRuntimeEndpoint = mkOption { containerRuntimeEndpoint = mkOption {
description = "Endpoint at which to find the container runtime api interface/socket"; description = "Endpoint at which to find the container runtime api interface/socket";
type = str; type = str;
default = "unix:///var/run/docker/containerd/containerd.sock"; default = "unix:///var/run/containerd/containerd.sock";
}; };
enable = mkEnableOption "Kubernetes kubelet."; enable = mkEnableOption "Kubernetes kubelet.";
@ -247,16 +247,24 @@ in
###### implementation ###### implementation
config = mkMerge [ config = mkMerge [
(mkIf cfg.enable { (mkIf cfg.enable {
environment.etc."cni/net.d".source = cniConfig;
services.kubernetes.kubelet.seedDockerImages = [infraContainer]; services.kubernetes.kubelet.seedDockerImages = [infraContainer];
boot.kernel.sysctl = {
"net.bridge.bridge-nf-call-iptables" = 1;
"net.ipv4.ip_forward" = 1;
"net.bridge.bridge-nf-call-ip6tables" = 1;
};
systemd.services.kubelet = { systemd.services.kubelet = {
description = "Kubernetes Kubelet Service"; description = "Kubernetes Kubelet Service";
wantedBy = [ "kubernetes.target" ]; wantedBy = [ "kubernetes.target" ];
after = [ "network.target" "kube-apiserver.service" "sockets.target" ]; after = [ "containerd.service" "network.target" "kube-apiserver.service" ];
path = with pkgs; [ path = with pkgs; [
gitMinimal gitMinimal
openssh openssh
docker
util-linux util-linux
iproute iproute
ethtool ethtool
@ -266,8 +274,12 @@ in
] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package ++ top.path; ] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package ++ top.path;
preStart = '' preStart = ''
${concatMapStrings (img: '' ${concatMapStrings (img: ''
echo "Seeding docker image: ${img}" echo "Seeding container image: ${img}"
docker load <${img} ${if (lib.hasSuffix "gz" img) then
''${pkgs.gzip}/bin/zcat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import -''
else
''${pkgs.coreutils}/bin/cat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import -''
}
'') cfg.seedDockerImages} '') cfg.seedDockerImages}
rm /opt/cni/bin/* || true rm /opt/cni/bin/* || true
@ -320,6 +332,7 @@ in
${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
--container-runtime=${cfg.containerRuntime} \ --container-runtime=${cfg.containerRuntime} \
--container-runtime-endpoint=${cfg.containerRuntimeEndpoint} \ --container-runtime-endpoint=${cfg.containerRuntimeEndpoint} \
--cgroup-driver=systemd \
${cfg.extraOpts} ${cfg.extraOpts}
''; '';
WorkingDirectory = top.dataDir; WorkingDirectory = top.dataDir;
@ -329,7 +342,7 @@ in
# Always include cni plugins # Always include cni plugins
services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins]; services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins];
boot.kernelModules = ["br_netfilter"]; boot.kernelModules = ["br_netfilter" "overlay"];
services.kubernetes.kubelet.hostname = with config.networking; services.kubernetes.kubelet.hostname = with config.networking;
mkDefault (hostName + optionalString (domain != null) ".${domain}"); mkDefault (hostName + optionalString (domain != null) ".${domain}");

View File

@ -162,10 +162,7 @@ in {
NODE_NAME = cfg.nodeName; NODE_NAME = cfg.nodeName;
}; };
path = [ pkgs.iptables ]; path = [ pkgs.iptables ];
preStart = '' preStart = optionalString (cfg.storageBackend == "etcd") ''
mkdir -p /run/flannel
touch /run/flannel/docker
'' + optionalString (cfg.storageBackend == "etcd") ''
echo "setting network configuration" echo "setting network configuration"
until ${pkgs.etcdctl}/bin/etcdctl set /coreos.com/network/config '${builtins.toJSON networkConfig}' until ${pkgs.etcdctl}/bin/etcdctl set /coreos.com/network/config '${builtins.toJSON networkConfig}'
do do
@ -177,6 +174,7 @@ in {
ExecStart = "${cfg.package}/bin/flannel"; ExecStart = "${cfg.package}/bin/flannel";
Restart = "always"; Restart = "always";
RestartSec = "10s"; RestartSec = "10s";
RuntimeDirectory = "flannel";
}; };
}; };

View File

@ -0,0 +1,60 @@
# NixOS module that runs the containerd container runtime as a systemd service.
{ pkgs, lib, config, ... }:
let
  cfg = config.virtualisation.containerd;

  # Derivation that runs `containerd config dump` against the user-supplied
  # config file and then exposes that same file (as a symlink) at $out.
  # Presumably a config containerd cannot load makes this derivation fail,
  # so the problem surfaces at build time rather than when the service starts.
  # Only evaluated when cfg.configFile is non-null (see the mkIf below).
  containerdConfigChecked = pkgs.runCommand "containerd-config-checked.toml" {
    nativeBuildInputs = [ pkgs.containerd ];
  } ''
    containerd -c ${cfg.configFile} config dump >/dev/null
    ln -s ${cfg.configFile} $out
  '';
in
{
  options.virtualisation.containerd = with lib.types; {
    enable = lib.mkEnableOption "containerd container runtime";

    configFile = lib.mkOption {
      default = null;
      description = "path to containerd config file";
      type = nullOr path;
    };

    # Each attribute is rendered as a GNU-style `--name value` flag on the
    # containerd command line (via lib.cli.toGNUCommandLine).
    args = lib.mkOption {
      default = {};
      description = "extra args to append to the containerd cmdline";
      type = attrsOf str;
    };
  };

  config = lib.mkIf cfg.enable {
    # When a config file is given, pass the checked copy as `--config`.
    virtualisation.containerd.args.config =
      lib.mkIf (cfg.configFile != null) (toString containerdConfigChecked);

    environment.systemPackages = [ pkgs.containerd ];

    systemd.services.containerd = {
      description = "containerd - container runtime";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
      path = with pkgs; [
        containerd
        runc
        iptables
      ];

      # Start rate limiting is a [Unit]-section setting (see systemd.unit(5)).
      # systemd only accepts legacy spellings of these keys in [Service], so
      # they are placed in unitConfig to make sure the 120s window is applied.
      unitConfig = {
        StartLimitBurst = "8";
        StartLimitIntervalSec = "120s";
      };

      serviceConfig = {
        ExecStart = ''${pkgs.containerd}/bin/containerd ${lib.concatStringsSep " " (lib.cli.toGNUCommandLine {} cfg.args)}'';
        Delegate = "yes";
        KillMode = "process";
        Type = "notify";
        Restart = "always";
        RestartSec = "5";

        # "limits" defined below are adopted from upstream: https://github.com/containerd/containerd/blob/master/containerd.service
        LimitNPROC = "infinity";
        LimitCORE = "infinity";
        LimitNOFILE = "infinity";
        TasksMax = "infinity";
        OOMScoreAdjust = "-999";
      };
    };
  };
}

View File

@ -34,7 +34,7 @@ let
name = "redis"; name = "redis";
tag = "latest"; tag = "latest";
contents = [ pkgs.redis pkgs.bind.host ]; contents = [ pkgs.redis pkgs.bind.host ];
config.Entrypoint = "/bin/redis-server"; config.Entrypoint = ["/bin/redis-server"];
}; };
probePod = pkgs.writeText "probe-pod.json" (builtins.toJSON { probePod = pkgs.writeText "probe-pod.json" (builtins.toJSON {
@ -55,12 +55,11 @@ let
name = "probe"; name = "probe";
tag = "latest"; tag = "latest";
contents = [ pkgs.bind.host pkgs.busybox ]; contents = [ pkgs.bind.host pkgs.busybox ];
config.Entrypoint = "/bin/tail"; config.Entrypoint = ["/bin/tail"];
}; };
extraConfiguration = { config, pkgs, ... }: { extraConfiguration = { config, pkgs, lib, ... }: {
environment.systemPackages = [ pkgs.bind.host ]; environment.systemPackages = [ pkgs.bind.host ];
# virtualisation.docker.extraOptions = "--dns=${config.services.kubernetes.addons.dns.clusterIp}";
services.dnsmasq.enable = true; services.dnsmasq.enable = true;
services.dnsmasq.servers = [ services.dnsmasq.servers = [
"/cluster.local/${config.services.kubernetes.addons.dns.clusterIp}#53" "/cluster.local/${config.services.kubernetes.addons.dns.clusterIp}#53"
@ -77,7 +76,7 @@ let
# prepare machine1 for test # prepare machine1 for test
machine1.wait_until_succeeds("kubectl get node machine1.${domain} | grep -w Ready") machine1.wait_until_succeeds("kubectl get node machine1.${domain} | grep -w Ready")
machine1.wait_until_succeeds( machine1.wait_until_succeeds(
"docker load < ${redisImage}" "${pkgs.gzip}/bin/zcat ${redisImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
) )
machine1.wait_until_succeeds( machine1.wait_until_succeeds(
"kubectl create -f ${redisPod}" "kubectl create -f ${redisPod}"
@ -86,7 +85,7 @@ let
"kubectl create -f ${redisService}" "kubectl create -f ${redisService}"
) )
machine1.wait_until_succeeds( machine1.wait_until_succeeds(
"docker load < ${probeImage}" "${pkgs.gzip}/bin/zcat ${probeImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
) )
machine1.wait_until_succeeds( machine1.wait_until_succeeds(
"kubectl create -f ${probePod}" "kubectl create -f ${probePod}"
@ -118,7 +117,7 @@ let
# prepare machines for test # prepare machines for test
machine1.wait_until_succeeds("kubectl get node machine2.${domain} | grep -w Ready") machine1.wait_until_succeeds("kubectl get node machine2.${domain} | grep -w Ready")
machine2.wait_until_succeeds( machine2.wait_until_succeeds(
"docker load < ${redisImage}" "${pkgs.gzip}/bin/zcat ${redisImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
) )
machine1.wait_until_succeeds( machine1.wait_until_succeeds(
"kubectl create -f ${redisPod}" "kubectl create -f ${redisPod}"
@ -127,7 +126,7 @@ let
"kubectl create -f ${redisService}" "kubectl create -f ${redisService}"
) )
machine2.wait_until_succeeds( machine2.wait_until_succeeds(
"docker load < ${probeImage}" "${pkgs.gzip}/bin/zcat ${probeImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
) )
machine1.wait_until_succeeds( machine1.wait_until_succeeds(
"kubectl create -f ${probePod}" "kubectl create -f ${probePod}"

View File

@ -85,7 +85,7 @@ let
name = "kubectl"; name = "kubectl";
tag = "latest"; tag = "latest";
contents = [ kubectl pkgs.busybox kubectlPod2 ]; contents = [ kubectl pkgs.busybox kubectlPod2 ];
config.Entrypoint = "/bin/sh"; config.Entrypoint = ["/bin/sh"];
}; };
base = { base = {
@ -97,7 +97,7 @@ let
machine1.wait_until_succeeds("kubectl get node machine1.my.zyx | grep -w Ready") machine1.wait_until_succeeds("kubectl get node machine1.my.zyx | grep -w Ready")
machine1.wait_until_succeeds( machine1.wait_until_succeeds(
"docker load < ${kubectlImage}" "${pkgs.gzip}/bin/zcat ${kubectlImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
) )
machine1.wait_until_succeeds( machine1.wait_until_succeeds(
@ -134,7 +134,7 @@ let
machine1.wait_until_succeeds("kubectl get node machine2.my.zyx | grep -w Ready") machine1.wait_until_succeeds("kubectl get node machine2.my.zyx | grep -w Ready")
machine2.wait_until_succeeds( machine2.wait_until_succeeds(
"docker load < ${kubectlImage}" "${pkgs.gzip}/bin/zcat ${kubectlImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
) )
machine1.wait_until_succeeds( machine1.wait_until_succeeds(

View File

@ -77,8 +77,6 @@ stdenv.mkDerivation rec {
cp cluster/addons/addon-manager/kube-addons.sh $out/bin/kube-addons-lib.sh cp cluster/addons/addon-manager/kube-addons.sh $out/bin/kube-addons-lib.sh
cp ${./mk-docker-opts.sh} $out/bin/mk-docker-opts.sh
for tool in kubeadm kubectl; do for tool in kubeadm kubectl; do
installShellCompletion --cmd $tool \ installShellCompletion --cmd $tool \
--bash <($out/bin/$tool completion bash) \ --bash <($out/bin/$tool completion bash) \

View File

@ -1,113 +0,0 @@
#!/usr/bin/env bash
# Copyright 2014 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generate Docker daemon options based on flannel env file.
# exit on any error
set -e
# Print the command-line synopsis and option summary to stderr, then exit
# with status 1.
usage() {
  # Write to file descriptor 2 directly instead of re-opening /dev/stderr by
  # path: opening the path can fail when stderr is not a regular file or tty
  # (for example a socket), in which case the help text would be lost.
  echo "$0 [-f FLANNEL-ENV-FILE] [-d DOCKER-ENV-FILE] [-i] [-c] [-m] [-k COMBINED-KEY]
Generate Docker daemon options based on flannel env file
OPTIONS:
-f Path to flannel env file. Defaults to /run/flannel/subnet.env
-d Path to Docker env file to write to. Defaults to /run/docker_opts.env
-i Output each Docker option as individual var. e.g. DOCKER_OPT_MTU=1500
-c Output combined Docker options into DOCKER_OPTS var
-k Set the combined options key to this value (default DOCKER_OPTS=)
-m Do not output --ip-masq (useful for older Docker version)
" >&2
  exit 1
}
# Defaults; each one can be overridden by the flags parsed below.
flannel_env="/run/flannel/subnet.env"
docker_env="/run/docker_opts.env"
combined_opts_key="DOCKER_OPTS"
indiv_opts=false
combined_opts=false
ipmasq=true
val=""

while getopts "f:d:icmk:" opt; do
  case $opt in
    f) flannel_env=$OPTARG ;;
    d) docker_env=$OPTARG ;;
    i) indiv_opts=true ;;
    c) combined_opts=true ;;
    m) ipmasq=false ;;
    k) combined_opts_key=$OPTARG ;;
    \?) usage ;;
  esac
done

# The -k value is used as a variable name further down. Reject anything that
# is not a plain shell identifier so it can never be interpreted as code.
if [[ ! "$combined_opts_key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
  usage
fi

# With neither -i nor -c given, emit both the individual and combined forms.
if [[ $indiv_opts = false ]] && [[ $combined_opts = false ]]; then
  indiv_opts=true
  combined_opts=true
fi

# Load the FLANNEL_* variables if the env file exists; a missing file is not
# an error and simply yields no flannel-derived options.
if [[ -f "${flannel_env}" ]]; then
  source "${flannel_env}"
fi

if [[ -n "$FLANNEL_SUBNET" ]]; then
  # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below
  DOCKER_OPT_BIP="--bip=$FLANNEL_SUBNET"
fi

if [[ -n "$FLANNEL_MTU" ]]; then
  # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below
  DOCKER_OPT_MTU="--mtu=$FLANNEL_MTU"
fi

if [[ "$FLANNEL_IPMASQ" = true ]] && [[ $ipmasq = true ]]; then
  # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below
  DOCKER_OPT_IPMASQ="--ip-masq=false"
fi

# Seed the combined option string with whatever the combined-key variable
# already holds in the environment (empty if unset), followed by a space.
# Indirect expansion is used instead of eval so the key is only ever treated
# as a variable name.
docker_opts="${!combined_opts_key} "

# Truncate (or create) the output file before appending to it.
: >"${docker_env}"

# OPT_LOOP
# Visit every variable whose name starts with DOCKER_OPT_ (set above).
for opt in $(compgen -v DOCKER_OPT_); do
  val=${!opt}

  if [[ "$indiv_opts" = true ]]; then
    echo "$opt=\"$val\"" >>"${docker_env}"
  fi

  docker_opts+="$val "
done

if [[ "$combined_opts" = true ]]; then
  echo "${combined_opts_key}=\"${docker_opts}\"" >>"${docker_env}"
fi