nixos/kubernetes: Stabilize services startup across machines

by adding targets and curl wait loops to services to ensure services
are not started before the services they depend on are reachable.

Extra targets cfssl-online.target and kube-apiserver-online.target
synchronize starts across machines and node-online.target ensures
docker is restarted and ready to deploy containers after flannel
has discussed the network CIDR with the apiserver.

Since flannel needs to be started before addon-manager to configure
the docker interface, it has to have its own rbac bootstrap service.

The curl wait loops within the other services exist to ensure that when
a service is started it is able to do its work immediately without
cluttering the log with messages about failing conditions.

By ensuring kubernetes.target is only reached after starting the
cluster it can be used in the tests as a wait condition.

In kube-certmgr-bootstrap mkdir is needed for it to not fail to start.

The following is the relevant part of systemctl list-dependencies

default.target
● ├─certmgr.service
● ├─cfssl.service
● ├─docker.service
● ├─etcd.service
● ├─flannel.service
● ├─kubernetes.target
● │ ├─kube-addon-manager.service
● │ ├─kube-proxy.service
● │ ├─kube-apiserver-online.target
● │ │ ├─flannel-rbac-bootstrap.service
● │ │ ├─kube-apiserver-online.service
● │ │ ├─kube-apiserver.service
● │ │ ├─kube-controller-manager.service
● │ │ └─kube-scheduler.service
● │ └─node-online.target
● │   ├─node-online.service
● │   ├─flannel.target
● │   │ ├─flannel.service
● │   │ └─mk-docker-opts.service
● │   └─kubelet.target
● │     └─kubelet.service
● ├─network-online.target
● │ └─cfssl-online.target
● │   ├─certmgr.service
● │   ├─cfssl-online.service
● │   └─kube-certmgr-bootstrap.service
This commit is contained in:
Christian Albrecht 2019-03-01 08:44:45 +01:00
parent f9e2f76a59
commit 62f03750e4
No known key found for this signature in database
GPG Key ID: 866AF4B25DF7EB00
11 changed files with 251 additions and 51 deletions

View File

@ -72,7 +72,7 @@ in
systemd.services.kube-addon-manager = {
description = "Kubernetes addon manager";
wantedBy = [ "kubernetes.target" ];
after = [ "kube-apiserver.service" ];
after = [ "kube-apiserver-online.target" "node-online.target" ];
environment.ADDON_PATH = "/etc/kubernetes/addons/";
path = [ pkgs.gawk ];
serviceConfig = {

View File

@ -293,8 +293,9 @@ in
in {
systemd.services.kube-apiserver = {
description = "Kubernetes APIServer Service";
wantedBy = [ "kubernetes.target" ];
after = [ "network.target" ];
wantedBy = [ "kube-apiserver-online.target" ];
after = [ "certmgr.service" ];
before = [ "kube-apiserver-online.target" ];
serviceConfig = {
Slice = "kubernetes.slice";
ExecStart = ''${top.package}/bin/kube-apiserver \
@ -459,7 +460,28 @@ in
};
}))
{
# Synchronization target for the control plane: it is pulled in by
# kubernetes.target and ordered before it.
systemd.targets.kube-apiserver-online = {
wantedBy = [ "kubernetes.target" ];
before = [ "kubernetes.target" ];
};
# Gate service for kube-apiserver-online.target. It is only defined when
# top.flannel.enable is set, because it authenticates with the flannelClient
# certificate pair.
systemd.services.kube-apiserver-online = mkIf top.flannel.enable {
description = "apiserver control plane is online";
wantedBy = [ "kube-apiserver-online.target" ];
# Ordered after the scheduler and controller manager, and before the target.
after = [ "kube-scheduler.service" "kube-controller-manager.service" ];
before = [ "kube-apiserver-online.target" ];
# Poll /healthz (3 second pause between attempts) until curl succeeds.
# No address is passed, so mkWaitCurl's default address is used.
preStart = ''
${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
sleep = 3;
path = "/healthz";
cacert = top.caFile;
inherit cert key;
})}
'';
# The main command only logs a message; the waiting happens in preStart.
script = "echo apiserver control plane is online";
};
}
];
}

View File

@ -116,8 +116,17 @@ in
systemd.services.kube-controller-manager = {
description = "Kubernetes Controller Manager Service";
wantedBy = [ "kubernetes.target" ];
wantedBy = [ "kube-apiserver-online.target" ];
after = [ "kube-apiserver.service" ];
before = [ "kube-apiserver-online.target" ];
preStart = ''
${top.lib.mkWaitCurl (with top.pki.certs.controllerManagerClient; {
sleep = 1;
path = "/api";
cacert = top.caFile;
inherit cert key;
})}
'';
serviceConfig = {
RestartSec = "30s";
Restart = "on-failure";

View File

@ -73,6 +73,18 @@ let
};
};
# mkWaitCurl: returns a shell snippet that retries a curl request until it
# succeeds.
#
# Arguments (all optional):
#   address - base URL to request; defaults to cfg.apiserverAddress
#   sleep   - argument given to `sleep` between attempts (default 2)
#   path    - string appended directly to address (default "")
#   args    - text appended verbatim to the curl command line
#             (default "-o /dev/null"). It is spliced in unquoted, so it may
#             contain a shell pipeline; the loop then tests that pipeline's
#             exit status.
#   cacert, cert, key - when not null, passed as --cacert / --cert / --key
#
# The generated loop has no retry limit of its own; any bound must come from
# the unit that embeds it.
mkWaitCurl = { address ? cfg.apiserverAddress, sleep ? 2, path ? "", args ? "-o /dev/null",
cacert ? null, cert ? null, key ? null, }: ''
while ! ${pkgs.curl}/bin/curl --fail-early -fs \
${if cacert != null then "--cacert ${cacert}" else ""} \
${if cert != null then "--cert ${cert}" else ""} \
${if key != null then "--key ${key}" else ""} \
${address}${path} ${args} ; do
sleep ${toString sleep}
echo Waiting to be able to reach ${address}${path}
done
'';
kubeConfigDefaults = {
server = mkDefault cfg.kubeconfig.server;
caFile = mkDefault cfg.kubeconfig.caFile;
@ -162,6 +174,7 @@ in {
inherit mkCert;
inherit mkKubeConfig;
inherit mkKubeConfigOptions;
inherit mkWaitCurl;
};
type = types.attrs;
};

View File

@ -27,7 +27,12 @@ in
};
###### implementation
config = mkIf cfg.enable {
config = mkIf cfg.enable (let
flannelBootstrapPaths = mkIf top.apiserver.enable [
top.pki.certs.clusterAdmin.cert
top.pki.certs.clusterAdmin.key
];
in {
services.flannel = {
enable = mkDefault true;
@ -48,8 +53,10 @@ in
}];
};
systemd.services."mk-docker-opts" = {
systemd.services.mk-docker-opts = {
description = "Pre-Docker Actions";
wantedBy = [ "flannel.target" ];
before = [ "flannel.target" ];
path = with pkgs; [ gawk gnugrep ];
script = ''
${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker
@ -68,6 +75,17 @@ in
};
};
# flannel.target is pulled in by node-online.target and ordered before it.
systemd.targets.flannel = {
wantedBy = [ "node-online.target" ];
before = [ "node-online.target" ];
};
# Ordering for the flannel service: it is pulled in by flannel.target, starts
# after kubelet.target and must be started before flannel.target is reached.
systemd.services.flannel = {
wantedBy = [ "flannel.target" ];
after = [ "kubelet.target" ];
before = [ "flannel.target" ];
};
systemd.services.docker = {
environment.DOCKER_OPTS = "-b none";
serviceConfig.EnvironmentFile = "-/run/flannel/docker";
@ -93,44 +111,69 @@ in
};
# give flannel some kubernetes rbac permissions if applicable
services.kubernetes.addonManager.bootstrapAddons = mkIf ((storageBackend == "kubernetes") && (elem "RBAC" top.apiserver.authorizationMode)) {
systemd.services.flannel-rbac-bootstrap = mkIf (top.apiserver.enable && (elem "RBAC" top.apiserver.authorizationMode)) {
flannel-cr = {
apiVersion = "rbac.authorization.k8s.io/v1beta1";
kind = "ClusterRole";
metadata = { name = "flannel"; };
rules = [{
apiGroups = [ "" ];
resources = [ "pods" ];
verbs = [ "get" ];
}
{
apiGroups = [ "" ];
resources = [ "nodes" ];
verbs = [ "list" "watch" ];
}
{
apiGroups = [ "" ];
resources = [ "nodes/status" ];
verbs = [ "patch" ];
}];
};
wantedBy = [ "kube-apiserver-online.target" ];
after = [ "kube-apiserver-online.target" ];
before = [ "flannel.service" ];
path = with pkgs; [ kubectl ];
preStart = let
files = mapAttrsToList (n: v: pkgs.writeText "${n}.json" (builtins.toJSON v)) {
flannel-cr = {
apiVersion = "rbac.authorization.k8s.io/v1beta1";
kind = "ClusterRole";
metadata = { name = "flannel"; };
rules = [{
apiGroups = [ "" ];
resources = [ "pods" ];
verbs = [ "get" ];
}
{
apiGroups = [ "" ];
resources = [ "nodes" ];
verbs = [ "list" "watch" ];
}
{
apiGroups = [ "" ];
resources = [ "nodes/status" ];
verbs = [ "patch" ];
}];
};
flannel-crb = {
apiVersion = "rbac.authorization.k8s.io/v1beta1";
kind = "ClusterRoleBinding";
metadata = { name = "flannel"; };
roleRef = {
apiGroup = "rbac.authorization.k8s.io";
kind = "ClusterRole";
name = "flannel";
flannel-crb = {
apiVersion = "rbac.authorization.k8s.io/v1beta1";
kind = "ClusterRoleBinding";
metadata = { name = "flannel"; };
roleRef = {
apiGroup = "rbac.authorization.k8s.io";
kind = "ClusterRole";
name = "flannel";
};
subjects = [{
kind = "User";
name = "flannel-client";
}];
};
};
subjects = [{
kind = "User";
name = "flannel-client";
}];
};
in ''
${top.lib.mkWaitCurl (with top.pki.certs.clusterAdmin; {
path = "/";
cacert = top.caFile;
inherit cert key;
})}
kubectl -s ${top.apiserverAddress} --certificate-authority=${top.caFile} --client-certificate=${top.pki.certs.clusterAdmin.cert} --client-key=${top.pki.certs.clusterAdmin.key} apply -f ${concatStringsSep " \\\n -f " files}
'';
script = "echo Ok";
unitConfig.ConditionPathExists = flannelBootstrapPaths;
};
};
# Path unit watching flannelBootstrapPaths for existence and for changes.
# It is only defined when the apiserver is enabled.
# NOTE(review): presumably this activates flannel-rbac-bootstrap.service (the
# systemd default for a path unit of the same name) — confirm.
systemd.paths.flannel-rbac-bootstrap = mkIf top.apiserver.enable {
wantedBy = [ "flannel-rbac-bootstrap.service" ];
pathConfig = {
PathExists = flannelBootstrapPaths;
PathChanged = flannelBootstrapPaths;
};
};
});
}

View File

@ -252,8 +252,9 @@ in
systemd.services.kubelet = {
description = "Kubernetes Kubelet Service";
wantedBy = [ "kubernetes.target" ];
after = [ "network.target" "docker.service" "kube-apiserver.service" ];
wantedBy = [ "kubelet.target" ];
after = [ "kube-apiserver-online.target" ];
before = [ "kubelet.target" ];
path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path;
preStart = ''
${concatMapStrings (img: ''
@ -325,6 +326,30 @@ in
};
};
systemd.services.docker.before = [ "kubelet.service" ];
# Oneshot gate for node-online.target: it finishes only once docker has been
# (re)started after flannel wrote its docker options file.
systemd.services.node-online = {
wantedBy = [ "node-online.target" ];
after = [ "flannel.target" "kubelet.target" ];
before = [ "node-online.target" ];
# This is complicated: flannel needs kubelet to run the pause container before
# it discusses the node CIDR with the apiserver; afterwards it configures and
# restarts dockerd. Until then, prevent any pods from being created, because
# they would have to be recreated anyway once flannel has changed the network
# of docker0.
script = let
# File that must exist before the timestamp comparison starts.
docker-env = "/run/flannel/docker";
# Last modification time of that file, in seconds since the epoch.
flannel-date = "stat --print=%Y ${docker-env}";
# Time at which the docker unit last became active, as reported by systemd.
docker-date = "systemctl show --property=ActiveEnterTimestamp --value docker";
in ''
while ! test -f ${docker-env} ; do sleep 1 ; done
while test `${flannel-date}` -gt `date +%s --date="$(${docker-date})"` ; do
sleep 1
done
'';
serviceConfig.Type = "oneshot";
serviceConfig.Slice = "kubernetes.slice";
};
# Always include cni plugins
services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins];
@ -369,5 +394,16 @@ in
};
})
{
# kubelet.target is pulled in by node-online.target and ordered before it.
systemd.targets.kubelet = {
wantedBy = [ "node-online.target" ];
before = [ "node-online.target" ];
};
# node-online.target is pulled in by kubernetes.target and ordered before it.
systemd.targets.node-online = {
wantedBy = [ "kubernetes.target" ];
before = [ "kubernetes.target" ];
};
}
];
}

View File

@ -119,6 +119,7 @@ in
cfsslCertPathPrefix = "${config.services.cfssl.dataDir}/cfssl";
cfsslCert = "${cfsslCertPathPrefix}.pem";
cfsslKey = "${cfsslCertPathPrefix}-key.pem";
cfsslPort = toString config.services.cfssl.port;
certmgrPaths = [
top.caFile
@ -191,13 +192,39 @@ in
chown cfssl "${cfsslAPITokenPath}" && chmod 400 "${cfsslAPITokenPath}"
'')]);
# cfssl-online.target is pulled in by network-online.target and is ordered
# after cfssl.service, network-online.target and cfssl-online.service.
systemd.targets.cfssl-online = {
wantedBy = [ "network-online.target" ];
after = [ "cfssl.service" "network-online.target" "cfssl-online.service" ];
};
# Gate service for cfssl-online.target: it waits until the cfssl endpoint at
# `remote` answers a request.
systemd.services.cfssl-online = {
description = "Wait for ${remote} to be reachable.";
wantedBy = [ "cfssl-online.target" ];
before = [ "cfssl-online.target" ];
# Retry a request to /api/v1/cfssl/info until curl succeeds. The extra curl
# args skip TLS certificate verification (-k), send '{}' as request data (-d)
# and discard the response body.
preStart = ''
${top.lib.mkWaitCurl {
address = remote;
path = "/api/v1/cfssl/info";
args = "-kd '{}' -o /dev/null";
}}
'';
# The main command only logs a message; the waiting happens in preStart.
script = "echo Ok";
serviceConfig = {
# The wait loop has no limit of its own; this sets the unit's timeout.
TimeoutSec = "300";
};
};
systemd.services.kube-certmgr-bootstrap = {
description = "Kubernetes certmgr bootstrapper";
wantedBy = [ "certmgr.service" ];
after = [ "cfssl.target" ];
wantedBy = [ "cfssl-online.target" ];
after = [ "cfssl-online.target" ];
before = [ "certmgr.service" ];
script = concatStringsSep "\n" [''
set -e
mkdir -p $(dirname ${certmgrAPITokenPath})
mkdir -p $(dirname ${top.caFile})
# If there's a cfssl (cert issuer) running locally, then don't rely on user to
# manually paste it in place. Just symlink.
# otherwise, create the target file, ready for users to insert the token
@ -209,14 +236,18 @@ in
fi
''
(optionalString (cfg.pkiTrustOnBootstrap) ''
if [ ! -f "${top.caFile}" ] || [ $(cat "${top.caFile}" | wc -c) -lt 1 ]; then
${pkgs.curl}/bin/curl --fail-early -f -kd '{}' ${remote}/api/v1/cfssl/info | \
${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}
if [ ! -s "${top.caFile}" ]; then
${top.lib.mkWaitCurl {
address = "https://${top.masterAddress}:${cfsslPort}";
path = "/api/v1/cfssl/info";
args = "-kd '{}' -o - | ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}";
}}
fi
'')
];
serviceConfig = {
RestartSec = "10s";
TimeoutSec = "300";
RestartSec = "1s";
Restart = "on-failure";
};
};
@ -254,6 +285,14 @@ in
};
# Extra ordering and start-up gating for certmgr: it is pulled in by
# cfssl-online.target and starts after that target and after
# kube-certmgr-bootstrap.service.
systemd.services.certmgr = {
wantedBy = [ "cfssl-online.target" ];
after = [ "cfssl-online.target" "kube-certmgr-bootstrap.service" ];
# Block until the API token file exists and is non-empty (`test -s`).
preStart = ''
while ! test -s ${certmgrAPITokenPath} ; do
sleep 1
echo Waiting for ${certmgrAPITokenPath}
done
'';
unitConfig.ConditionPathExists = certmgrPaths;
};
@ -289,6 +328,12 @@ in
''
export KUBECONFIG=${clusterAdminKubeconfig}
${kubectl}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files}
${top.lib.mkWaitCurl (with top.pki.certs.addonManager; {
path = "/api/v1/namespaces/kube-system/serviceaccounts/default";
cacert = top.caFile;
inherit cert key;
})}
'';
})
{
@ -384,6 +429,14 @@ in
};
# Start-up gating for flannel: before the service starts, request
# /api/v1/nodes with the flannelClient certificate pair and retry until the
# response contains "podCIDR".
systemd.services.flannel = {
preStart = ''
${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
path = "/api/v1/nodes";
cacert = top.caFile;
inherit cert key;
args = "-o - | grep podCIDR >/dev/null";
})}
'';
unitConfig.ConditionPathExists = flannelPaths;
};

View File

@ -49,8 +49,16 @@ in
systemd.services.kube-proxy = {
description = "Kubernetes Proxy Service";
wantedBy = [ "kubernetes.target" ];
after = [ "kube-apiserver.service" ];
after = [ "node-online.target" ];
before = [ "kubernetes.target" ];
path = with pkgs; [ iptables conntrack_tools ];
preStart = ''
${top.lib.mkWaitCurl (with top.pki.certs.kubeProxyClient; {
path = "/api/v1/nodes/${top.kubelet.hostname}";
cacert = top.caFile;
inherit cert key;
})}
'';
serviceConfig = {
Slice = "kubernetes.slice";
ExecStart = ''${top.package}/bin/kube-proxy \

View File

@ -59,8 +59,17 @@ in
config = mkIf cfg.enable {
systemd.services.kube-scheduler = {
description = "Kubernetes Scheduler Service";
wantedBy = [ "kubernetes.target" ];
wantedBy = [ "kube-apiserver-online.target" ];
after = [ "kube-apiserver.service" ];
before = [ "kube-apiserver-online.target" ];
preStart = ''
${top.lib.mkWaitCurl (with top.pki.certs.schedulerClient; {
sleep = 1;
path = "/api";
cacert = top.caFile;
inherit cert key;
})}
'';
serviceConfig = {
Slice = "kubernetes.slice";
ExecStart = ''${top.package}/bin/kube-scheduler \

View File

@ -77,6 +77,7 @@ let
singleNodeTest = {
test = ''
# prepare machine1 for test
$machine1->waitForUnit("kubernetes.target");
$machine1->waitUntilSucceeds("kubectl get node machine1.${domain} | grep -w Ready");
$machine1->waitUntilSucceeds("docker load < ${redisImage}");
$machine1->waitUntilSucceeds("kubectl create -f ${redisPod}");
@ -102,6 +103,8 @@ let
# Node token exchange
$machine1->waitUntilSucceeds("cp -f /var/lib/cfssl/apitoken.secret /tmp/shared/apitoken.secret");
$machine2->waitUntilSucceeds("cat /tmp/shared/apitoken.secret | nixos-kubernetes-node-join");
$machine1->waitForUnit("kubernetes.target");
$machine2->waitForUnit("kubernetes.target");
# prepare machines for test
$machine1->waitUntilSucceeds("kubectl get node machine2.${domain} | grep -w Ready");

View File

@ -94,6 +94,8 @@ let
singlenode = base // {
test = ''
$machine1->waitForUnit("kubernetes.target");
$machine1->waitUntilSucceeds("kubectl get node machine1.my.zyx | grep -w Ready");
$machine1->waitUntilSucceeds("docker load < ${kubectlImage}");
@ -116,6 +118,8 @@ let
# Node token exchange
$machine1->waitUntilSucceeds("cp -f /var/lib/cfssl/apitoken.secret /tmp/shared/apitoken.secret");
$machine2->waitUntilSucceeds("cat /tmp/shared/apitoken.secret | nixos-kubernetes-node-join");
$machine1->waitForUnit("kubernetes.target");
$machine2->waitForUnit("kubernetes.target");
$machine1->waitUntilSucceeds("kubectl get node machine2.my.zyx | grep -w Ready");