nixos/kubernetes: Stabilize services startup across machines
by adding targets and curl wait loops to services to ensure services are not started before their dependent services are reachable. Extra targets cfssl-online.target and kube-apiserver-online.target synchronize starts across machines and node-online.target ensures docker is restarted and ready to deploy containers on after flannel has negotiated the network cidr with the apiserver. Since flannel needs to be started before addon-manager to configure the docker interface, it has to have its own rbac bootstrap service. The curl wait loops within the other services exist to ensure that when starting the service it is able to do its work immediately without cluttering the log with messages about failing conditions. By ensuring kubernetes.target is only reached after starting the cluster it can be used in the tests as a wait condition. In kube-certmgr-bootstrap mkdir is needed for it to not fail to start. The following is the relevant part of systemctl list-dependencies default.target ● ├─certmgr.service ● ├─cfssl.service ● ├─docker.service ● ├─etcd.service ● ├─flannel.service ● ├─kubernetes.target ● │ ├─kube-addon-manager.service ● │ ├─kube-proxy.service ● │ ├─kube-apiserver-online.target ● │ │ ├─flannel-rbac-bootstrap.service ● │ │ ├─kube-apiserver-online.service ● │ │ ├─kube-apiserver.service ● │ │ ├─kube-controller-manager.service ● │ │ └─kube-scheduler.service ● │ └─node-online.target ● │ ├─node-online.service ● │ ├─flannel.target ● │ │ ├─flannel.service ● │ │ └─mk-docker-opts.service ● │ └─kubelet.target ● │ └─kubelet.service ● ├─network-online.target ● │ └─cfssl-online.target ● │ ├─certmgr.service ● │ ├─cfssl-online.service ● │ └─kube-certmgr-bootstrap.service
This commit is contained in:
parent
f9e2f76a59
commit
62f03750e4
@ -72,7 +72,7 @@ in
|
||||
systemd.services.kube-addon-manager = {
|
||||
description = "Kubernetes addon manager";
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
after = [ "kube-apiserver.service" ];
|
||||
after = [ "kube-apiserver-online.target" "node-online.target" ];
|
||||
environment.ADDON_PATH = "/etc/kubernetes/addons/";
|
||||
path = [ pkgs.gawk ];
|
||||
serviceConfig = {
|
||||
|
@ -293,8 +293,9 @@ in
|
||||
in {
|
||||
systemd.services.kube-apiserver = {
|
||||
description = "Kubernetes APIServer Service";
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
after = [ "network.target" ];
|
||||
wantedBy = [ "kube-apiserver-online.target" ];
|
||||
after = [ "certmgr.service" ];
|
||||
before = [ "kube-apiserver-online.target" ];
|
||||
serviceConfig = {
|
||||
Slice = "kubernetes.slice";
|
||||
ExecStart = ''${top.package}/bin/kube-apiserver \
|
||||
@ -459,7 +460,28 @@ in
|
||||
};
|
||||
|
||||
}))
|
||||
{
|
||||
systemd.targets.kube-apiserver-online = {
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
before = [ "kubernetes.target" ];
|
||||
};
|
||||
|
||||
systemd.services.kube-apiserver-online = mkIf top.flannel.enable {
|
||||
description = "apiserver control plane is online";
|
||||
wantedBy = [ "kube-apiserver-online.target" ];
|
||||
after = [ "kube-scheduler.service" "kube-controller-manager.service" ];
|
||||
before = [ "kube-apiserver-online.target" ];
|
||||
preStart = ''
|
||||
${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
|
||||
sleep = 3;
|
||||
path = "/healthz";
|
||||
cacert = top.caFile;
|
||||
inherit cert key;
|
||||
})}
|
||||
'';
|
||||
script = "echo apiserver control plane is online";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
}
|
||||
|
@ -116,8 +116,17 @@ in
|
||||
|
||||
systemd.services.kube-controller-manager = {
|
||||
description = "Kubernetes Controller Manager Service";
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
wantedBy = [ "kube-apiserver-online.target" ];
|
||||
after = [ "kube-apiserver.service" ];
|
||||
before = [ "kube-apiserver-online.target" ];
|
||||
preStart = ''
|
||||
${top.lib.mkWaitCurl (with top.pki.certs.controllerManagerClient; {
|
||||
sleep = 1;
|
||||
path = "/api";
|
||||
cacert = top.caFile;
|
||||
inherit cert key;
|
||||
})}
|
||||
'';
|
||||
serviceConfig = {
|
||||
RestartSec = "30s";
|
||||
Restart = "on-failure";
|
||||
|
@ -73,6 +73,18 @@ let
|
||||
};
|
||||
};
|
||||
|
||||
mkWaitCurl = { address ? cfg.apiserverAddress, sleep ? 2, path ? "", args ? "-o /dev/null",
|
||||
cacert ? null, cert ? null, key ? null, }: ''
|
||||
while ! ${pkgs.curl}/bin/curl --fail-early -fs \
|
||||
${if cacert != null then "--cacert ${cacert}" else ""} \
|
||||
${if cert != null then "--cert ${cert}" else ""} \
|
||||
${if key != null then "--key ${key}" else ""} \
|
||||
${address}${path} ${args} ; do
|
||||
sleep ${toString sleep}
|
||||
echo Waiting to be able to reach ${address}${path}
|
||||
done
|
||||
'';
|
||||
|
||||
kubeConfigDefaults = {
|
||||
server = mkDefault cfg.kubeconfig.server;
|
||||
caFile = mkDefault cfg.kubeconfig.caFile;
|
||||
@ -162,6 +174,7 @@ in {
|
||||
inherit mkCert;
|
||||
inherit mkKubeConfig;
|
||||
inherit mkKubeConfigOptions;
|
||||
inherit mkWaitCurl;
|
||||
};
|
||||
type = types.attrs;
|
||||
};
|
||||
|
@ -27,7 +27,12 @@ in
|
||||
};
|
||||
|
||||
###### implementation
|
||||
config = mkIf cfg.enable {
|
||||
config = mkIf cfg.enable (let
|
||||
flannelBootstrapPaths = mkIf top.apiserver.enable [
|
||||
top.pki.certs.clusterAdmin.cert
|
||||
top.pki.certs.clusterAdmin.key
|
||||
];
|
||||
in {
|
||||
services.flannel = {
|
||||
|
||||
enable = mkDefault true;
|
||||
@ -48,8 +53,10 @@ in
|
||||
}];
|
||||
};
|
||||
|
||||
systemd.services."mk-docker-opts" = {
|
||||
systemd.services.mk-docker-opts = {
|
||||
description = "Pre-Docker Actions";
|
||||
wantedBy = [ "flannel.target" ];
|
||||
before = [ "flannel.target" ];
|
||||
path = with pkgs; [ gawk gnugrep ];
|
||||
script = ''
|
||||
${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker
|
||||
@ -68,6 +75,17 @@ in
|
||||
};
|
||||
};
|
||||
|
||||
systemd.targets.flannel = {
|
||||
wantedBy = [ "node-online.target" ];
|
||||
before = [ "node-online.target" ];
|
||||
};
|
||||
|
||||
systemd.services.flannel = {
|
||||
wantedBy = [ "flannel.target" ];
|
||||
after = [ "kubelet.target" ];
|
||||
before = [ "flannel.target" ];
|
||||
};
|
||||
|
||||
systemd.services.docker = {
|
||||
environment.DOCKER_OPTS = "-b none";
|
||||
serviceConfig.EnvironmentFile = "-/run/flannel/docker";
|
||||
@ -93,8 +111,14 @@ in
|
||||
};
|
||||
|
||||
# give flannel some kubernetes rbac permissions if applicable
|
||||
services.kubernetes.addonManager.bootstrapAddons = mkIf ((storageBackend == "kubernetes") && (elem "RBAC" top.apiserver.authorizationMode)) {
|
||||
systemd.services.flannel-rbac-bootstrap = mkIf (top.apiserver.enable && (elem "RBAC" top.apiserver.authorizationMode)) {
|
||||
|
||||
wantedBy = [ "kube-apiserver-online.target" ];
|
||||
after = [ "kube-apiserver-online.target" ];
|
||||
before = [ "flannel.service" ];
|
||||
path = with pkgs; [ kubectl ];
|
||||
preStart = let
|
||||
files = mapAttrsToList (n: v: pkgs.writeText "${n}.json" (builtins.toJSON v)) {
|
||||
flannel-cr = {
|
||||
apiVersion = "rbac.authorization.k8s.io/v1beta1";
|
||||
kind = "ClusterRole";
|
||||
@ -130,7 +154,26 @@ in
|
||||
name = "flannel-client";
|
||||
}];
|
||||
};
|
||||
};
|
||||
in ''
|
||||
${top.lib.mkWaitCurl (with top.pki.certs.clusterAdmin; {
|
||||
path = "/";
|
||||
cacert = top.caFile;
|
||||
inherit cert key;
|
||||
})}
|
||||
|
||||
kubectl -s ${top.apiserverAddress} --certificate-authority=${top.caFile} --client-certificate=${top.pki.certs.clusterAdmin.cert} --client-key=${top.pki.certs.clusterAdmin.key} apply -f ${concatStringsSep " \\\n -f " files}
|
||||
'';
|
||||
script = "echo Ok";
|
||||
unitConfig.ConditionPathExists = flannelBootstrapPaths;
|
||||
};
|
||||
|
||||
systemd.paths.flannel-rbac-bootstrap = mkIf top.apiserver.enable {
|
||||
wantedBy = [ "flannel-rbac-bootstrap.service" ];
|
||||
pathConfig = {
|
||||
PathExists = flannelBootstrapPaths;
|
||||
PathChanged = flannelBootstrapPaths;
|
||||
};
|
||||
};
|
||||
});
|
||||
}
|
||||
|
@ -252,8 +252,9 @@ in
|
||||
|
||||
systemd.services.kubelet = {
|
||||
description = "Kubernetes Kubelet Service";
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
after = [ "network.target" "docker.service" "kube-apiserver.service" ];
|
||||
wantedBy = [ "kubelet.target" ];
|
||||
after = [ "kube-apiserver-online.target" ];
|
||||
before = [ "kubelet.target" ];
|
||||
path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path;
|
||||
preStart = ''
|
||||
${concatMapStrings (img: ''
|
||||
@ -325,6 +326,30 @@ in
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.docker.before = [ "kubelet.service" ];
|
||||
|
||||
systemd.services.node-online = {
|
||||
wantedBy = [ "node-online.target" ];
|
||||
after = [ "flannel.target" "kubelet.target" ];
|
||||
before = [ "node-online.target" ];
|
||||
# it is complicated. flannel needs kubelet to run the pause container before
|
||||
# it discusses the node CIDR with apiserver and afterwards configures and restarts
|
||||
# dockerd. Until then prevent creating any pods because they have to be recreated anyway
|
||||
# because the network of docker0 has been changed by flannel.
|
||||
script = let
|
||||
docker-env = "/run/flannel/docker";
|
||||
flannel-date = "stat --print=%Y ${docker-env}";
|
||||
docker-date = "systemctl show --property=ActiveEnterTimestamp --value docker";
|
||||
in ''
|
||||
while ! test -f ${docker-env} ; do sleep 1 ; done
|
||||
while test `${flannel-date}` -gt `date +%s --date="$(${docker-date})"` ; do
|
||||
sleep 1
|
||||
done
|
||||
'';
|
||||
serviceConfig.Type = "oneshot";
|
||||
serviceConfig.Slice = "kubernetes.slice";
|
||||
};
|
||||
|
||||
# Always include cni plugins
|
||||
services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins];
|
||||
|
||||
@ -369,5 +394,16 @@ in
|
||||
};
|
||||
})
|
||||
|
||||
{
|
||||
systemd.targets.kubelet = {
|
||||
wantedBy = [ "node-online.target" ];
|
||||
before = [ "node-online.target" ];
|
||||
};
|
||||
|
||||
systemd.targets.node-online = {
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
before = [ "kubernetes.target" ];
|
||||
};
|
||||
}
|
||||
];
|
||||
}
|
||||
|
@ -119,6 +119,7 @@ in
|
||||
cfsslCertPathPrefix = "${config.services.cfssl.dataDir}/cfssl";
|
||||
cfsslCert = "${cfsslCertPathPrefix}.pem";
|
||||
cfsslKey = "${cfsslCertPathPrefix}-key.pem";
|
||||
cfsslPort = toString config.services.cfssl.port;
|
||||
|
||||
certmgrPaths = [
|
||||
top.caFile
|
||||
@ -191,13 +192,39 @@ in
|
||||
chown cfssl "${cfsslAPITokenPath}" && chmod 400 "${cfsslAPITokenPath}"
|
||||
'')]);
|
||||
|
||||
systemd.targets.cfssl-online = {
|
||||
wantedBy = [ "network-online.target" ];
|
||||
after = [ "cfssl.service" "network-online.target" "cfssl-online.service" ];
|
||||
};
|
||||
|
||||
systemd.services.cfssl-online = {
|
||||
description = "Wait for ${remote} to be reachable.";
|
||||
wantedBy = [ "cfssl-online.target" ];
|
||||
before = [ "cfssl-online.target" ];
|
||||
preStart = ''
|
||||
${top.lib.mkWaitCurl {
|
||||
address = remote;
|
||||
path = "/api/v1/cfssl/info";
|
||||
args = "-kd '{}' -o /dev/null";
|
||||
}}
|
||||
'';
|
||||
script = "echo Ok";
|
||||
serviceConfig = {
|
||||
TimeoutSec = "300";
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.kube-certmgr-bootstrap = {
|
||||
description = "Kubernetes certmgr bootstrapper";
|
||||
wantedBy = [ "certmgr.service" ];
|
||||
after = [ "cfssl.target" ];
|
||||
wantedBy = [ "cfssl-online.target" ];
|
||||
after = [ "cfssl-online.target" ];
|
||||
before = [ "certmgr.service" ];
|
||||
script = concatStringsSep "\n" [''
|
||||
set -e
|
||||
|
||||
mkdir -p $(dirname ${certmgrAPITokenPath})
|
||||
mkdir -p $(dirname ${top.caFile})
|
||||
|
||||
# If there's a cfssl (cert issuer) running locally, then don't rely on user to
|
||||
# manually paste it in place. Just symlink.
|
||||
# otherwise, create the target file, ready for users to insert the token
|
||||
@ -209,14 +236,18 @@ in
|
||||
fi
|
||||
''
|
||||
(optionalString (cfg.pkiTrustOnBootstrap) ''
|
||||
if [ ! -f "${top.caFile}" ] || [ $(cat "${top.caFile}" | wc -c) -lt 1 ]; then
|
||||
${pkgs.curl}/bin/curl --fail-early -f -kd '{}' ${remote}/api/v1/cfssl/info | \
|
||||
${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}
|
||||
if [ ! -s "${top.caFile}" ]; then
|
||||
${top.lib.mkWaitCurl {
|
||||
address = "https://${top.masterAddress}:${cfsslPort}";
|
||||
path = "/api/v1/cfssl/info";
|
||||
args = "-kd '{}' -o - | ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}";
|
||||
}}
|
||||
fi
|
||||
'')
|
||||
];
|
||||
serviceConfig = {
|
||||
RestartSec = "10s";
|
||||
TimeoutSec = "300";
|
||||
RestartSec = "1s";
|
||||
Restart = "on-failure";
|
||||
};
|
||||
};
|
||||
@ -254,6 +285,14 @@ in
|
||||
};
|
||||
|
||||
systemd.services.certmgr = {
|
||||
wantedBy = [ "cfssl-online.target" ];
|
||||
after = [ "cfssl-online.target" "kube-certmgr-bootstrap.service" ];
|
||||
preStart = ''
|
||||
while ! test -s ${certmgrAPITokenPath} ; do
|
||||
sleep 1
|
||||
echo Waiting for ${certmgrAPITokenPath}
|
||||
done
|
||||
'';
|
||||
unitConfig.ConditionPathExists = certmgrPaths;
|
||||
};
|
||||
|
||||
@ -289,6 +328,12 @@ in
|
||||
''
|
||||
export KUBECONFIG=${clusterAdminKubeconfig}
|
||||
${kubectl}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files}
|
||||
|
||||
${top.lib.mkWaitCurl (with top.pki.certs.addonManager; {
|
||||
path = "/api/v1/namespaces/kube-system/serviceaccounts/default";
|
||||
cacert = top.caFile;
|
||||
inherit cert key;
|
||||
})}
|
||||
'';
|
||||
})
|
||||
{
|
||||
@ -384,6 +429,14 @@ in
|
||||
};
|
||||
|
||||
systemd.services.flannel = {
|
||||
preStart = ''
|
||||
${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
|
||||
path = "/api/v1/nodes";
|
||||
cacert = top.caFile;
|
||||
inherit cert key;
|
||||
args = "-o - | grep podCIDR >/dev/null";
|
||||
})}
|
||||
'';
|
||||
unitConfig.ConditionPathExists = flannelPaths;
|
||||
};
|
||||
|
||||
|
@ -49,8 +49,16 @@ in
|
||||
systemd.services.kube-proxy = {
|
||||
description = "Kubernetes Proxy Service";
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
after = [ "kube-apiserver.service" ];
|
||||
after = [ "node-online.target" ];
|
||||
before = [ "kubernetes.target" ];
|
||||
path = with pkgs; [ iptables conntrack_tools ];
|
||||
preStart = ''
|
||||
${top.lib.mkWaitCurl (with top.pki.certs.kubeProxyClient; {
|
||||
path = "/api/v1/nodes/${top.kubelet.hostname}";
|
||||
cacert = top.caFile;
|
||||
inherit cert key;
|
||||
})}
|
||||
'';
|
||||
serviceConfig = {
|
||||
Slice = "kubernetes.slice";
|
||||
ExecStart = ''${top.package}/bin/kube-proxy \
|
||||
|
@ -59,8 +59,17 @@ in
|
||||
config = mkIf cfg.enable {
|
||||
systemd.services.kube-scheduler = {
|
||||
description = "Kubernetes Scheduler Service";
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
wantedBy = [ "kube-apiserver-online.target" ];
|
||||
after = [ "kube-apiserver.service" ];
|
||||
before = [ "kube-apiserver-online.target" ];
|
||||
preStart = ''
|
||||
${top.lib.mkWaitCurl (with top.pki.certs.schedulerClient; {
|
||||
sleep = 1;
|
||||
path = "/api";
|
||||
cacert = top.caFile;
|
||||
inherit cert key;
|
||||
})}
|
||||
'';
|
||||
serviceConfig = {
|
||||
Slice = "kubernetes.slice";
|
||||
ExecStart = ''${top.package}/bin/kube-scheduler \
|
||||
|
@ -77,6 +77,7 @@ let
|
||||
singleNodeTest = {
|
||||
test = ''
|
||||
# prepare machine1 for test
|
||||
$machine1->waitForUnit("kubernetes.target");
|
||||
$machine1->waitUntilSucceeds("kubectl get node machine1.${domain} | grep -w Ready");
|
||||
$machine1->waitUntilSucceeds("docker load < ${redisImage}");
|
||||
$machine1->waitUntilSucceeds("kubectl create -f ${redisPod}");
|
||||
@ -102,6 +103,8 @@ let
|
||||
# Node token exchange
|
||||
$machine1->waitUntilSucceeds("cp -f /var/lib/cfssl/apitoken.secret /tmp/shared/apitoken.secret");
|
||||
$machine2->waitUntilSucceeds("cat /tmp/shared/apitoken.secret | nixos-kubernetes-node-join");
|
||||
$machine1->waitForUnit("kubernetes.target");
|
||||
$machine2->waitForUnit("kubernetes.target");
|
||||
|
||||
# prepare machines for test
|
||||
$machine1->waitUntilSucceeds("kubectl get node machine2.${domain} | grep -w Ready");
|
||||
|
@ -94,6 +94,8 @@ let
|
||||
|
||||
singlenode = base // {
|
||||
test = ''
|
||||
$machine1->waitForUnit("kubernetes.target");
|
||||
|
||||
$machine1->waitUntilSucceeds("kubectl get node machine1.my.zyx | grep -w Ready");
|
||||
|
||||
$machine1->waitUntilSucceeds("docker load < ${kubectlImage}");
|
||||
@ -116,6 +118,8 @@ let
|
||||
# Node token exchange
|
||||
$machine1->waitUntilSucceeds("cp -f /var/lib/cfssl/apitoken.secret /tmp/shared/apitoken.secret");
|
||||
$machine2->waitUntilSucceeds("cat /tmp/shared/apitoken.secret | nixos-kubernetes-node-join");
|
||||
$machine1->waitForUnit("kubernetes.target");
|
||||
$machine2->waitForUnit("kubernetes.target");
|
||||
|
||||
$machine1->waitUntilSucceeds("kubectl get node machine2.my.zyx | grep -w Ready");
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user