From e2380e79e191cdcb92790fd02cf57c47067718b1 Mon Sep 17 00:00:00 2001 From: Johan Thomsen Date: Sun, 22 Jul 2018 13:14:20 +0200 Subject: [PATCH 1/9] nixos/kubernetes: major module refactor - All kubernetes components have been separated into different files - All non-TLS (insecure) ports have been deprecated and disabled by default - EasyCert option added to support automatic cluster PKI-bootstrap - RBAC has been enforced for all cluster components by default - NixOS kubernetes test cases make use of easyCerts to setup PKI --- nixos/modules/module-list.nix | 12 +- nixos/modules/rename.nix | 10 + .../cluster/kubernetes/addon-manager.nix | 167 +++ .../kubernetes/{ => addons}/dashboard.nix | 0 .../cluster/kubernetes/{ => addons}/dns.nix | 0 .../services/cluster/kubernetes/apiserver.nix | 427 +++++++ .../cluster/kubernetes/controller-manager.nix | 162 +++ .../services/cluster/kubernetes/default.nix | 1127 ++--------------- .../services/cluster/kubernetes/flannel.nix | 79 ++ .../services/cluster/kubernetes/kubelet.nix | 367 ++++++ .../services/cluster/kubernetes/pki.nix | 374 ++++++ .../services/cluster/kubernetes/proxy.nix | 80 ++ .../services/cluster/kubernetes/scheduler.nix | 92 ++ nixos/tests/kubernetes/base.nix | 38 +- nixos/tests/kubernetes/certs.nix | 219 ---- nixos/tests/kubernetes/dns.nix | 7 +- nixos/tests/kubernetes/kubernetes-common.nix | 57 - nixos/tests/kubernetes/rbac.nix | 9 +- 18 files changed, 1900 insertions(+), 1327 deletions(-) create mode 100644 nixos/modules/services/cluster/kubernetes/addon-manager.nix rename nixos/modules/services/cluster/kubernetes/{ => addons}/dashboard.nix (100%) rename nixos/modules/services/cluster/kubernetes/{ => addons}/dns.nix (100%) create mode 100644 nixos/modules/services/cluster/kubernetes/apiserver.nix create mode 100644 nixos/modules/services/cluster/kubernetes/controller-manager.nix create mode 100644 nixos/modules/services/cluster/kubernetes/flannel.nix create mode 100644 
nixos/modules/services/cluster/kubernetes/kubelet.nix create mode 100644 nixos/modules/services/cluster/kubernetes/pki.nix create mode 100644 nixos/modules/services/cluster/kubernetes/proxy.nix create mode 100644 nixos/modules/services/cluster/kubernetes/scheduler.nix delete mode 100644 nixos/tests/kubernetes/certs.nix delete mode 100644 nixos/tests/kubernetes/kubernetes-common.nix diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index 8a9a936d3b20..146a0f6af622 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -196,9 +196,17 @@ ./services/backup/tarsnap.nix ./services/backup/znapzend.nix ./services/cluster/hadoop/default.nix + ./services/cluster/kubernetes/addons/dns.nix + ./services/cluster/kubernetes/addons/dashboard.nix + ./services/cluster/kubernetes/addon-manager.nix + ./services/cluster/kubernetes/apiserver.nix + ./services/cluster/kubernetes/controller-manager.nix ./services/cluster/kubernetes/default.nix - ./services/cluster/kubernetes/dns.nix - ./services/cluster/kubernetes/dashboard.nix + ./services/cluster/kubernetes/flannel.nix + ./services/cluster/kubernetes/kubelet.nix + ./services/cluster/kubernetes/pki.nix + ./services/cluster/kubernetes/proxy.nix + ./services/cluster/kubernetes/scheduler.nix ./services/computing/boinc/client.nix ./services/computing/torque/server.nix ./services/computing/torque/mom.nix diff --git a/nixos/modules/rename.nix b/nixos/modules/rename.nix index 7bcdfdb52309..1e6557e1f0e0 100644 --- a/nixos/modules/rename.nix +++ b/nixos/modules/rename.nix @@ -40,9 +40,19 @@ with lib; (mkRenamedOptionModule [ "services" "kibana" "host" ] [ "services" "kibana" "listenAddress" ]) (mkRenamedOptionModule [ "services" "kubernetes" "apiserver" "admissionControl" ] [ "services" "kubernetes" "apiserver" "enableAdmissionPlugins" ]) (mkRenamedOptionModule [ "services" "kubernetes" "apiserver" "address" ] ["services" "kubernetes" "apiserver" "bindAddress"]) + (mkRenamedOptionModule [ 
"services" "kubernetes" "apiserver" "port" ] ["services" "kubernetes" "apiserver" "insecurePort"]) (mkRemovedOptionModule [ "services" "kubernetes" "apiserver" "publicAddress" ] "") (mkRenamedOptionModule [ "services" "kubernetes" "addons" "dashboard" "enableRBAC" ] [ "services" "kubernetes" "addons" "dashboard" "rbac" "enable" ]) + (mkRenamedOptionModule [ "services" "kubernetes" "controllerManager" "address" ] ["services" "kubernetes" "controllerManager" "bindAddress"]) + (mkRenamedOptionModule [ "services" "kubernetes" "controllerManager" "port" ] ["services" "kubernetes" "controllerManager" "insecurePort"]) + (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "servers" ] [ "services" "kubernetes" "apiserver" "etcd" "servers" ]) + (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "keyFile" ] [ "services" "kubernetes" "apiserver" "etcd" "keyFile" ]) + (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "certFile" ] [ "services" "kubernetes" "apiserver" "etcd" "certFile" ]) + (mkRenamedOptionModule [ "services" "kubernetes" "etcd" "caFile" ] [ "services" "kubernetes" "apiserver" "etcd" "caFile" ]) + (mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "applyManifests" ] "") (mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "cadvisorPort" ] "") + (mkRenamedOptionModule [ "services" "kubernetes" "proxy" "address" ] ["services" "kubernetes" "proxy" "bindAddress"]) + (mkRemovedOptionModule [ "services" "kubernetes" "verbose" ] "") (mkRenamedOptionModule [ "services" "logstash" "address" ] [ "services" "logstash" "listenAddress" ]) (mkRenamedOptionModule [ "services" "mpd" "network" "host" ] [ "services" "mpd" "network" "listenAddress" ]) (mkRenamedOptionModule [ "services" "neo4j" "host" ] [ "services" "neo4j" "defaultListenAddress" ]) diff --git a/nixos/modules/services/cluster/kubernetes/addon-manager.nix b/nixos/modules/services/cluster/kubernetes/addon-manager.nix new file mode 100644 index 000000000000..17f2dde31a71 --- /dev/null +++ 
b/nixos/modules/services/cluster/kubernetes/addon-manager.nix @@ -0,0 +1,167 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.addonManager; + + isRBACEnabled = elem "RBAC" top.apiserver.authorizationMode; + + addons = pkgs.runCommand "kubernetes-addons" { } '' + mkdir -p $out + # since we are mounting the addons to the addon manager, they need to be copied + ${concatMapStringsSep ";" (a: "cp -v ${a}/* $out/") (mapAttrsToList (name: addon: + pkgs.writeTextDir "${name}.json" (builtins.toJSON addon) + ) (cfg.addons))} + ''; +in +{ + ###### interface + options.services.kubernetes.addonManager = with lib.types; { + + bootstrapAddons = mkOption { + description = '' + Bootstrap addons are like regular addons, but they are applied with cluster-admin rights. + They are applied at addon-manager startup only. + ''; + default = { }; + type = attrsOf attrs; + example = literalExample '' + { + "my-service" = { + "apiVersion" = "v1"; + "kind" = "Service"; + "metadata" = { + "name" = "my-service"; + "namespace" = "default"; + }; + "spec" = { ... }; + }; + } + ''; + }; + + addons = mkOption { + description = "Kubernetes addons (any kind of Kubernetes resource can be an addon)."; + default = { }; + type = attrsOf (either attrs (listOf attrs)); + example = literalExample '' + { + "my-service" = { + "apiVersion" = "v1"; + "kind" = "Service"; + "metadata" = { + "name" = "my-service"; + "namespace" = "default"; + }; + "spec" = { ... 
}; + }; + } + // import { cfg = config.services.kubernetes; }; + ''; + }; + + enable = mkEnableOption "Kubernetes addon manager"; + }; + + ###### implementation + config = mkIf cfg.enable { + environment.etc."kubernetes/addons".source = "${addons}/"; + + systemd.services.kube-addon-manager = { + description = "Kubernetes addon manager"; + wantedBy = [ "kubernetes.target" ]; + after = [ "kube-apiserver.service" ]; + environment.ADDON_PATH = "/etc/kubernetes/addons/"; + path = [ pkgs.gawk ]; + serviceConfig = { + Slice = "kubernetes.slice"; + ExecStart = "${top.package}/bin/kube-addons"; + WorkingDirectory = top.dataDir; + User = "kubernetes"; + Group = "kubernetes"; + Restart = "on-failure"; + RestartSec = 10; + }; + }; + + services.kubernetes.addonManager.bootstrapAddons = mkIf isRBACEnabled + (let + name = "system:kube-addon-manager"; + namespace = "kube-system"; + in + { + + kube-addon-manager-r = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "Role"; + metadata = { + inherit name namespace; + }; + rules = [{ + apiGroups = ["*"]; + resources = ["*"]; + verbs = ["*"]; + }]; + }; + + kube-addon-manager-rb = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "RoleBinding"; + metadata = { + inherit name namespace; + }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "Role"; + inherit name; + }; + subjects = [{ + apiGroup = "rbac.authorization.k8s.io"; + kind = "User"; + inherit name; + }]; + }; + + kube-addon-manager-cluster-lister-cr = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRole"; + metadata = { + name = "${name}:cluster-lister"; + }; + rules = [{ + apiGroups = ["*"]; + resources = ["*"]; + verbs = ["list"]; + }]; + }; + + kube-addon-manager-cluster-lister-crb = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRoleBinding"; + metadata = { + name = "${name}:cluster-lister"; + }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "ClusterRole"; + name = 
"${name}:cluster-lister"; + }; + subjects = [{ + kind = "User"; + inherit name; + }]; + }; + }); + + services.kubernetes.pki.certs = { + addonManager = top.lib.mkCert { + name = "kube-addon-manager"; + CN = "system:kube-addon-manager"; + action = "systemctl restart kube-addon-manager.service"; + }; + }; + }; + +} diff --git a/nixos/modules/services/cluster/kubernetes/dashboard.nix b/nixos/modules/services/cluster/kubernetes/addons/dashboard.nix similarity index 100% rename from nixos/modules/services/cluster/kubernetes/dashboard.nix rename to nixos/modules/services/cluster/kubernetes/addons/dashboard.nix diff --git a/nixos/modules/services/cluster/kubernetes/dns.nix b/nixos/modules/services/cluster/kubernetes/addons/dns.nix similarity index 100% rename from nixos/modules/services/cluster/kubernetes/dns.nix rename to nixos/modules/services/cluster/kubernetes/addons/dns.nix diff --git a/nixos/modules/services/cluster/kubernetes/apiserver.nix b/nixos/modules/services/cluster/kubernetes/apiserver.nix new file mode 100644 index 000000000000..465d74d83c8b --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/apiserver.nix @@ -0,0 +1,427 @@ + { config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.apiserver; + + isRBACEnabled = elem "RBAC" cfg.authorizationMode; + + apiserverServiceIP = (concatStringsSep "." ( + take 3 (splitString "." cfg.serviceClusterIpRange + )) + ".1"); +in +{ + ###### interface + options.services.kubernetes.apiserver = with lib.types; { + + advertiseAddress = mkOption { + description = '' + Kubernetes apiserver IP address on which to advertise the apiserver + to members of the cluster. This address must be reachable by the rest + of the cluster. 
+ ''; + default = null; + type = nullOr str; + }; + + allowPrivileged = mkOption { + description = "Whether to allow privileged containers on Kubernetes."; + default = false; + type = bool; + }; + + authorizationMode = mkOption { + description = '' + Kubernetes apiserver authorization mode (AlwaysAllow/AlwaysDeny/ABAC/Webhook/RBAC/Node). See + + ''; + default = ["RBAC" "Node"]; # Enabling RBAC by default, although kubernetes default is AlwaysAllow + type = listOf (enum ["AlwaysAllow" "AlwaysDeny" "ABAC" "Webhook" "RBAC" "Node"]); + }; + + authorizationPolicy = mkOption { + description = '' + Kubernetes apiserver authorization policy file. See + + ''; + default = []; + type = listOf attrs; + }; + + basicAuthFile = mkOption { + description = '' + Kubernetes apiserver basic authentication file. See + + ''; + default = null; + type = nullOr path; + }; + + bindAddress = mkOption { + description = '' + The IP address on which to listen for the --secure-port port. + The associated interface(s) must be reachable by the rest + of the cluster, and by CLI/web clients. + ''; + default = "0.0.0.0"; + type = str; + }; + + clientCaFile = mkOption { + description = "Kubernetes apiserver CA file for client auth."; + default = top.caFile; + type = nullOr path; + }; + + disableAdmissionPlugins = mkOption { + description = '' + Kubernetes admission control plugins to disable. See + + ''; + default = []; + type = listOf str; + }; + + enable = mkEnableOption "Kubernetes apiserver"; + + enableAdmissionPlugins = mkOption { + description = '' + Kubernetes admission control plugins to enable. 
See + + ''; + default = [ + "NamespaceLifecycle" "LimitRanger" "ServiceAccount" + "ResourceQuota" "DefaultStorageClass" "DefaultTolerationSeconds" + "NodeRestriction" + ]; + example = [ + "NamespaceLifecycle" "NamespaceExists" "LimitRanger" + "SecurityContextDeny" "ServiceAccount" "ResourceQuota" + "PodSecurityPolicy" "NodeRestriction" "DefaultStorageClass" + ]; + type = listOf str; + }; + + etcd = { + servers = mkOption { + description = "List of etcd servers."; + default = ["http://127.0.0.1:2379"]; + type = types.listOf types.str; + }; + + keyFile = mkOption { + description = "Etcd key file."; + default = null; + type = types.nullOr types.path; + }; + + certFile = mkOption { + description = "Etcd cert file."; + default = null; + type = types.nullOr types.path; + }; + + caFile = mkOption { + description = "Etcd ca file."; + default = top.caFile; + type = types.nullOr types.path; + }; + }; + + extraOpts = mkOption { + description = "Kubernetes apiserver extra command line options."; + default = ""; + type = str; + }; + + extraSANs = mkOption { + description = "Extra x509 Subject Alternative Names to be added to the kubernetes apiserver tls cert."; + default = []; + type = listOf str; + }; + + featureGates = mkOption { + description = "List set of feature gates"; + default = top.featureGates; + type = listOf str; + }; + + insecureBindAddress = mkOption { + description = "The IP address on which to serve the --insecure-port."; + default = "127.0.0.1"; + type = str; + }; + + insecurePort = mkOption { + description = "Kubernetes apiserver insecure listening port. 
(0 = disabled)"; + default = 0; + type = int; + }; + + kubeletClientCaFile = mkOption { + description = "Path to a cert file for connecting to kubelet."; + default = top.caFile; + type = nullOr path; + }; + + kubeletClientCertFile = mkOption { + description = "Client certificate to use for connections to kubelet."; + default = null; + type = nullOr path; + }; + + kubeletClientKeyFile = mkOption { + description = "Key to use for connections to kubelet."; + default = null; + type = nullOr path; + }; + + kubeletHttps = mkOption { + description = "Whether to use https for connections to kubelet."; + default = true; + type = bool; + }; + + runtimeConfig = mkOption { + description = '' + Api runtime configuration. See + + ''; + default = "authentication.k8s.io/v1beta1=true"; + example = "api/all=false,api/v1=true"; + type = str; + }; + + storageBackend = mkOption { + description = '' + Kubernetes apiserver storage backend. + ''; + default = "etcd3"; + type = enum ["etcd2" "etcd3"]; + }; + + securePort = mkOption { + description = "Kubernetes apiserver secure port."; + default = 6443; + type = int; + }; + + serviceAccountKeyFile = mkOption { + description = '' + Kubernetes apiserver PEM-encoded x509 RSA private or public key file, + used to verify ServiceAccount tokens. By default tls private key file + is used. + ''; + default = null; + type = nullOr path; + }; + + serviceClusterIpRange = mkOption { + description = '' + A CIDR notation IP range from which to assign service cluster IPs. + This must not overlap with any IP ranges assigned to nodes for pods. + ''; + default = "10.0.0.0/24"; + type = str; + }; + + tlsCertFile = mkOption { + description = "Kubernetes apiserver certificate file."; + default = null; + type = nullOr path; + }; + + tlsKeyFile = mkOption { + description = "Kubernetes apiserver private key file."; + default = null; + type = nullOr path; + }; + + tokenAuthFile = mkOption { + description = '' + Kubernetes apiserver token authentication file. 
See + + ''; + default = null; + type = nullOr path; + }; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. See + + ''; + default = null; + type = nullOr int; + }; + + webhookConfig = mkOption { + description = '' + Kubernetes apiserver Webhook config file. It uses the kubeconfig file format. + See + ''; + default = null; + type = nullOr path; + }; + + }; + + + ###### implementation + config = mkMerge [ + + (mkIf cfg.enable { + systemd.services.kube-apiserver = { + description = "Kubernetes APIServer Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "network.target" ]; + serviceConfig = { + Slice = "kubernetes.slice"; + ExecStart = ''${top.package}/bin/kube-apiserver \ + --allow-privileged=${boolToString cfg.allowPrivileged} \ + --authorization-mode=${concatStringsSep "," cfg.authorizationMode} \ + ${optionalString (elem "ABAC" cfg.authorizationMode) + "--authorization-policy-file=${ + pkgs.writeText "kube-auth-policy.jsonl" + (concatMapStringsSep "\n" (l: builtins.toJSON l) cfg.authorizationPolicy) + }" + } \ + ${optionalString (elem "Webhook" cfg.authorizationMode) + "--authorization-webhook-config-file=${cfg.webhookConfig}" + } \ + --bind-address=${cfg.bindAddress} \ + ${optionalString (cfg.advertiseAddress != null) + "--advertise-address=${cfg.advertiseAddress}"} \ + ${optionalString (cfg.clientCaFile != null) + "--client-ca-file=${cfg.clientCaFile}"} \ + --disable-admission-plugins=${concatStringsSep "," cfg.disableAdmissionPlugins} \ + --enable-admission-plugins=${concatStringsSep "," cfg.enableAdmissionPlugins} \ + --etcd-servers=${concatStringsSep "," cfg.etcd.servers} \ + ${optionalString (cfg.etcd.caFile != null) + "--etcd-cafile=${cfg.etcd.caFile}"} \ + ${optionalString (cfg.etcd.certFile != null) + "--etcd-certfile=${cfg.etcd.certFile}"} \ + ${optionalString (cfg.etcd.keyFile != null) + "--etcd-keyfile=${cfg.etcd.keyFile}"} \ + ${optionalString (cfg.featureGates != []) + 
"--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + ${optionalString (cfg.basicAuthFile != null) + "--basic-auth-file=${cfg.basicAuthFile}"} \ + --kubelet-https=${boolToString cfg.kubeletHttps} \ + ${optionalString (cfg.kubeletClientCaFile != null) + "--kubelet-certificate-authority=${cfg.kubeletClientCaFile}"} \ + ${optionalString (cfg.kubeletClientCertFile != null) + "--kubelet-client-certificate=${cfg.kubeletClientCertFile}"} \ + ${optionalString (cfg.kubeletClientKeyFile != null) + "--kubelet-client-key=${cfg.kubeletClientKeyFile}"} \ + --insecure-bind-address=${cfg.insecureBindAddress} \ + --insecure-port=${toString cfg.insecurePort} \ + ${optionalString (cfg.runtimeConfig != "") + "--runtime-config=${cfg.runtimeConfig}"} \ + --secure-port=${toString cfg.securePort} \ + ${optionalString (cfg.serviceAccountKeyFile!=null) + "--service-account-key-file=${cfg.serviceAccountKeyFile}"} \ + --service-cluster-ip-range=${cfg.serviceClusterIpRange} \ + --storage-backend=${cfg.storageBackend} \ + ${optionalString (cfg.tlsCertFile != null) + "--tls-cert-file=${cfg.tlsCertFile}"} \ + ${optionalString (cfg.tlsKeyFile != null) + "--tls-private-key-file=${cfg.tlsKeyFile}"} \ + ${optionalString (cfg.tokenAuthFile != null) + "--token-auth-file=${cfg.tokenAuthFile}"} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + User = "kubernetes"; + Group = "kubernetes"; + AmbientCapabilities = "cap_net_bind_service"; + Restart = "on-failure"; + RestartSec = 5; + }; + }; + + services.etcd = { + clientCertAuth = mkDefault true; + peerClientCertAuth = mkDefault true; + listenClientUrls = mkDefault ["https://0.0.0.0:2379"]; + listenPeerUrls = mkDefault ["https://0.0.0.0:2380"]; + advertiseClientUrls = mkDefault ["https://${top.masterAddress}:2379"]; + initialCluster = mkDefault ["${top.masterAddress}=https://${top.masterAddress}:2380"]; + name = 
mkDefault top.masterAddress; + initialAdvertisePeerUrls = mkDefault ["https://${top.masterAddress}:2380"]; + }; + + services.kubernetes.addonManager.bootstrapAddons = mkIf isRBACEnabled { + + apiserver-kubelet-api-admin-crb = { + apiVersion = "rbac.authorization.k8s.io/v1"; + kind = "ClusterRoleBinding"; + metadata = { + name = "system:kube-apiserver:kubelet-api-admin"; + }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "ClusterRole"; + name = "system:kubelet-api-admin"; + }; + subjects = [{ + kind = "User"; + name = "system:kube-apiserver"; + }]; + }; + + }; + + services.kubernetes.pki.certs = with top.lib; { + apiServer = mkCert { + name = "kube-apiserver"; + CN = "kubernetes"; + hosts = [ + "kubernetes.default.svc" + "kubernetes.default.svc.${top.addons.dns.clusterDomain}" + cfg.advertiseAddress + top.masterAddress + apiserverServiceIP + "127.0.0.1" + ] ++ cfg.extraSANs; + action = "systemctl restart kube-apiserver.service"; + }; + apiserverKubeletClient = mkCert { + name = "kube-apiserver-kubelet-client"; + CN = "system:kube-apiserver"; + action = "systemctl restart kube-apiserver.service"; + }; + apiserverEtcdClient = mkCert { + name = "kube-apiserver-etcd-client"; + CN = "etcd-client"; + action = "systemctl restart kube-apiserver.service"; + }; + clusterAdmin = mkCert { + name = "cluster-admin"; + CN = "cluster-admin"; + fields = { + O = "system:masters"; + }; + privateKeyOwner = "root"; + }; + etcd = mkCert { + name = "etcd"; + CN = top.masterAddress; + hosts = [ + "etcd.${top.addons.dns.clusterDomain}" + top.masterAddress + cfg.advertiseAddress + ]; + privateKeyOwner = "etcd"; + action = "systemctl restart etcd.service"; + }; + }; + + }) + + ]; + +} diff --git a/nixos/modules/services/cluster/kubernetes/controller-manager.nix b/nixos/modules/services/cluster/kubernetes/controller-manager.nix new file mode 100644 index 000000000000..dff97f144d55 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/controller-manager.nix @@ -0,0 +1,162 @@ +{ 
config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.controllerManager; +in +{ + ###### interface + options.services.kubernetes.controllerManager = with lib.types; { + + allocateNodeCIDRs = mkOption { + description = "Whether to automatically allocate CIDR ranges for cluster nodes."; + default = true; + type = bool; + }; + + bindAddress = mkOption { + description = "Kubernetes controller manager listening address."; + default = "127.0.0.1"; + type = str; + }; + + clusterCidr = mkOption { + description = "Kubernetes CIDR Range for Pods in cluster."; + default = top.clusterCidr; + type = str; + }; + + enable = mkEnableOption "Kubernetes controller manager"; + + extraOpts = mkOption { + description = "Kubernetes controller manager extra command line options."; + default = ""; + type = str; + }; + + featureGates = mkOption { + description = "List set of feature gates"; + default = top.featureGates; + type = listOf str; + }; + + insecurePort = mkOption { + description = "Kubernetes controller manager insecure listening port."; + default = 0; + type = int; + }; + + kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes controller manager"; + + leaderElect = mkOption { + description = "Whether to start leader election before executing main loop."; + type = bool; + default = true; + }; + + rootCaFile = mkOption { + description = '' + Kubernetes controller manager certificate authority file included in + service account's token secret. 
+ ''; + default = top.caFile; + type = nullOr path; + }; + + securePort = mkOption { + description = "Kubernetes controller manager secure listening port."; + default = 10252; + type = int; + }; + + serviceAccountKeyFile = mkOption { + description = '' + Kubernetes controller manager PEM-encoded private RSA key file used to + sign service account tokens + ''; + default = null; + type = nullOr path; + }; + + tlsCertFile = mkOption { + description = "Kubernetes controller-manager certificate file."; + default = null; + type = nullOr path; + }; + + tlsKeyFile = mkOption { + description = "Kubernetes controller-manager private key file."; + default = null; + type = nullOr path; + }; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. See + + ''; + default = null; + type = nullOr int; + }; + + }; + + ###### implementation + config = mkIf cfg.enable { + systemd.services.kube-controller-manager = { + description = "Kubernetes Controller Manager Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "kube-apiserver.service" ]; + serviceConfig = { + RestartSec = "30s"; + Restart = "on-failure"; + Slice = "kubernetes.slice"; + ExecStart = ''${top.package}/bin/kube-controller-manager \ + --allocate-node-cidrs=${boolToString cfg.allocateNodeCIDRs} \ + --bind-address=${cfg.bindAddress} \ + ${optionalString (cfg.clusterCidr!=null) + "--cluster-cidr=${cfg.clusterCidr}"} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + --kubeconfig=${top.lib.mkKubeConfig "kube-controller-manager" cfg.kubeconfig} \ + --leader-elect=${boolToString cfg.leaderElect} \ + ${optionalString (cfg.rootCaFile!=null) + "--root-ca-file=${cfg.rootCaFile}"} \ + --port=${toString cfg.insecurePort} \ + --secure-port=${toString cfg.securePort} \ + ${optionalString (cfg.serviceAccountKeyFile!=null) + "--service-account-private-key-file=${cfg.serviceAccountKeyFile}"} \ 
+ ${optionalString (cfg.tlsCertFile!=null) + "--tls-cert-file=${cfg.tlsCertFile}"} \ + ${optionalString (cfg.tlsKeyFile!=null) + "--tls-private-key-file=${cfg.tlsKeyFile}"} \ + ${optionalString (elem "RBAC" top.apiserver.authorizationMode) + "--use-service-account-credentials"} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + User = "kubernetes"; + Group = "kubernetes"; + }; + path = top.path; + }; + + services.kubernetes.pki.certs = with top.lib; { + controllerManager = mkCert { + name = "kube-controller-manager"; + CN = "kube-controller-manager"; + action = "systemctl restart kube-controller-manager.service"; + }; + controllerManagerClient = mkCert { + name = "kube-controller-manager-client"; + CN = "system:kube-controller-manager"; + action = "systemctl restart kube-controller-manager.service"; + }; + }; + + services.kubernetes.controllerManager.kubeconfig.server = mkDefault top.apiserverAddress; + }; +} diff --git a/nixos/modules/services/cluster/kubernetes/default.nix b/nixos/modules/services/cluster/kubernetes/default.nix index 6f3c45b29bf2..a5a59f4a5fc9 100644 --- a/nixos/modules/services/cluster/kubernetes/default.nix +++ b/nixos/modules/services/cluster/kubernetes/default.nix @@ -5,74 +5,52 @@ with lib; let cfg = config.services.kubernetes; - # YAML config; see: - # https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/ - # https://github.com/kubernetes/kubernetes/blob/release-1.10/pkg/kubelet/apis/kubeletconfig/v1beta1/types.go - # - # TODO: migrate the following flags to this config file - # - # --pod-manifest-path - # --address - # --port - # --tls-cert-file - # --tls-private-key-file - # --client-ca-file - # --authentication-token-webhook - # --authentication-token-webhook-cache-ttl - # --authorization-mode - # --healthz-bind-address - # --healthz-port - # --allow-privileged - # --cluster-dns - # --cluster-domain - # --hairpin-mode - # --feature-gates 
- kubeletConfig = pkgs.runCommand "kubelet-config.yaml" { } '' - echo > $out ${pkgs.lib.escapeShellArg (builtins.toJSON { - kind = "KubeletConfiguration"; - apiVersion = "kubelet.config.k8s.io/v1beta1"; - ${if cfg.kubelet.applyManifests then "staticPodPath" else null} = - manifests; - })} - ''; - - infraContainer = pkgs.dockerTools.buildImage { - name = "pause"; - tag = "latest"; - contents = cfg.package.pause; - config.Cmd = "/bin/pause"; - }; - - mkKubeConfig = name: cfg: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON { + mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON { apiVersion = "v1"; kind = "Config"; clusters = [{ name = "local"; cluster.certificate-authority = cfg.caFile; - cluster.server = cfg.server; + cluster.server = conf.server; }]; users = [{ - name = "kubelet"; + inherit name; user = { - client-certificate = cfg.certFile; - client-key = cfg.keyFile; + client-certificate = conf.certFile; + client-key = conf.keyFile; }; }]; contexts = [{ context = { cluster = "local"; - user = "kubelet"; + user = name; }; - current-context = "kubelet-context"; + current-context = "local"; }]; }); + caCert = secret "ca"; + + etcdEndpoints = ["https://${cfg.masterAddress}:2379"]; + + mkCert = { name, CN, hosts ? [], fields ? {}, action ? "", + privateKeyOwner ? 
"kubernetes" }: rec { + inherit name caCert CN hosts fields action; + cert = secret name; + key = secret "${name}-key"; + privateKeyOptions = { + owner = privateKeyOwner; + group = "nogroup"; + mode = "0600"; + path = key; + }; + }; + + secret = name: "${cfg.secretsPath}/${name}.pem"; + mkKubeConfigOptions = prefix: { server = mkOption { description = "${prefix} kube-apiserver server address."; - default = "http://${if cfg.apiserver.advertiseAddress != null - then cfg.apiserver.advertiseAddress - else "127.0.0.1"}:${toString cfg.apiserver.port}"; type = types.str; }; @@ -101,66 +79,6 @@ let certFile = mkDefault cfg.kubeconfig.certFile; keyFile = mkDefault cfg.kubeconfig.keyFile; }; - - cniConfig = - if cfg.kubelet.cni.config != [] && !(isNull cfg.kubelet.cni.configDir) then - throw "Verbatim CNI-config and CNI configDir cannot both be set." - else if !(isNull cfg.kubelet.cni.configDir) then - cfg.kubelet.cni.configDir - else - (pkgs.buildEnv { - name = "kubernetes-cni-config"; - paths = imap (i: entry: - pkgs.writeTextDir "${toString (10+i)}-${entry.type}.conf" (builtins.toJSON entry) - ) cfg.kubelet.cni.config; - }); - - manifests = pkgs.buildEnv { - name = "kubernetes-manifests"; - paths = mapAttrsToList (name: manifest: - pkgs.writeTextDir "${name}.json" (builtins.toJSON manifest) - ) cfg.kubelet.manifests; - }; - - addons = pkgs.runCommand "kubernetes-addons" { } '' - mkdir -p $out - # since we are mounting the addons to the addon manager, they need to be copied - ${concatMapStringsSep ";" (a: "cp -v ${a}/* $out/") (mapAttrsToList (name: addon: - pkgs.writeTextDir "${name}.json" (builtins.toJSON addon) - ) (cfg.addonManager.addons))} - ''; - - taintOptions = { name, ... 
}: { - options = { - key = mkOption { - description = "Key of taint."; - default = name; - type = types.str; - }; - value = mkOption { - description = "Value of taint."; - type = types.str; - }; - effect = mkOption { - description = "Effect of taint."; - example = "NoSchedule"; - type = types.enum ["NoSchedule" "PreferNoSchedule" "NoExecute"]; - }; - }; - }; - - taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (mapAttrsToList (n: v: v) cfg.kubelet.taints); - - # needed for flannel to pass options to docker - mkDockerOpts = pkgs.runCommand "mk-docker-opts" { - buildInputs = [ pkgs.makeWrapper ]; - } '' - mkdir -p $out - cp ${pkgs.kubernetes.src}/cluster/centos/node/bin/mk-docker-opts.sh $out/mk-docker-opts.sh - - # bashInteractive needed for `compgen` - makeWrapper ${pkgs.bashInteractive}/bin/bash $out/mk-docker-opts --add-flags "$out/mk-docker-opts.sh" - ''; in { ###### interface @@ -170,8 +88,9 @@ in { description = '' Kubernetes role that this machine should take. - Master role will enable etcd, apiserver, scheduler and controller manager - services. Node role will enable etcd, docker, kubelet and proxy services. + Master role will enable etcd, apiserver, scheduler, controller manager + addon manager, flannel and proxy services. + Node role will enable flannel, docker, kubelet and proxy services. ''; default = []; type = types.listOf (types.enum ["master" "node"]); @@ -184,40 +103,17 @@ in { defaultText = "pkgs.kubernetes"; }; - verbose = mkOption { - description = "Kubernetes enable verbose mode for debugging."; - default = false; - type = types.bool; - }; - - etcd = { - servers = mkOption { - description = "List of etcd servers. 
By default etcd is started, except if this option is changed."; - default = ["http://127.0.0.1:2379"]; - type = types.listOf types.str; - }; - - keyFile = mkOption { - description = "Etcd key file."; - default = null; - type = types.nullOr types.path; - }; - - certFile = mkOption { - description = "Etcd cert file."; - default = null; - type = types.nullOr types.path; - }; - - caFile = mkOption { - description = "Etcd ca file."; - default = cfg.caFile; - type = types.nullOr types.path; - }; - }; - kubeconfig = mkKubeConfigOptions "Default kubeconfig"; + apiserverAddress = mkOption { + description = '' + Clusterwide accessible address for the kubernetes apiserver, + including protocol and optional port. + ''; + example = "https://kubernetes-apiserver.example.com:6443"; + type = types.str; + }; + caFile = mkOption { description = "Default kubernetes certificate authority"; type = types.nullOr types.path; @@ -230,549 +126,22 @@ in { type = types.path; }; + easyCerts = mkOption { + description = "Automatically setup x509 certificates and keys for the entire cluster."; + default = false; + type = types.bool; + }; + featureGates = mkOption { - description = "List set of feature gates"; + description = "List set of feature gates."; default = []; type = types.listOf types.str; }; - apiserver = { - enable = mkOption { - description = "Whether to enable Kubernetes apiserver."; - default = false; - type = types.bool; - }; - - featureGates = mkOption { - description = "List set of feature gates"; - default = cfg.featureGates; - type = types.listOf types.str; - }; - - bindAddress = mkOption { - description = '' - The IP address on which to listen for the --secure-port port. - The associated interface(s) must be reachable by the rest - of the cluster, and by CLI/web clients. - ''; - default = "0.0.0.0"; - type = types.str; - }; - - advertiseAddress = mkOption { - description = '' - Kubernetes apiserver IP address on which to advertise the apiserver - to members of the cluster. 
This address must be reachable by the rest - of the cluster. - ''; - default = null; - type = types.nullOr types.str; - }; - - storageBackend = mkOption { - description = '' - Kubernetes apiserver storage backend. - ''; - default = "etcd3"; - type = types.enum ["etcd2" "etcd3"]; - }; - - port = mkOption { - description = "Kubernetes apiserver listening port."; - default = 8080; - type = types.int; - }; - - securePort = mkOption { - description = "Kubernetes apiserver secure port."; - default = 443; - type = types.int; - }; - - tlsCertFile = mkOption { - description = "Kubernetes apiserver certificate file."; - default = null; - type = types.nullOr types.path; - }; - - tlsKeyFile = mkOption { - description = "Kubernetes apiserver private key file."; - default = null; - type = types.nullOr types.path; - }; - - clientCaFile = mkOption { - description = "Kubernetes apiserver CA file for client auth."; - default = cfg.caFile; - type = types.nullOr types.path; - }; - - tokenAuthFile = mkOption { - description = '' - Kubernetes apiserver token authentication file. See - - ''; - default = null; - type = types.nullOr types.path; - }; - - basicAuthFile = mkOption { - description = '' - Kubernetes apiserver basic authentication file. See - - ''; - default = pkgs.writeText "users" '' - kubernetes,admin,0 - ''; - type = types.nullOr types.path; - }; - - authorizationMode = mkOption { - description = '' - Kubernetes apiserver authorization mode (AlwaysAllow/AlwaysDeny/ABAC/Webhook/RBAC/Node). See - - ''; - default = ["RBAC" "Node"]; - type = types.listOf (types.enum ["AlwaysAllow" "AlwaysDeny" "ABAC" "Webhook" "RBAC" "Node"]); - }; - - authorizationPolicy = mkOption { - description = '' - Kubernetes apiserver authorization policy file. See - - ''; - default = []; - type = types.listOf types.attrs; - }; - - webhookConfig = mkOption { - description = '' - Kubernetes apiserver Webhook config file. It uses the kubeconfig file format. 
- See - ''; - default = null; - type = types.nullOr types.path; - }; - - allowPrivileged = mkOption { - description = "Whether to allow privileged containers on Kubernetes."; - default = true; - type = types.bool; - }; - - serviceClusterIpRange = mkOption { - description = '' - A CIDR notation IP range from which to assign service cluster IPs. - This must not overlap with any IP ranges assigned to nodes for pods. - ''; - default = "10.0.0.0/24"; - type = types.str; - }; - - runtimeConfig = mkOption { - description = '' - Api runtime configuration. See - - ''; - default = "authentication.k8s.io/v1beta1=true"; - example = "api/all=false,api/v1=true"; - type = types.str; - }; - - enableAdmissionPlugins = mkOption { - description = '' - Kubernetes admission control plugins to enable. See - - ''; - default = ["NamespaceLifecycle" "LimitRanger" "ServiceAccount" "ResourceQuota" "DefaultStorageClass" "DefaultTolerationSeconds" "NodeRestriction"]; - example = [ - "NamespaceLifecycle" "NamespaceExists" "LimitRanger" - "SecurityContextDeny" "ServiceAccount" "ResourceQuota" - "PodSecurityPolicy" "NodeRestriction" "DefaultStorageClass" - ]; - type = types.listOf types.str; - }; - - disableAdmissionPlugins = mkOption { - description = '' - Kubernetes admission control plugins to disable. See - - ''; - default = []; - type = types.listOf types.str; - }; - - serviceAccountKeyFile = mkOption { - description = '' - Kubernetes apiserver PEM-encoded x509 RSA private or public key file, - used to verify ServiceAccount tokens. By default tls private key file - is used. 
- ''; - default = null; - type = types.nullOr types.path; - }; - - kubeletClientCaFile = mkOption { - description = "Path to a cert file for connecting to kubelet."; - default = cfg.caFile; - type = types.nullOr types.path; - }; - - kubeletClientCertFile = mkOption { - description = "Client certificate to use for connections to kubelet."; - default = null; - type = types.nullOr types.path; - }; - - kubeletClientKeyFile = mkOption { - description = "Key to use for connections to kubelet."; - default = null; - type = types.nullOr types.path; - }; - - kubeletHttps = mkOption { - description = "Whether to use https for connections to kubelet."; - default = true; - type = types.bool; - }; - - extraOpts = mkOption { - description = "Kubernetes apiserver extra command line options."; - default = ""; - type = types.str; - }; - }; - - scheduler = { - enable = mkOption { - description = "Whether to enable Kubernetes scheduler."; - default = false; - type = types.bool; - }; - - featureGates = mkOption { - description = "List set of feature gates"; - default = cfg.featureGates; - type = types.listOf types.str; - }; - - address = mkOption { - description = "Kubernetes scheduler listening address."; - default = "127.0.0.1"; - type = types.str; - }; - - port = mkOption { - description = "Kubernetes scheduler listening port."; - default = 10251; - type = types.int; - }; - - leaderElect = mkOption { - description = "Whether to start leader election before executing main loop."; - type = types.bool; - default = true; - }; - - kubeconfig = mkKubeConfigOptions "Kubernetes scheduler"; - - extraOpts = mkOption { - description = "Kubernetes scheduler extra command line options."; - default = ""; - type = types.str; - }; - }; - - controllerManager = { - enable = mkOption { - description = "Whether to enable Kubernetes controller manager."; - default = false; - type = types.bool; - }; - - featureGates = mkOption { - description = "List set of feature gates"; - default = cfg.featureGates; - 
type = types.listOf types.str; - }; - - address = mkOption { - description = "Kubernetes controller manager listening address."; - default = "127.0.0.1"; - type = types.str; - }; - - port = mkOption { - description = "Kubernetes controller manager listening port."; - default = 10252; - type = types.int; - }; - - leaderElect = mkOption { - description = "Whether to start leader election before executing main loop."; - type = types.bool; - default = true; - }; - - serviceAccountKeyFile = mkOption { - description = '' - Kubernetes controller manager PEM-encoded private RSA key file used to - sign service account tokens - ''; - default = null; - type = types.nullOr types.path; - }; - - rootCaFile = mkOption { - description = '' - Kubernetes controller manager certificate authority file included in - service account's token secret. - ''; - default = cfg.caFile; - type = types.nullOr types.path; - }; - - kubeconfig = mkKubeConfigOptions "Kubernetes controller manager"; - - extraOpts = mkOption { - description = "Kubernetes controller manager extra command line options."; - default = ""; - type = types.str; - }; - }; - - kubelet = { - enable = mkOption { - description = "Whether to enable Kubernetes kubelet."; - default = false; - type = types.bool; - }; - - featureGates = mkOption { - description = "List set of feature gates"; - default = cfg.featureGates; - type = types.listOf types.str; - }; - - seedDockerImages = mkOption { - description = "List of docker images to preload on system"; - default = []; - type = types.listOf types.package; - }; - - registerNode = mkOption { - description = "Whether to auto register kubelet with API server."; - default = true; - type = types.bool; - }; - - address = mkOption { - description = "Kubernetes kubelet info server listening address."; - default = "0.0.0.0"; - type = types.str; - }; - - port = mkOption { - description = "Kubernetes kubelet info server listening port."; - default = 10250; - type = types.int; - }; - - tlsCertFile = 
mkOption { - description = "File containing x509 Certificate for HTTPS."; - default = null; - type = types.nullOr types.path; - }; - - tlsKeyFile = mkOption { - description = "File containing x509 private key matching tlsCertFile."; - default = null; - type = types.nullOr types.path; - }; - - clientCaFile = mkOption { - description = "Kubernetes apiserver CA file for client authentication."; - default = cfg.caFile; - type = types.nullOr types.path; - }; - - healthz = { - bind = mkOption { - description = "Kubernetes kubelet healthz listening address."; - default = "127.0.0.1"; - type = types.str; - }; - - port = mkOption { - description = "Kubernetes kubelet healthz port."; - default = 10248; - type = types.int; - }; - }; - - hostname = mkOption { - description = "Kubernetes kubelet hostname override."; - default = config.networking.hostName; - type = types.str; - }; - - allowPrivileged = mkOption { - description = "Whether to allow Kubernetes containers to request privileged mode."; - default = true; - type = types.bool; - }; - - clusterDns = mkOption { - description = "Use alternative DNS."; - default = "10.1.0.1"; - type = types.str; - }; - - clusterDomain = mkOption { - description = "Use alternative domain."; - default = config.services.kubernetes.addons.dns.clusterDomain; - type = types.str; - }; - - networkPlugin = mkOption { - description = "Network plugin to use by Kubernetes."; - type = types.nullOr (types.enum ["cni" "kubenet"]); - default = "kubenet"; - }; - - cni = { - packages = mkOption { - description = "List of network plugin packages to install."; - type = types.listOf types.package; - default = []; - }; - - config = mkOption { - description = "Kubernetes CNI configuration."; - type = types.listOf types.attrs; - default = []; - example = literalExample '' - [{ - "cniVersion": "0.2.0", - "name": "mynet", - "type": "bridge", - "bridge": "cni0", - "isGateway": true, - "ipMasq": true, - "ipam": { - "type": "host-local", - "subnet": "10.22.0.0/16", - 
"routes": [ - { "dst": "0.0.0.0/0" } - ] - } - } { - "cniVersion": "0.2.0", - "type": "loopback" - }] - ''; - }; - - configDir = mkOption { - description = "Path to Kubernetes CNI configuration directory."; - type = types.nullOr types.path; - default = null; - }; - }; - - manifests = mkOption { - description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)"; - type = types.attrsOf types.attrs; - default = {}; - }; - - applyManifests = mkOption { - description = "Whether to apply manifests (this is true for master node)."; - default = false; - type = types.bool; - }; - - unschedulable = mkOption { - description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role."; - default = false; - type = types.bool; - }; - - taints = mkOption { - description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/)."; - default = {}; - type = types.attrsOf (types.submodule [ taintOptions ]); - }; - - nodeIp = mkOption { - description = "IP address of the node. 
If set, kubelet will use this IP address for the node."; - default = null; - type = types.nullOr types.str; - }; - - kubeconfig = mkKubeConfigOptions "Kubelet"; - - extraOpts = mkOption { - description = "Kubernetes kubelet extra command line options."; - default = ""; - type = types.str; - }; - }; - - proxy = { - enable = mkOption { - description = "Whether to enable Kubernetes proxy."; - default = false; - type = types.bool; - }; - - featureGates = mkOption { - description = "List set of feature gates"; - default = cfg.featureGates; - type = types.listOf types.str; - }; - - address = mkOption { - description = "Kubernetes proxy listening address."; - default = "0.0.0.0"; - type = types.str; - }; - - kubeconfig = mkKubeConfigOptions "Kubernetes proxy"; - - extraOpts = mkOption { - description = "Kubernetes proxy extra command line options."; - default = ""; - type = types.str; - }; - }; - - addonManager = { - enable = mkOption { - description = "Whether to enable Kubernetes addon manager."; - default = false; - type = types.bool; - }; - - addons = mkOption { - description = "Kubernetes addons (any kind of Kubernetes resource can be an addon)."; - default = { }; - type = types.attrsOf (types.either types.attrs (types.listOf types.attrs)); - example = literalExample '' - { - "my-service" = { - "apiVersion" = "v1"; - "kind" = "Service"; - "metadata" = { - "name" = "my-service"; - "namespace" = "default"; - }; - "spec" = { ... 
}; - }; - } - // import { cfg = config.services.kubernetes; }; - ''; - }; + masterAddress = mkOption { + description = "Clusterwide available network address or hostname for the kubernetes master server."; + example = "master.example.com"; + type = types.str; }; path = mkOption { @@ -787,304 +156,65 @@ in { type = types.nullOr types.str; }; - flannel.enable = mkOption { - description = "Whether to enable flannel networking"; - default = false; - type = types.bool; + lib = mkOption { + description = "Common functions for the kubernetes modules."; + default = { + inherit mkCert; + inherit mkKubeConfig; + inherit mkKubeConfigOptions; + }; + type = types.attrs; }; + secretsPath = mkOption { + description = "Default location for kubernetes secrets. Not a store location."; + type = types.path; + default = cfg.dataDir + "/secrets"; + }; }; ###### implementation config = mkMerge [ - (mkIf cfg.kubelet.enable { - services.kubernetes.kubelet.seedDockerImages = [infraContainer]; - systemd.services.kubelet-bootstrap = { - description = "Boostrap Kubelet"; - wantedBy = ["kubernetes.target"]; - after = ["docker.service" "network.target"]; - path = with pkgs; [ docker ]; - script = '' - ${concatMapStrings (img: '' - echo "Seeding docker image: ${img}" - docker load <${img} - '') cfg.kubelet.seedDockerImages} - - rm /opt/cni/bin/* || true - ${concatMapStrings (package: '' - echo "Linking cni package: ${package}" - ln -fs ${package}/bin/* /opt/cni/bin - '') cfg.kubelet.cni.packages} - ''; - serviceConfig = { - Slice = "kubernetes.slice"; - Type = "oneshot"; - }; - }; - - systemd.services.kubelet = { - description = "Kubernetes Kubelet Service"; - wantedBy = [ "kubernetes.target" ]; - after = [ "network.target" "docker.service" "kube-apiserver.service" "kubelet-bootstrap.service" ]; - path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ cfg.path; - serviceConfig = { - Slice = "kubernetes.slice"; - CPUAccounting = true; - 
MemoryAccounting = true; - ExecStart = ''${cfg.package}/bin/kubelet \ - ${optionalString (taints != "") - "--register-with-taints=${taints}"} \ - --kubeconfig=${mkKubeConfig "kubelet" cfg.kubelet.kubeconfig} \ - --config=${kubeletConfig} \ - --address=${cfg.kubelet.address} \ - --port=${toString cfg.kubelet.port} \ - --register-node=${boolToString cfg.kubelet.registerNode} \ - ${optionalString (cfg.kubelet.tlsCertFile != null) - "--tls-cert-file=${cfg.kubelet.tlsCertFile}"} \ - ${optionalString (cfg.kubelet.tlsKeyFile != null) - "--tls-private-key-file=${cfg.kubelet.tlsKeyFile}"} \ - ${optionalString (cfg.kubelet.clientCaFile != null) - "--client-ca-file=${cfg.kubelet.clientCaFile}"} \ - --authentication-token-webhook \ - --authentication-token-webhook-cache-ttl="10s" \ - --authorization-mode=Webhook \ - --healthz-bind-address=${cfg.kubelet.healthz.bind} \ - --healthz-port=${toString cfg.kubelet.healthz.port} \ - --hostname-override=${cfg.kubelet.hostname} \ - --allow-privileged=${boolToString cfg.kubelet.allowPrivileged} \ - --root-dir=${cfg.dataDir} \ - ${optionalString (cfg.kubelet.clusterDns != "") - "--cluster-dns=${cfg.kubelet.clusterDns}"} \ - ${optionalString (cfg.kubelet.clusterDomain != "") - "--cluster-domain=${cfg.kubelet.clusterDomain}"} \ - --pod-infra-container-image=pause \ - ${optionalString (cfg.kubelet.networkPlugin != null) - "--network-plugin=${cfg.kubelet.networkPlugin}"} \ - --cni-conf-dir=${cniConfig} \ - --hairpin-mode=hairpin-veth \ - ${optionalString (cfg.kubelet.nodeIp != null) - "--node-ip=${cfg.kubelet.nodeIp}"} \ - ${optionalString (cfg.kubelet.featureGates != []) - "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.kubelet.featureGates}"} \ - ${optionalString cfg.verbose "--v=6 --log_flush_frequency=1s"} \ - ${cfg.kubelet.extraOpts} - ''; - WorkingDirectory = cfg.dataDir; - }; - }; - - # Allways include cni plugins - services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins]; - - boot.kernelModules = 
["br_netfilter"]; - - services.kubernetes.kubelet.kubeconfig = kubeConfigDefaults; + (mkIf cfg.easyCerts { + services.kubernetes.pki.enable = mkDefault true; + services.kubernetes.caFile = caCert; }) - (mkIf (cfg.kubelet.applyManifests && cfg.kubelet.enable) { - environment.etc = mapAttrs' (name: manifest: - nameValuePair "kubernetes/manifests/${name}.json" { - text = builtins.toJSON manifest; - mode = "0755"; - } - ) cfg.kubelet.manifests; - }) - - (mkIf (cfg.kubelet.unschedulable && cfg.kubelet.enable) { - services.kubernetes.kubelet.taints.unschedulable = { - value = "true"; - effect = "NoSchedule"; - }; - }) - - (mkIf cfg.apiserver.enable { - systemd.services.kube-apiserver = { - description = "Kubernetes APIServer Service"; - wantedBy = [ "kubernetes.target" ]; - after = [ "network.target" "docker.service" ]; - serviceConfig = { - Slice = "kubernetes.slice"; - ExecStart = ''${cfg.package}/bin/kube-apiserver \ - --etcd-servers=${concatStringsSep "," cfg.etcd.servers} \ - ${optionalString (cfg.etcd.caFile != null) - "--etcd-cafile=${cfg.etcd.caFile}"} \ - ${optionalString (cfg.etcd.certFile != null) - "--etcd-certfile=${cfg.etcd.certFile}"} \ - ${optionalString (cfg.etcd.keyFile != null) - "--etcd-keyfile=${cfg.etcd.keyFile}"} \ - --insecure-port=${toString cfg.apiserver.port} \ - --bind-address=${cfg.apiserver.bindAddress} \ - ${optionalString (cfg.apiserver.advertiseAddress != null) - "--advertise-address=${cfg.apiserver.advertiseAddress}"} \ - --allow-privileged=${boolToString cfg.apiserver.allowPrivileged}\ - ${optionalString (cfg.apiserver.tlsCertFile != null) - "--tls-cert-file=${cfg.apiserver.tlsCertFile}"} \ - ${optionalString (cfg.apiserver.tlsKeyFile != null) - "--tls-private-key-file=${cfg.apiserver.tlsKeyFile}"} \ - ${optionalString (cfg.apiserver.tokenAuthFile != null) - "--token-auth-file=${cfg.apiserver.tokenAuthFile}"} \ - ${optionalString (cfg.apiserver.basicAuthFile != null) - "--basic-auth-file=${cfg.apiserver.basicAuthFile}"} \ - 
--kubelet-https=${if cfg.apiserver.kubeletHttps then "true" else "false"} \ - ${optionalString (cfg.apiserver.kubeletClientCaFile != null) - "--kubelet-certificate-authority=${cfg.apiserver.kubeletClientCaFile}"} \ - ${optionalString (cfg.apiserver.kubeletClientCertFile != null) - "--kubelet-client-certificate=${cfg.apiserver.kubeletClientCertFile}"} \ - ${optionalString (cfg.apiserver.kubeletClientKeyFile != null) - "--kubelet-client-key=${cfg.apiserver.kubeletClientKeyFile}"} \ - ${optionalString (cfg.apiserver.clientCaFile != null) - "--client-ca-file=${cfg.apiserver.clientCaFile}"} \ - --authorization-mode=${concatStringsSep "," cfg.apiserver.authorizationMode} \ - ${optionalString (elem "ABAC" cfg.apiserver.authorizationMode) - "--authorization-policy-file=${ - pkgs.writeText "kube-auth-policy.jsonl" - (concatMapStringsSep "\n" (l: builtins.toJSON l) cfg.apiserver.authorizationPolicy) - }" - } \ - ${optionalString (elem "Webhook" cfg.apiserver.authorizationMode) - "--authorization-webhook-config-file=${cfg.apiserver.webhookConfig}" - } \ - --secure-port=${toString cfg.apiserver.securePort} \ - --service-cluster-ip-range=${cfg.apiserver.serviceClusterIpRange} \ - ${optionalString (cfg.apiserver.runtimeConfig != "") - "--runtime-config=${cfg.apiserver.runtimeConfig}"} \ - --enable-admission-plugins=${concatStringsSep "," cfg.apiserver.enableAdmissionPlugins} \ - --disable-admission-plugins=${concatStringsSep "," cfg.apiserver.disableAdmissionPlugins} \ - ${optionalString (cfg.apiserver.serviceAccountKeyFile!=null) - "--service-account-key-file=${cfg.apiserver.serviceAccountKeyFile}"} \ - ${optionalString cfg.verbose "--v=6"} \ - ${optionalString cfg.verbose "--log-flush-frequency=1s"} \ - --storage-backend=${cfg.apiserver.storageBackend} \ - ${optionalString (cfg.kubelet.featureGates != []) - "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.kubelet.featureGates}"} \ - ${cfg.apiserver.extraOpts} - ''; - WorkingDirectory = cfg.dataDir; - 
User = "kubernetes"; - Group = "kubernetes"; - AmbientCapabilities = "cap_net_bind_service"; - Restart = "on-failure"; - RestartSec = 5; - }; - }; - }) - - (mkIf cfg.scheduler.enable { - systemd.services.kube-scheduler = { - description = "Kubernetes Scheduler Service"; - wantedBy = [ "kubernetes.target" ]; - after = [ "kube-apiserver.service" ]; - serviceConfig = { - Slice = "kubernetes.slice"; - ExecStart = ''${cfg.package}/bin/kube-scheduler \ - --address=${cfg.scheduler.address} \ - --port=${toString cfg.scheduler.port} \ - --leader-elect=${boolToString cfg.scheduler.leaderElect} \ - --kubeconfig=${mkKubeConfig "kube-scheduler" cfg.scheduler.kubeconfig} \ - ${optionalString cfg.verbose "--v=6"} \ - ${optionalString cfg.verbose "--log-flush-frequency=1s"} \ - ${optionalString (cfg.scheduler.featureGates != []) - "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.scheduler.featureGates}"} \ - ${cfg.scheduler.extraOpts} - ''; - WorkingDirectory = cfg.dataDir; - User = "kubernetes"; - Group = "kubernetes"; - }; - }; - - services.kubernetes.scheduler.kubeconfig = kubeConfigDefaults; - }) - - (mkIf cfg.controllerManager.enable { - systemd.services.kube-controller-manager = { - description = "Kubernetes Controller Manager Service"; - wantedBy = [ "kubernetes.target" ]; - after = [ "kube-apiserver.service" ]; - serviceConfig = { - RestartSec = "30s"; - Restart = "on-failure"; - Slice = "kubernetes.slice"; - ExecStart = ''${cfg.package}/bin/kube-controller-manager \ - --address=${cfg.controllerManager.address} \ - --port=${toString cfg.controllerManager.port} \ - --kubeconfig=${mkKubeConfig "kube-controller-manager" cfg.controllerManager.kubeconfig} \ - --leader-elect=${boolToString cfg.controllerManager.leaderElect} \ - ${if (cfg.controllerManager.serviceAccountKeyFile!=null) - then "--service-account-private-key-file=${cfg.controllerManager.serviceAccountKeyFile}" - else "--service-account-private-key-file=/var/run/kubernetes/apiserver.key"} \ 
- ${if (cfg.controllerManager.rootCaFile!=null) - then "--root-ca-file=${cfg.controllerManager.rootCaFile}" - else "--root-ca-file=/var/run/kubernetes/apiserver.crt"} \ - ${if (cfg.clusterCidr!=null) - then "--cluster-cidr=${cfg.clusterCidr} --allocate-node-cidrs=true" - else "--allocate-node-cidrs=false"} \ - ${optionalString (cfg.controllerManager.featureGates != []) - "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.controllerManager.featureGates}"} \ - ${optionalString cfg.verbose "--v=6"} \ - ${optionalString cfg.verbose "--log-flush-frequency=1s"} \ - ${cfg.controllerManager.extraOpts} - ''; - WorkingDirectory = cfg.dataDir; - User = "kubernetes"; - Group = "kubernetes"; - }; - path = cfg.path; - }; - - services.kubernetes.controllerManager.kubeconfig = kubeConfigDefaults; - }) - - (mkIf cfg.proxy.enable { - systemd.services.kube-proxy = { - description = "Kubernetes Proxy Service"; - wantedBy = [ "kubernetes.target" ]; - after = [ "kube-apiserver.service" ]; - path = [pkgs.iptables pkgs.conntrack_tools]; - serviceConfig = { - Slice = "kubernetes.slice"; - ExecStart = ''${cfg.package}/bin/kube-proxy \ - --kubeconfig=${mkKubeConfig "kube-proxy" cfg.proxy.kubeconfig} \ - --bind-address=${cfg.proxy.address} \ - ${optionalString (cfg.proxy.featureGates != []) - "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.proxy.featureGates}"} \ - ${optionalString cfg.verbose "--v=6"} \ - ${optionalString cfg.verbose "--log-flush-frequency=1s"} \ - ${optionalString (cfg.clusterCidr!=null) - "--cluster-cidr=${cfg.clusterCidr}"} \ - ${cfg.proxy.extraOpts} - ''; - WorkingDirectory = cfg.dataDir; - }; - }; - - # kube-proxy needs iptables - networking.firewall.enable = mkDefault true; - - services.kubernetes.proxy.kubeconfig = kubeConfigDefaults; - }) - - (mkIf (any (el: el == "master") cfg.roles) { - virtualisation.docker.enable = mkDefault true; - services.kubernetes.kubelet.enable = mkDefault true; - 
services.kubernetes.kubelet.allowPrivileged = mkDefault true; - services.kubernetes.kubelet.applyManifests = mkDefault true; + (mkIf (elem "master" cfg.roles) { services.kubernetes.apiserver.enable = mkDefault true; services.kubernetes.scheduler.enable = mkDefault true; services.kubernetes.controllerManager.enable = mkDefault true; - services.etcd.enable = mkDefault (cfg.etcd.servers == ["http://127.0.0.1:2379"]); services.kubernetes.addonManager.enable = mkDefault true; services.kubernetes.proxy.enable = mkDefault true; + services.etcd.enable = true; # Cannot mkDefault because of flannel default options }) - # if this node is only a master make it unschedulable by default + (mkIf (all (el: el == "master") cfg.roles) { + # if this node is only a master make it unschedulable by default services.kubernetes.kubelet.unschedulable = mkDefault true; }) - (mkIf (any (el: el == "node") cfg.roles) { + (mkIf (elem "node" cfg.roles) { + services.kubernetes.kubelet.enable = mkDefault true; + services.kubernetes.proxy.enable = mkDefault true; + }) + + # Using "services.kubernetes.roles" will automatically enable easyCerts and flannel + (mkIf (cfg.roles != []) { + services.kubernetes.flannel.enable = mkDefault true; + services.flannel.etcd.endpoints = mkDefault etcdEndpoints; + services.kubernetes.easyCerts = mkDefault true; + }) + + (mkIf cfg.apiserver.enable { + services.kubernetes.pki.etcClusterAdminKubeconfig = mkDefault "kubernetes/cluster-admin.kubeconfig"; + services.kubernetes.apiserver.etcd.servers = mkDefault etcdEndpoints; + }) + + (mkIf cfg.kubelet.enable { virtualisation.docker = { enable = mkDefault true; @@ -1094,26 +224,18 @@ in { # iptables must be disabled for kubernetes extraOptions = "--iptables=false --ip-masq=false"; }; - - services.kubernetes.kubelet.enable = mkDefault true; - services.kubernetes.proxy.enable = mkDefault true; }) - (mkIf cfg.addonManager.enable { - environment.etc."kubernetes/addons".source = "${addons}/"; - - 
systemd.services.kube-addon-manager = { - description = "Kubernetes addon manager"; - wantedBy = [ "kubernetes.target" ]; - after = [ "kube-apiserver.service" ]; - environment.ADDON_PATH = "/etc/kubernetes/addons/"; - path = [ pkgs.gawk ]; - serviceConfig = { - Slice = "kubernetes.slice"; - ExecStart = "${cfg.package}/bin/kube-addons"; - WorkingDirectory = cfg.dataDir; - User = "kubernetes"; - Group = "kubernetes"; + (mkIf (cfg.apiserver.enable || cfg.controllerManager.enable) { + services.kubernetes.pki.certs = { + serviceAccount = mkCert { + name = "service-account"; + CN = "system:service-account-signer"; + action = '' + systemctl reload \ + kube-apiserver.service \ + kube-controller-manager.service + ''; }; }; }) @@ -1123,7 +245,8 @@ in { cfg.scheduler.enable || cfg.controllerManager.enable || cfg.kubelet.enable || - cfg.proxy.enable + cfg.proxy.enable || + cfg.addonManager.enable ) { systemd.targets.kubernetes = { description = "Kubernetes"; @@ -1132,11 +255,10 @@ in { systemd.tmpfiles.rules = [ "d /opt/cni/bin 0755 root root -" - "d /var/run/kubernetes 0755 kubernetes kubernetes -" + "d /run/kubernetes 0755 kubernetes kubernetes -" "d /var/lib/kubernetes 0755 kubernetes kubernetes -" ]; - environment.systemPackages = [ cfg.package ]; users.users = singleton { name = "kubernetes"; uid = config.ids.uids.kubernetes; @@ -1148,53 +270,12 @@ in { }; users.groups.kubernetes.gid = config.ids.gids.kubernetes; - # dns addon is enabled by default + # dns addon is enabled by default services.kubernetes.addons.dns.enable = mkDefault true; - }) - (mkIf cfg.flannel.enable { - services.flannel = { - enable = mkDefault true; - network = mkDefault cfg.clusterCidr; - etcd = mkDefault { - endpoints = cfg.etcd.servers; - inherit (cfg.etcd) caFile certFile keyFile; - }; - }; - - services.kubernetes.kubelet = { - networkPlugin = mkDefault "cni"; - cni.config = mkDefault [{ - name = "mynet"; - type = "flannel"; - delegate = { - isDefaultGateway = true; - bridge = "docker0"; - }; - 
}]; - }; - - systemd.services."mk-docker-opts" = { - description = "Pre-Docker Actions"; - wantedBy = [ "flannel.service" ]; - before = [ "docker.service" ]; - after = [ "flannel.service" ]; - path = [ pkgs.gawk pkgs.gnugrep ]; - script = '' - mkdir -p /run/flannel - ${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker - ''; - serviceConfig.Type = "oneshot"; - }; - systemd.services.docker.serviceConfig.EnvironmentFile = "/run/flannel/docker"; - - # read environment variables generated by mk-docker-opts - virtualisation.docker.extraOptions = "$DOCKER_OPTS"; - - networking.firewall.allowedUDPPorts = [ - 8285 # flannel udp - 8472 # flannel vxlan - ]; + services.kubernetes.apiserverAddress = mkDefault ("https://${if cfg.apiserver.advertiseAddress != null + then cfg.apiserver.advertiseAddress + else cfg.masterAddress}:${toString cfg.apiserver.securePort}"); }) ]; } diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix new file mode 100644 index 000000000000..38dc1e2b47d4 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/flannel.nix @@ -0,0 +1,79 @@ +{ config, lib, pkgs, ... 
}: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.flannel; + + # needed for flannel to pass options to docker + mkDockerOpts = pkgs.runCommand "mk-docker-opts" { + buildInputs = [ pkgs.makeWrapper ]; + } '' + mkdir -p $out + cp ${pkgs.kubernetes.src}/cluster/centos/node/bin/mk-docker-opts.sh $out/mk-docker-opts.sh + + # bashInteractive needed for `compgen` + makeWrapper ${pkgs.bashInteractive}/bin/bash $out/mk-docker-opts --add-flags "$out/mk-docker-opts.sh" + ''; +in +{ + ###### interface + options.services.kubernetes.flannel = { + enable = mkEnableOption "flannel networking"; + }; + + ###### implementation + config = mkIf cfg.enable { + services.flannel = { + + enable = mkDefault true; + network = mkDefault top.clusterCidr; + }; + + services.kubernetes.kubelet = { + networkPlugin = mkDefault "cni"; + cni.config = mkDefault [{ + name = "mynet"; + type = "flannel"; + delegate = { + isDefaultGateway = true; + bridge = "docker0"; + }; + }]; + }; + + systemd.services."mk-docker-opts" = { + description = "Pre-Docker Actions"; + wantedBy = [ "flannel.service" ]; + before = [ "docker.service" ]; + after = [ "flannel.service" ]; + path = with pkgs; [ gawk gnugrep ]; + script = '' + mkdir -p /run/flannel + ${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker + ''; + serviceConfig.Type = "oneshot"; + }; + systemd.services.docker.serviceConfig.EnvironmentFile = "/run/flannel/docker"; + + # read environment variables generated by mk-docker-opts + virtualisation.docker.extraOptions = "$DOCKER_OPTS"; + + networking = { + firewall.allowedUDPPorts = [ + 8285 # flannel udp + 8472 # flannel vxlan + ]; + dhcpcd.denyInterfaces = [ "docker*" "flannel*" ]; + }; + + services.kubernetes.pki.certs = { + flannelEtcdClient = top.lib.mkCert { + name = "flannel-etcd-client"; + CN = "flannel-etcd-client"; + action = "systemctl restart flannel.service"; + }; + }; + }; +} diff --git a/nixos/modules/services/cluster/kubernetes/kubelet.nix 
b/nixos/modules/services/cluster/kubernetes/kubelet.nix new file mode 100644 index 000000000000..51d1fd30959b --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/kubelet.nix @@ -0,0 +1,367 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.kubelet; + + cniConfig = + if cfg.cni.config != [] && !(isNull cfg.cni.configDir) then + throw "Verbatim CNI-config and CNI configDir cannot both be set." + else if !(isNull cfg.cni.configDir) then + cfg.cni.configDir + else + (pkgs.buildEnv { + name = "kubernetes-cni-config"; + paths = imap (i: entry: + pkgs.writeTextDir "${toString (10+i)}-${entry.type}.conf" (builtins.toJSON entry) + ) cfg.cni.config; + }); + + infraContainer = pkgs.dockerTools.buildImage { + name = "pause"; + tag = "latest"; + contents = top.package.pause; + config.Cmd = "/bin/pause"; + }; + + kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig; + + manifests = pkgs.buildEnv { + name = "kubernetes-manifests"; + paths = mapAttrsToList (name: manifest: + pkgs.writeTextDir "${name}.json" (builtins.toJSON manifest) + ) cfg.manifests; + }; + + manifestPath = "kubernetes/manifests"; + + taintOptions = with lib.types; { name, ... 
}: { + options = { + key = mkOption { + description = "Key of taint."; + default = name; + type = str; + }; + value = mkOption { + description = "Value of taint."; + type = str; + }; + effect = mkOption { + description = "Effect of taint."; + example = "NoSchedule"; + type = enum ["NoSchedule" "PreferNoSchedule" "NoExecute"]; + }; + }; + }; + + taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (mapAttrsToList (n: v: v) cfg.taints); +in +{ + ###### interface + options.services.kubernetes.kubelet = with lib.types; { + + address = mkOption { + description = "Kubernetes kubelet info server listening address."; + default = "0.0.0.0"; + type = str; + }; + + allowPrivileged = mkOption { + description = "Whether to allow Kubernetes containers to request privileged mode."; + default = false; + type = bool; + }; + + clusterDns = mkOption { + description = "Use alternative DNS."; + default = "10.1.0.1"; + type = str; + }; + + clusterDomain = mkOption { + description = "Use alternative domain."; + default = config.services.kubernetes.addons.dns.clusterDomain; + type = str; + }; + + clientCaFile = mkOption { + description = "Kubernetes apiserver CA file for client authentication."; + default = top.caFile; + type = nullOr path; + }; + + cni = { + packages = mkOption { + description = "List of network plugin packages to install."; + type = listOf package; + default = []; + }; + + config = mkOption { + description = "Kubernetes CNI configuration."; + type = listOf attrs; + default = []; + example = literalExample '' + [{ + "cniVersion": "0.2.0", + "name": "mynet", + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "ipMasq": true, + "ipam": { + "type": "host-local", + "subnet": "10.22.0.0/16", + "routes": [ + { "dst": "0.0.0.0/0" } + ] + } + } { + "cniVersion": "0.2.0", + "type": "loopback" + }] + ''; + }; + + configDir = mkOption { + description = "Path to Kubernetes CNI configuration directory."; + type = nullOr path; + default = null; + }; + }; 
+ + enable = mkEnableOption "Kubernetes kubelet"; # no trailing period: mkEnableOption appends one + + extraOpts = mkOption { + description = "Kubernetes kubelet extra command line options."; + default = ""; + type = str; + }; + + featureGates = mkOption { + description = "List set of feature gates"; + default = top.featureGates; + type = listOf str; + }; + + healthz = { + bind = mkOption { + description = "Kubernetes kubelet healthz listening address."; + default = "127.0.0.1"; + type = str; + }; + + port = mkOption { + description = "Kubernetes kubelet healthz port."; + default = 10248; + type = int; + }; + }; + + hostname = mkOption { + description = "Kubernetes kubelet hostname override."; + default = config.networking.hostName; + type = str; + }; + + kubeconfig = top.lib.mkKubeConfigOptions "Kubelet"; + + manifests = mkOption { + description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)"; + type = attrsOf attrs; + default = {}; + }; + + networkPlugin = mkOption { + description = "Network plugin to use by Kubernetes."; + type = nullOr (enum ["cni" "kubenet"]); + default = "kubenet"; + }; + + nodeIp = mkOption { + description = "IP address of the node. 
If set, kubelet will use this IP address for the node."; + default = null; + type = nullOr str; + }; + + registerNode = mkOption { + description = "Whether to auto register kubelet with API server."; + default = true; + type = bool; + }; + + port = mkOption { + description = "Kubernetes kubelet info server listening port."; + default = 10250; + type = int; + }; + + seedDockerImages = mkOption { + description = "List of docker images to preload on system"; + default = []; + type = listOf package; + }; + + taints = mkOption { + description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/)."; + default = {}; + type = attrsOf (submodule [ taintOptions ]); + }; + + tlsCertFile = mkOption { + description = "File containing x509 Certificate for HTTPS."; + default = null; + type = nullOr path; + }; + + tlsKeyFile = mkOption { + description = "File containing x509 private key matching tlsCertFile."; + default = null; + type = nullOr path; + }; + + unschedulable = mkOption { + description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role."; + default = false; + type = bool; + }; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. 
See + <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/> + ''; + default = null; + type = nullOr int; + }; + + }; + + ###### implementation + config = mkMerge [ + (mkIf cfg.enable { + services.kubernetes.kubelet.seedDockerImages = [infraContainer]; + + systemd.services.kubelet-bootstrap = { + description = "Bootstrap Kubelet"; + wantedBy = ["kubernetes.target"]; + after = ["docker.service" "network.target"]; + path = with pkgs; [ docker ]; + script = '' + ${concatMapStrings (img: '' + echo "Seeding docker image: ${img}" + docker load <${img} + '') cfg.seedDockerImages} + + rm /opt/cni/bin/* || true + ${concatMapStrings (package: '' + echo "Linking cni package: ${package}" + ln -fs ${package}/bin/* /opt/cni/bin + '') cfg.cni.packages} + ''; + serviceConfig = { + Slice = "kubernetes.slice"; + Type = "oneshot"; # runs once: loads seedDockerImages, then relinks cni.packages into /opt/cni/bin + }; + }; + + systemd.services.kubelet = { + description = "Kubernetes Kubelet Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "network.target" "docker.service" "kube-apiserver.service" "kubelet-bootstrap.service" ]; + path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path; + serviceConfig = { + Slice = "kubernetes.slice"; + CPUAccounting = true; + MemoryAccounting = true; + ExecStart = ''${top.package}/bin/kubelet \ + --address=${cfg.address} \ + --allow-privileged=${boolToString cfg.allowPrivileged} \ + --authentication-token-webhook \ + --authentication-token-webhook-cache-ttl="10s" \ + --authorization-mode=Webhook \ + ${optionalString (cfg.clientCaFile != null) + "--client-ca-file=${cfg.clientCaFile}"} \ + ${optionalString (cfg.clusterDns != "") + "--cluster-dns=${cfg.clusterDns}"} \ + ${optionalString (cfg.clusterDomain != "") + "--cluster-domain=${cfg.clusterDomain}"} \ + --cni-conf-dir=${cniConfig} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + --hairpin-mode=hairpin-veth \ + --healthz-bind-address=${cfg.healthz.bind} \ + 
--healthz-port=${toString cfg.healthz.port} \ + --hostname-override=${cfg.hostname} \ + --kubeconfig=${kubeconfig} \ + ${optionalString (cfg.networkPlugin != null) + "--network-plugin=${cfg.networkPlugin}"} \ + ${optionalString (cfg.nodeIp != null) + "--node-ip=${cfg.nodeIp}"} \ + --pod-infra-container-image=pause \ + ${optionalString (cfg.manifests != {}) + "--pod-manifest-path=/etc/${manifestPath}"} \ + --port=${toString cfg.port} \ + --register-node=${boolToString cfg.registerNode} \ + ${optionalString (taints != "") + "--register-with-taints=${taints}"} \ + --root-dir=${top.dataDir} \ + ${optionalString (cfg.tlsCertFile != null) + "--tls-cert-file=${cfg.tlsCertFile}"} \ + ${optionalString (cfg.tlsKeyFile != null) + "--tls-private-key-file=${cfg.tlsKeyFile}"} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + }; + }; + + # Always include the cni-plugins package in cni.packages + services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins]; + + boot.kernelModules = ["br_netfilter"]; + + services.kubernetes.kubelet.hostname = with config.networking; + mkDefault (hostName + optionalString (!isNull domain) ".${domain}"); # hostName, plus ".<domain>" when networking.domain is set + + services.kubernetes.pki.certs = with top.lib; { + kubelet = mkCert { + name = "kubelet"; + CN = top.kubelet.hostname; + action = "systemctl restart kubelet.service"; + + }; + kubeletClient = mkCert { + name = "kubelet-client"; + CN = "system:node:${top.kubelet.hostname}"; + fields = { + O = "system:nodes"; + }; + action = "systemctl restart kubelet.service"; + }; + }; + + services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress; + }) + + (mkIf (cfg.enable && cfg.manifests != {}) { + environment.etc = mapAttrs' (name: manifest: + nameValuePair "${manifestPath}/${name}.json" { + text = builtins.toJSON manifest; + mode = "0755"; + } + ) cfg.manifests; + }) + + (mkIf (cfg.unschedulable && cfg.enable) { + services.kubernetes.kubelet.taints.unschedulable = { + value 
= "true"; + effect = "NoSchedule"; + }; + }) + + ]; +} diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix new file mode 100644 index 000000000000..e560bb9b29a0 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/pki.nix @@ -0,0 +1,374 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.pki; + + csrCA = pkgs.writeText "kube-pki-cacert-csr.json" (builtins.toJSON { + key = { + algo = "rsa"; + size = 2048; + }; + names = singleton cfg.caSpec; + }); + + csrCfssl = pkgs.writeText "kube-pki-cfssl-csr.json" (builtins.toJSON { + key = { + algo = "rsa"; + size = 2048; + }; + CN = top.masterAddress; + }); + + cfsslAPITokenBaseName = "apitoken.secret"; + cfsslAPITokenPath = "${config.services.cfssl.dataDir}/${cfsslAPITokenBaseName}"; + certmgrAPITokenPath = "${top.secretsPath}/${cfsslAPITokenBaseName}"; + cfsslAPITokenLength = 32; # token length in hex characters (half as many random bytes are read) + + clusterAdminKubeconfig = with cfg.certs.clusterAdmin; + top.lib.mkKubeConfig "cluster-admin" { + server = top.apiserverAddress; + certFile = cert; + keyFile = key; + }; + + remote = with config.services; "https://${kubernetes.masterAddress}:${toString cfssl.port}"; +in +{ + ###### interface + options.services.kubernetes.pki = with lib.types; { + + enable = mkEnableOption "easyCert issuer service"; # mkEnableOption itself prepends "Whether to enable" + + certs = mkOption { + description = "Attribute set of certificate specs to feed to cert generator."; + default = {}; + type = attrs; + }; + + genCfsslCACert = mkOption { + description = '' + Whether to automatically generate cfssl CA certificate and key, + if they don't exist. + ''; + default = true; + type = bool; + }; + + genCfsslAPICerts = mkOption { + description = '' + Whether to automatically generate cfssl API webserver TLS cert and key, + if they don't exist. 
+ ''; + default = true; + type = bool; + }; + + genCfsslAPIToken = mkOption { + description = '' + Whether to automatically generate cfssl API-token secret, + if it doesn't exist. + ''; + default = true; + type = bool; + }; + + pkiTrustOnBootstrap = mkOption { + description = "Whether to always trust remote cfssl server upon initial PKI bootstrap."; + default = true; + type = bool; + }; + + caCertPathPrefix = mkOption { + description = '' + Path-prefix for the CA-certificate to be used for cfssl signing. + Suffixes ".pem" and "-key.pem" will be automatically appended for + the public and private keys respectively. + ''; + default = "${config.services.cfssl.dataDir}/ca"; + type = str; + }; + + caSpec = mkOption { + description = "Certificate specification for the auto-generated CAcert."; + default = { + CN = "kubernetes-cluster-ca"; + O = "NixOS"; + OU = "services.kubernetes.pki.caSpec"; + L = "auto-generated"; + }; + type = attrs; + }; + + etcClusterAdminKubeconfig = mkOption { + description = '' + Symlink a kubeconfig with cluster-admin privileges to environment path + (/etc/&lt;path&gt;). 
+ ''; + default = null; + type = nullOr str; + }; + + }; + + ###### implementation + config = mkIf cfg.enable + (let + cfsslCertPathPrefix = "${config.services.cfssl.dataDir}/cfssl"; + cfsslCert = "${cfsslCertPathPrefix}.pem"; + cfsslKey = "${cfsslCertPathPrefix}-key.pem"; + in + { + + services.cfssl = mkIf (top.apiserver.enable) { + enable = true; + address = "0.0.0.0"; + tlsCert = cfsslCert; + tlsKey = cfsslKey; + configFile = toString (pkgs.writeText "cfssl-config.json" (builtins.toJSON { + signing = { + profiles = { + default = { + usages = ["digital signature"]; + auth_key = "default"; + expiry = "720h"; + }; + }; + }; + auth_keys = { + default = { + type = "standard"; + key = "file:${cfsslAPITokenPath}"; + }; + }; + })); + }; + + systemd.services.cfssl.preStart = with pkgs; with config.services.cfssl; mkIf (top.apiserver.enable) + (concatStringsSep "\n" [ + "set -e" + (optionalString cfg.genCfsslCACert '' + if [ ! -f "${cfg.caCertPathPrefix}.pem" ]; then + ${cfssl}/bin/cfssl genkey -initca ${csrCA} | \ + ${cfssl}/bin/cfssljson -bare ${cfg.caCertPathPrefix} + fi + '') + (optionalString cfg.genCfsslAPICerts '' + if [ ! -f "${dataDir}/cfssl.pem" ]; then + ${cfssl}/bin/cfssl gencert -ca "${cfg.caCertPathPrefix}.pem" -ca-key "${cfg.caCertPathPrefix}-key.pem" ${csrCfssl} | \ + ${cfssl}/bin/cfssljson -bare ${cfsslCertPathPrefix} + fi + '') + (optionalString cfg.genCfsslAPIToken '' + if [ ! -f "${cfsslAPITokenPath}" ]; then + head -c ${toString (cfsslAPITokenLength / 2)} /dev/urandom | od -An -t x | tr -d ' ' >"${cfsslAPITokenPath}" + fi + chown cfssl "${cfsslAPITokenPath}" && chmod 400 "${cfsslAPITokenPath}" + '')]); + + systemd.services.kube-certmgr-bootstrap = { + description = "Kubernetes certmgr bootstrapper"; + wantedBy = [ "certmgr.service" ]; + after = [ "cfssl.target" ]; + script = concatStringsSep "\n" ['' + set -e + + # If there's a cfssl (cert issuer) running locally, then don't rely on user to + # manually paste it in place. Just symlink. 
+ # otherwise, create the target file, ready for users to insert the token + + if [ -f "${cfsslAPITokenPath}" ]; then + ln -fs "${cfsslAPITokenPath}" "${certmgrAPITokenPath}" + else + touch "${certmgrAPITokenPath}" && chmod 600 "${certmgrAPITokenPath}" + fi + '' + (optionalString (cfg.pkiTrustOnBootstrap) '' + if [ ! -f "${top.caFile}" ] || [ $(cat "${top.caFile}" | wc -c) -lt 1 ]; then + ${pkgs.curl}/bin/curl --fail-early -f -kd '{}' ${remote}/api/v1/cfssl/info | \ + ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile} + fi + '') + ]; + serviceConfig = { + RestartSec = "10s"; + Restart = "on-failure"; + }; + }; + + services.certmgr = { + enable = true; + svcManager = "command"; + specs = + let + mkSpec = _: cert: { + inherit (cert) action; + authority = { + inherit remote; + file.path = cert.caCert; + root_ca = cert.caCert; + profile = "default"; + auth_key_file = certmgrAPITokenPath; + }; + certificate = { + path = cert.cert; + }; + private_key = cert.privateKeyOptions; + request = { + inherit (cert) CN hosts; + key = { + algo = "rsa"; + size = 2048; + }; + names = [ cert.fields ]; + }; + }; + in + mapAttrs mkSpec cfg.certs; + }; + + #TODO: Get rid of kube-addon-manager in the future for the following reasons + # - it is basically just a shell script wrapped around kubectl + # - it assumes that it is clusterAdmin or can gain clusterAdmin rights through serviceAccount + # - it is designed to be used with k8s system components only + # - it would be better with a more Nix-oriented way of managing addons + systemd.services.kube-addon-manager = mkIf top.addonManager.enable (mkMerge [{ + environment.KUBECONFIG = with cfg.certs.addonManager; + top.lib.mkKubeConfig "addon-manager" { + server = top.apiserverAddress; + certFile = cert; + keyFile = key; + }; + } + + (optionalAttrs (top.addonManager.bootstrapAddons != {}) { + serviceConfig.PermissionsStartOnly = true; + preStart = with pkgs; + let + files = mapAttrsToList (n: v: writeText "${n}.json" (builtins.toJSON v)) + 
top.addonManager.bootstrapAddons; + in + '' + export KUBECONFIG=${clusterAdminKubeconfig} + ${kubectl}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files} + ''; + })]); + + environment.etc.${cfg.etcClusterAdminKubeconfig}.source = mkIf (!isNull cfg.etcClusterAdminKubeconfig) + clusterAdminKubeconfig; + + environment.systemPackages = mkIf (top.kubelet.enable || top.proxy.enable) [ + (pkgs.writeScriptBin "nixos-kubernetes-node-join" '' + set -e + exec 1>&2 + + if [ $# -gt 0 ]; then + echo "Usage: $(basename $0)" + echo "" + echo "No args. Apitoken must be provided on stdin." + echo "To get the apitoken, execute: 'sudo cat ${certmgrAPITokenPath}' on the master node." + exit 1 + fi + + if [ $(id -u) != 0 ]; then + echo "Run as root please." + exit 1 + fi + + read -r token + if [ ''${#token} != ${toString cfsslAPITokenLength} ]; then + echo "Token must be of length ${toString cfsslAPITokenLength}." + exit 1 + fi + + echo $token > ${certmgrAPITokenPath} + chmod 600 ${certmgrAPITokenPath} + + echo "Restarting certmgr..." >&1 + systemctl restart certmgr + + echo "Waiting for certs to appear..." >&1 + + ${optionalString top.kubelet.enable '' + while [ ! -f ${cfg.certs.kubelet.cert} ]; do sleep 1; done + echo "Restarting kubelet..." >&1 + systemctl restart kubelet + ''} + + ${optionalString top.proxy.enable '' + while [ ! -f ${cfg.certs.kubeProxyClient.cert} ]; do sleep 1; done + echo "Restarting kube-proxy..." >&1 + systemctl restart kube-proxy + ''} + + ${optionalString top.flannel.enable '' + while [ ! -f ${cfg.certs.flannelEtcdClient.cert} ]; do sleep 1; done + echo "Restarting flannel..." 
>&1 + systemctl restart flannel + ''} + + echo "Node joined successfully" + '')]; + + services.etcd = with cfg.certs.etcd; { + certFile = mkDefault cert; + keyFile = mkDefault key; + trustedCaFile = mkDefault caCert; + }; + + services.flannel.etcd = with cfg.certs.flannelEtcdClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + caFile = mkDefault caCert; + }; + + services.kubernetes = { + + apiserver = mkIf top.apiserver.enable (with cfg.certs.apiServer; { + etcd = with cfg.certs.apiserverEtcdClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + caFile = mkDefault caCert; + }; + clientCaFile = mkDefault caCert; + tlsCertFile = mkDefault cert; + tlsKeyFile = mkDefault key; + serviceAccountKeyFile = mkDefault cfg.certs.serviceAccount.cert; + kubeletClientCaFile = mkDefault caCert; + kubeletClientCertFile = mkDefault cfg.certs.apiserverKubeletClient.cert; + kubeletClientKeyFile = mkDefault cfg.certs.apiserverKubeletClient.key; + }); + controllerManager = mkIf top.controllerManager.enable { + serviceAccountKeyFile = mkDefault cfg.certs.serviceAccount.key; + rootCaFile = mkDefault cfg.certs.controllerManagerClient.caCert; # mkDefault like its siblings, so a user value overrides it without mkForce + kubeconfig = with cfg.certs.controllerManagerClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + }; + }; + scheduler = mkIf top.scheduler.enable { + kubeconfig = with cfg.certs.schedulerClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + }; + }; + kubelet = mkIf top.kubelet.enable { + clientCaFile = mkDefault cfg.certs.kubelet.caCert; + tlsCertFile = mkDefault cfg.certs.kubelet.cert; + tlsKeyFile = mkDefault cfg.certs.kubelet.key; + kubeconfig = with cfg.certs.kubeletClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + }; + }; + proxy = mkIf top.proxy.enable { + kubeconfig = with cfg.certs.kubeProxyClient; { + certFile = mkDefault cert; + keyFile = mkDefault key; + }; + }; + }; + }); +} diff --git a/nixos/modules/services/cluster/kubernetes/proxy.nix 
b/nixos/modules/services/cluster/kubernetes/proxy.nix new file mode 100644 index 000000000000..6bcf2eaca82a --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/proxy.nix @@ -0,0 +1,80 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.proxy; +in +{ + + ###### interface + options.services.kubernetes.proxy = with lib.types; { + + bindAddress = mkOption { + description = "Kubernetes proxy listening address."; + default = "0.0.0.0"; + type = str; + }; + + enable = mkEnableOption "Kubernetes proxy"; # mkEnableOption itself prepends "Whether to enable" + + extraOpts = mkOption { + description = "Kubernetes proxy extra command line options."; + default = ""; + type = str; + }; + + featureGates = mkOption { + description = "List set of feature gates"; + default = top.featureGates; + type = listOf str; + }; + + kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes proxy"; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. 
See + <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/> + ''; + default = null; + type = nullOr int; + }; + + }; + + ###### implementation + config = mkIf cfg.enable { + systemd.services.kube-proxy = { + description = "Kubernetes Proxy Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "kube-apiserver.service" ]; + path = with pkgs; [ iptables conntrack_tools ]; + serviceConfig = { + Slice = "kubernetes.slice"; + ExecStart = ''${top.package}/bin/kube-proxy \ + --bind-address=${cfg.bindAddress} \ + ${optionalString (top.clusterCidr!=null) + "--cluster-cidr=${top.clusterCidr}"} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + --kubeconfig=${top.lib.mkKubeConfig "kube-proxy" cfg.kubeconfig} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + }; + }; + + services.kubernetes.pki.certs = { + kubeProxyClient = top.lib.mkCert { + name = "kube-proxy-client"; + CN = "system:kube-proxy"; + action = "systemctl restart kube-proxy.service"; + }; + }; + + services.kubernetes.proxy.kubeconfig.server = mkDefault top.apiserverAddress; + }; +} diff --git a/nixos/modules/services/cluster/kubernetes/scheduler.nix b/nixos/modules/services/cluster/kubernetes/scheduler.nix new file mode 100644 index 000000000000..655e6f8b6e20 --- /dev/null +++ b/nixos/modules/services/cluster/kubernetes/scheduler.nix @@ -0,0 +1,92 @@ +{ config, lib, pkgs, ... 
}: + +with lib; + +let + top = config.services.kubernetes; + cfg = top.scheduler; +in +{ + ###### interface + options.services.kubernetes.scheduler = with lib.types; { + + address = mkOption { + description = "Kubernetes scheduler listening address."; + default = "127.0.0.1"; + type = str; + }; + + enable = mkEnableOption "Kubernetes scheduler"; # mkEnableOption itself prepends "Whether to enable" + + extraOpts = mkOption { + description = "Kubernetes scheduler extra command line options."; + default = ""; + type = str; + }; + + featureGates = mkOption { + description = "List set of feature gates"; + default = top.featureGates; + type = listOf str; + }; + + kubeconfig = top.lib.mkKubeConfigOptions "Kubernetes scheduler"; + + leaderElect = mkOption { + description = "Whether to start leader election before executing main loop."; + type = bool; + default = true; + }; + + port = mkOption { + description = "Kubernetes scheduler listening port."; + default = 10251; + type = int; + }; + + verbosity = mkOption { + description = '' + Optional glog verbosity level for logging statements. 
See + <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/> + ''; + default = null; + type = nullOr int; + }; + + }; + + ###### implementation + config = mkIf cfg.enable { + systemd.services.kube-scheduler = { + description = "Kubernetes Scheduler Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "kube-apiserver.service" ]; + serviceConfig = { + Slice = "kubernetes.slice"; + ExecStart = ''${top.package}/bin/kube-scheduler \ + --address=${cfg.address} \ + ${optionalString (cfg.featureGates != []) + "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \ + --kubeconfig=${top.lib.mkKubeConfig "kube-scheduler" cfg.kubeconfig} \ + --leader-elect=${boolToString cfg.leaderElect} \ + --port=${toString cfg.port} \ + ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ + ${cfg.extraOpts} + ''; + WorkingDirectory = top.dataDir; + User = "kubernetes"; + Group = "kubernetes"; + }; + }; + + services.kubernetes.pki.certs = { + schedulerClient = top.lib.mkCert { + name = "kube-scheduler-client"; + CN = "system:kube-scheduler"; + action = "systemctl restart kube-scheduler.service"; + }; + }; + + services.kubernetes.scheduler.kubeconfig.server = mkDefault top.apiserverAddress; + }; +} diff --git a/nixos/tests/kubernetes/base.nix b/nixos/tests/kubernetes/base.nix index 9d77be131751..3529f35f60e6 100644 --- a/nixos/tests/kubernetes/base.nix +++ b/nixos/tests/kubernetes/base.nix @@ -10,7 +10,6 @@ let mkKubernetesBaseTest = { name, domain ? "my.zyx", test, machines , pkgs ? import <nixpkgs> { inherit system; } - , certs ? import ./certs.nix { inherit pkgs; externalDomain = domain; kubelets = attrNames machines; } , extraConfiguration ? 
null }: let masterName = head (filter (machineName: any (role: role == "master") machines.${machineName}.roles) (attrNames machines)); @@ -20,6 +19,10 @@ let ${master.ip} api.${domain} ${concatMapStringsSep "\n" (machineName: "${machines.${machineName}.ip} ${machineName}.${domain}") (attrNames machines)} ''; + kubectl = with pkgs; runCommand "wrap-kubectl" { buildInputs = [ makeWrapper ]; } '' + mkdir -p $out/bin + makeWrapper ${pkgs.kubernetes}/bin/kubectl $out/bin/kubectl --set KUBECONFIG "/etc/kubernetes/cluster-admin.kubeconfig" + ''; in makeTest { inherit name; @@ -27,6 +30,7 @@ let { config, pkgs, lib, nodes, ... }: mkMerge [ { + boot.postBootCommands = "rm -fr /var/lib/kubernetes/secrets /tmp/shared/*"; virtualisation.memorySize = mkDefault 1536; virtualisation.diskSize = mkDefault 4096; networking = { @@ -45,34 +49,26 @@ let }; }; programs.bash.enableCompletion = true; - environment.variables = { - ETCDCTL_CERT_FILE = "${certs.worker}/etcd-client.pem"; - ETCDCTL_KEY_FILE = "${certs.worker}/etcd-client-key.pem"; - ETCDCTL_CA_FILE = "${certs.worker}/ca.pem"; - ETCDCTL_PEERS = "https://etcd.${domain}:2379"; - }; + environment.systemPackages = [ kubectl ]; services.flannel.iface = "eth1"; - services.kubernetes.apiserver.advertiseAddress = master.ip; + services.kubernetes = { + addons.dashboard.enable = true; + + easyCerts = true; + inherit (machine) roles; + apiserver = { + securePort = 443; + advertiseAddress = master.ip; + }; + masterAddress = "${masterName}.${config.networking.domain}"; + }; } (optionalAttrs (any (role: role == "master") machine.roles) { networking.firewall.allowedTCPPorts = [ 2379 2380 # etcd 443 # kubernetes apiserver ]; - services.etcd = { - enable = true; - certFile = "${certs.master}/etcd.pem"; - keyFile = "${certs.master}/etcd-key.pem"; - trustedCaFile = "${certs.master}/ca.pem"; - peerClientCertAuth = true; - listenClientUrls = ["https://0.0.0.0:2379"]; - listenPeerUrls = ["https://0.0.0.0:2380"]; - advertiseClientUrls = 
["https://etcd.${config.networking.domain}:2379"]; - initialCluster = ["${masterName}=https://etcd.${config.networking.domain}:2380"]; - initialAdvertisePeerUrls = ["https://etcd.${config.networking.domain}:2380"]; - }; }) - (import ./kubernetes-common.nix { inherit (machine) roles; inherit pkgs config certs; }) (optionalAttrs (machine ? "extraConfiguration") (machine.extraConfiguration { inherit config pkgs lib nodes; })) (optionalAttrs (extraConfiguration != null) (extraConfiguration { inherit config pkgs lib nodes; })) ] diff --git a/nixos/tests/kubernetes/certs.nix b/nixos/tests/kubernetes/certs.nix deleted file mode 100644 index 85e92f6330c9..000000000000 --- a/nixos/tests/kubernetes/certs.nix +++ /dev/null @@ -1,219 +0,0 @@ -{ - pkgs ? import <nixpkgs> {}, - externalDomain ? "myawesomecluster.cluster.yourdomain.net", - serviceClusterIp ? "10.0.0.1", - kubelets, - ... -}: -let - runWithCFSSL = name: cmd: - let secrets = pkgs.runCommand "${name}-cfss.json" { - buildInputs = [ pkgs.cfssl pkgs.jq ]; - outputs = [ "out" "cert" "key" "csr" ]; - } - '' - ( - echo "${cmd}" - cfssl ${cmd} > tmp - cat tmp | jq -r .key > $key - cat tmp | jq -r .cert > $cert - cat tmp | jq -r .csr > $csr - - touch $out - ) 2>&1 | fold -w 80 -s - ''; - in { - key = secrets.key; - cert = secrets.cert; - csr = secrets.csr; - }; - - writeCFSSL = content: - pkgs.runCommand content.name { - buildInputs = [ pkgs.cfssl pkgs.jq ]; - } '' - mkdir -p $out - cd $out - - json=${pkgs.lib.escapeShellArg (builtins.toJSON content)} - - # for a given $field in the $json, treat the associated value as a - # file path and substitute the contents thereof into the $json - # object. 
- expandFileField() { - local field=$1 - if jq -e --arg field "$field" 'has($field)'; then - local path="$(echo "$json" | jq -r ".$field")" - json="$(echo "$json" | jq --arg val "$(cat "$path")" ".$field = \$val")" - fi - } - - expandFileField key - expandFileField ca - expandFileField cert - - echo "$json" | cfssljson -bare ${content.name} - ''; - - noCSR = content: pkgs.lib.filterAttrs (n: v: n != "csr") content; - noKey = content: pkgs.lib.filterAttrs (n: v: n != "key") content; - - writeFile = content: - if pkgs.lib.isDerivation content - then content - else pkgs.writeText "content" (builtins.toJSON content); - - createServingCertKey = { ca, cn, hosts? [], size ? 2048, name ? cn }: - noCSR ( - (runWithCFSSL name "gencert -ca=${writeFile ca.cert} -ca-key=${writeFile ca.key} -profile=server -config=${writeFile ca.config} ${writeFile { - CN = cn; - hosts = hosts; - key = { algo = "rsa"; inherit size; }; - }}") // { inherit name; } - ); - - createClientCertKey = { ca, cn, groups ? [], size ? 2048, name ? cn }: - noCSR ( - (runWithCFSSL name "gencert -ca=${writeFile ca.cert} -ca-key=${writeFile ca.key} -profile=client -config=${writeFile ca.config} ${writeFile { - CN = cn; - names = map (group: {O = group;}) groups; - hosts = [""]; - key = { algo = "rsa"; inherit size; }; - }}") // { inherit name; } - ); - - createSigningCertKey = { C ? "xx", ST ? "x", L ? "x", O ? "x", OU ? "x", CN ? "ca", emailAddress ? "x", expiry ? "43800h", size ? 2048, name ? 
CN }: - (noCSR (runWithCFSSL CN "genkey -initca ${writeFile { - key = { algo = "rsa"; inherit size; }; - names = [{ inherit C ST L O OU CN emailAddress; }]; - }}")) // { - inherit name; - config.signing = { - default.expiry = expiry; - profiles = { - server = { - inherit expiry; - usages = [ - "signing" - "key encipherment" - "server auth" - ]; - }; - client = { - inherit expiry; - usages = [ - "signing" - "key encipherment" - "client auth" - ]; - }; - peer = { - inherit expiry; - usages = [ - "signing" - "key encipherment" - "server auth" - "client auth" - ]; - }; - }; - }; - }; - - ca = createSigningCertKey {}; - - kube-apiserver = createServingCertKey { - inherit ca; - cn = "kube-apiserver"; - hosts = ["kubernetes.default" "kubernetes.default.svc" "localhost" "api.${externalDomain}" serviceClusterIp]; - }; - - kubelet = createServingCertKey { - inherit ca; - cn = "kubelet"; - hosts = ["*.${externalDomain}"]; - }; - - service-accounts = createServingCertKey { - inherit ca; - cn = "kube-service-accounts"; - }; - - etcd = createServingCertKey { - inherit ca; - cn = "etcd"; - hosts = ["etcd.${externalDomain}"]; - }; - - etcd-client = createClientCertKey { - inherit ca; - cn = "etcd-client"; - }; - - kubelet-client = createClientCertKey { - inherit ca; - cn = "kubelet-client"; - groups = ["system:masters"]; - }; - - apiserver-client = { - kubelet = hostname: createClientCertKey { - inherit ca; - name = "apiserver-client-kubelet-${hostname}"; - cn = "system:node:${hostname}.${externalDomain}"; - groups = ["system:nodes"]; - }; - - kube-proxy = createClientCertKey { - inherit ca; - name = "apiserver-client-kube-proxy"; - cn = "system:kube-proxy"; - groups = ["system:kube-proxy" "system:nodes"]; - }; - - kube-controller-manager = createClientCertKey { - inherit ca; - name = "apiserver-client-kube-controller-manager"; - cn = "system:kube-controller-manager"; - groups = ["system:masters"]; - }; - - kube-scheduler = createClientCertKey { - inherit ca; - name = 
"apiserver-client-kube-scheduler"; - cn = "system:kube-scheduler"; - groups = ["system:kube-scheduler"]; - }; - - admin = createClientCertKey { - inherit ca; - cn = "admin"; - groups = ["system:masters"]; - }; - }; -in { - master = pkgs.buildEnv { - name = "master-keys"; - paths = [ - (writeCFSSL (noKey ca)) - (writeCFSSL kube-apiserver) - (writeCFSSL kubelet-client) - (writeCFSSL apiserver-client.kube-controller-manager) - (writeCFSSL apiserver-client.kube-scheduler) - (writeCFSSL service-accounts) - (writeCFSSL etcd) - ]; - }; - - worker = pkgs.buildEnv { - name = "worker-keys"; - paths = [ - (writeCFSSL (noKey ca)) - (writeCFSSL kubelet) - (writeCFSSL apiserver-client.kube-proxy) - (writeCFSSL etcd-client) - ] ++ map (hostname: writeCFSSL (apiserver-client.kubelet hostname)) kubelets; - }; - - admin = writeCFSSL apiserver-client.admin; -} diff --git a/nixos/tests/kubernetes/dns.nix b/nixos/tests/kubernetes/dns.nix index f25ea5b9ed84..42eafcfc1956 100644 --- a/nixos/tests/kubernetes/dns.nix +++ b/nixos/tests/kubernetes/dns.nix @@ -71,7 +71,7 @@ let base = { name = "dns"; - inherit domain certs extraConfiguration; + inherit domain extraConfiguration; }; singleNodeTest = { @@ -99,8 +99,11 @@ let multiNodeTest = { test = '' + # Node token exchange + $machine1->waitUntilSucceeds("cp -f /var/lib/cfssl/apitoken.secret /tmp/shared/apitoken.secret"); + $machine2->waitUntilSucceeds("cat /tmp/shared/apitoken.secret | nixos-kubernetes-node-join"); + # prepare machines for test - $machine1->waitUntilSucceeds("kubectl get node machine1.${domain} | grep -w Ready"); $machine1->waitUntilSucceeds("kubectl get node machine2.${domain} | grep -w Ready"); $machine2->execute("docker load < ${redisImage}"); $machine1->waitUntilSucceeds("kubectl create -f ${redisPod}"); diff --git a/nixos/tests/kubernetes/kubernetes-common.nix b/nixos/tests/kubernetes/kubernetes-common.nix deleted file mode 100644 index 87c65b883659..000000000000 --- a/nixos/tests/kubernetes/kubernetes-common.nix +++ 
/dev/null @@ -1,57 +0,0 @@ -{ roles, config, pkgs, certs }: -with pkgs.lib; -let - base = { - inherit roles; - flannel.enable = true; - addons.dashboard.enable = true; - - caFile = "${certs.master}/ca.pem"; - apiserver = { - tlsCertFile = "${certs.master}/kube-apiserver.pem"; - tlsKeyFile = "${certs.master}/kube-apiserver-key.pem"; - kubeletClientCertFile = "${certs.master}/kubelet-client.pem"; - kubeletClientKeyFile = "${certs.master}/kubelet-client-key.pem"; - serviceAccountKeyFile = "${certs.master}/kube-service-accounts.pem"; - }; - etcd = { - servers = ["https://etcd.${config.networking.domain}:2379"]; - certFile = "${certs.worker}/etcd-client.pem"; - keyFile = "${certs.worker}/etcd-client-key.pem"; - }; - kubeconfig = { - server = "https://api.${config.networking.domain}"; - }; - kubelet = { - tlsCertFile = "${certs.worker}/kubelet.pem"; - tlsKeyFile = "${certs.worker}/kubelet-key.pem"; - hostname = "${config.networking.hostName}.${config.networking.domain}"; - kubeconfig = { - certFile = "${certs.worker}/apiserver-client-kubelet-${config.networking.hostName}.pem"; - keyFile = "${certs.worker}/apiserver-client-kubelet-${config.networking.hostName}-key.pem"; - }; - }; - controllerManager = { - serviceAccountKeyFile = "${certs.master}/kube-service-accounts-key.pem"; - kubeconfig = { - certFile = "${certs.master}/apiserver-client-kube-controller-manager.pem"; - keyFile = "${certs.master}/apiserver-client-kube-controller-manager-key.pem"; - }; - }; - scheduler = { - kubeconfig = { - certFile = "${certs.master}/apiserver-client-kube-scheduler.pem"; - keyFile = "${certs.master}/apiserver-client-kube-scheduler-key.pem"; - }; - }; - proxy = { - kubeconfig = { - certFile = "${certs.worker}/apiserver-client-kube-proxy.pem"; - keyFile = "${certs.worker}//apiserver-client-kube-proxy-key.pem"; - }; - }; - }; - -in { - services.kubernetes = base; -} diff --git a/nixos/tests/kubernetes/rbac.nix b/nixos/tests/kubernetes/rbac.nix index 226808c4b263..91f97bed6818 100644 --- 
a/nixos/tests/kubernetes/rbac.nix +++ b/nixos/tests/kubernetes/rbac.nix @@ -105,7 +105,7 @@ let $machine1->waitUntilSucceeds("kubectl get pod kubectl | grep Running"); - $machine1->succeed("kubectl exec -ti kubectl -- kubectl get pods"); + $machine1->waitUntilSucceeds("kubectl exec -ti kubectl -- kubectl get pods"); $machine1->fail("kubectl exec -ti kubectl -- kubectl create -f /kubectl-pod-2.json"); $machine1->fail("kubectl exec -ti kubectl -- kubectl delete pods -l name=kubectl"); ''; @@ -113,7 +113,10 @@ let multinode = base // { test = '' - $machine1->waitUntilSucceeds("kubectl get node machine1.my.zyx | grep -w Ready"); + # Node token exchange + $machine1->waitUntilSucceeds("cp -f /var/lib/cfssl/apitoken.secret /tmp/shared/apitoken.secret"); + $machine2->waitUntilSucceeds("cat /tmp/shared/apitoken.secret | nixos-kubernetes-node-join"); + $machine1->waitUntilSucceeds("kubectl get node machine2.my.zyx | grep -w Ready"); $machine2->execute("docker load < ${kubectlImage}"); @@ -125,7 +128,7 @@ let $machine1->waitUntilSucceeds("kubectl get pod kubectl | grep Running"); - $machine1->succeed("kubectl exec -ti kubectl -- kubectl get pods"); + $machine1->waitUntilSucceeds("kubectl exec -ti kubectl -- kubectl get pods"); $machine1->fail("kubectl exec -ti kubectl -- kubectl create -f /kubectl-pod-2.json"); $machine1->fail("kubectl exec -ti kubectl -- kubectl delete pods -l name=kubectl"); ''; From 8d62d7972f9ca3179619b0b7ddc7f2e45c57d000 Mon Sep 17 00:00:00 2001 From: Johan Thomsen Date: Mon, 17 Sep 2018 14:12:44 +0200 Subject: [PATCH 2/9] nixos/kubernetes: adding manual section for kubernetes and writing release note for NixOS 19.03 --- .../manual/configuration/configuration.xml | 1 + nixos/doc/manual/configuration/kubernetes.xml | 127 ++++++++++++++++++ nixos/doc/manual/release-notes/rl-1903.xml | 41 ++++++ 3 files changed, 169 insertions(+) create mode 100644 nixos/doc/manual/configuration/kubernetes.xml diff --git a/nixos/doc/manual/configuration/configuration.xml 
b/nixos/doc/manual/configuration/configuration.xml index cebc4122c6c6..138d1d86d7fc 100644 --- a/nixos/doc/manual/configuration/configuration.xml +++ b/nixos/doc/manual/configuration/configuration.xml @@ -23,5 +23,6 @@ + diff --git a/nixos/doc/manual/configuration/kubernetes.xml b/nixos/doc/manual/configuration/kubernetes.xml new file mode 100644 index 000000000000..ddc026c0c010 --- /dev/null +++ b/nixos/doc/manual/configuration/kubernetes.xml @@ -0,0 +1,127 @@ + + Kubernetes + + + The NixOS Kubernetes module is a collective term for a handful of + individual submodules implementing the Kubernetes cluster components. + + + + There are generally two ways of enabling Kubernetes on NixOS. + One way is to enable and configure cluster components appropriately by hand: + +services.kubernetes = { + apiserver.enable = true; + controllerManager.enable = true; + scheduler.enable = true; + addonManager.enable = true; + proxy.enable = true; + flannel.enable = true; +}; + + Another way is to assign cluster roles ("master" and/or "node") to the host. + This enables apiserver, controllerManager, scheduler, addonManager, + kube-proxy and etcd: + + = [ "master" ]; + + While this will enable the kubelet and kube-proxy only: + + = [ "node" ]; + + Assigning both the master and node roles is usable if you want a single + node Kubernetes cluster for dev or testing purposes: + + = [ "master" "node" ]; + + Note: Assigning either role will also default both + and + to true. + This sets up flannel as CNI and activates automatic PKI bootstrapping. + + + + As of kubernetes 1.10.X it has been deprecated to open + non-tls-enabled ports on kubernetes components. Thus, from NixOS 19.03 all + plain HTTP ports have been disabled by default. + While opening insecure ports is still possible, it is recommended not to + bind these to other interfaces than loopback. + + To re-enable the insecure port on the apiserver, see options: + + and + + + + + + As of NixOS 19.03, it is mandatory to configure: + . 
+ The masterAddress must be resolvable and routable by all cluster nodes. In single node clusters, this can be set to localhost. + + + + + Role-based access control (RBAC) authorization mode is enabled by default. This means that anonymous requests to the apiserver secure port will, as expected, cause a permission denied error. All cluster components must therefore be configured with x509 certificates for two-way TLS communication. The x509 certificate subject section determines the roles and permissions granted by the apiserver to perform clusterwide or namespaced operations. See also: + + Using RBAC Authorization. + + + + The NixOS kubernetes module provides an option for automatic certificate bootstrapping and configuration, + . The PKI bootstrapping process involves setting up a certificate authority (CA) daemon (cfssl) on the kubernetes master node. cfssl generates a CA-cert for the cluster, and uses the CA-cert for signing subordinate certs issued to each of the cluster components. Subsequently, the certmgr daemon monitors active certificates and renews them when needed. For single node Kubernetes clusters, setting = true is sufficient and no further action is required. For joining extra node machines to an existing cluster on the other hand, establishing initial trust is mandatory. + + + + To add new nodes to the cluster: On any (non-master) cluster node where + is enabled, the helper script nixos-kubernetes-node-join is available on PATH. Given a token on stdin, it will copy the token to the kubernetes secrets directory and restart the certmgr service. As requested certificates are issued, the script will restart kubernetes cluster components as needed for them to pick up new keypairs. + + + + + Multi-master (HA) clusters are not supported by the easyCerts module. + + + + + In order to interact with an RBAC-enabled cluster as an administrator, one needs to have cluster-admin privileges.
By default, when easyCerts is + enabled, a cluster-admin kubeconfig file is generated and linked into + /etc/kubernetes/cluster-admin.kubeconfig as determined by + . + export KUBECONFIG=/etc/kubernetes/cluster-admin.kubeconfig + will make kubectl use this kubeconfig to access and authenticate to the cluster. + The cluster-admin kubeconfig references an auto-generated keypair owned by + root. Thus, only root on the kubernetes master may obtain cluster-admin + rights by means of this file. + + + diff --git a/nixos/doc/manual/release-notes/rl-1903.xml b/nixos/doc/manual/release-notes/rl-1903.xml index 6f78983d4829..269f27f74fbd 100644 --- a/nixos/doc/manual/release-notes/rl-1903.xml +++ b/nixos/doc/manual/release-notes/rl-1903.xml @@ -54,6 +54,13 @@ to false and enable your preferred display manager. + + A major refactoring of the Kubernetes module has been completed. + Refactorings primarily focus on decoupling components and enhancing + security. Two-way TLS and RBAC have been enabled by default for all + components, which slightly changes the way the module is configured. + See: for details. + @@ -564,6 +571,40 @@ provisioning. + + + The use of insecure ports on kubernetes has been deprecated. + Thus the options: + services.kubernetes.apiserver.port and + services.kubernetes.controllerManager.port + have been renamed to .insecurePort, + and the default of both options has changed to 0 (disabled). + + + + + Note that the default value of + services.kubernetes.apiserver.bindAddress + has changed from 127.0.0.1 to 0.0.0.0, allowing the apiserver to be + accessible from outside the master node itself. + If the apiserver insecurePort is enabled, + it is strongly recommended to only bind on the loopback interface. See: + services.kubernetes.apiserver.insecureBindAddress. + + + + + The options services.kubernetes.apiserver.allowPrivileged + and services.kubernetes.kubelet.allowPrivileged now + default to false, disallowing privileged containers on the cluster.
+ + + + The kubernetes module no longer adds the kubernetes package to + environment.systemPackages implicitly. + + From 63347963707cd07d5c14aa16ad1971504f72a6e7 Mon Sep 17 00:00:00 2001 From: Johan Thomsen Date: Mon, 11 Feb 2019 10:02:36 +0100 Subject: [PATCH 3/9] nixos/kubernetes: use the certmgr-selfsigned variant --- nixos/modules/services/cluster/kubernetes/pki.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix index e560bb9b29a0..4587373d519e 100644 --- a/nixos/modules/services/cluster/kubernetes/pki.nix +++ b/nixos/modules/services/cluster/kubernetes/pki.nix @@ -200,6 +200,7 @@ in services.certmgr = { enable = true; + package = pkgs.certmgr-selfsigned; svcManager = "command"; specs = let From 1f49c2160a074b6cb36389a05fd3395cee432d64 Mon Sep 17 00:00:00 2001 From: Johan Thomsen Date: Mon, 11 Feb 2019 10:01:26 +0100 Subject: [PATCH 4/9] nixos/kubernetes: CoreDNS privileges has to be assigned by addon manager bootstrap - because the kube-addon-manager drops most of its privileges after startup.
--- .../cluster/kubernetes/addons/dns.nix | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/nixos/modules/services/cluster/kubernetes/addons/dns.nix b/nixos/modules/services/cluster/kubernetes/addons/dns.nix index ea3e0706163a..8f3234bfc706 100644 --- a/nixos/modules/services/cluster/kubernetes/addons/dns.nix +++ b/nixos/modules/services/cluster/kubernetes/addons/dns.nix @@ -54,21 +54,7 @@ in { services.kubernetes.kubelet.seedDockerImages = singleton (pkgs.dockerTools.pullImage cfg.coredns); - services.kubernetes.addonManager.addons = { - coredns-sa = { - apiVersion = "v1"; - kind = "ServiceAccount"; - metadata = { - labels = { - "addonmanager.kubernetes.io/mode" = "Reconcile"; - "k8s-app" = "kube-dns"; - "kubernetes.io/cluster-service" = "true"; - }; - name = "coredns"; - namespace = "kube-system"; - }; - }; - + services.kubernetes.addonManager.bootstrapAddons = { coredns-cr = { apiVersion = "rbac.authorization.k8s.io/v1beta1"; kind = "ClusterRole"; @@ -123,6 +109,22 @@ in { } ]; }; + }; + + services.kubernetes.addonManager.addons = { + coredns-sa = { + apiVersion = "v1"; + kind = "ServiceAccount"; + metadata = { + labels = { + "addonmanager.kubernetes.io/mode" = "Reconcile"; + "k8s-app" = "kube-dns"; + "kubernetes.io/cluster-service" = "true"; + }; + name = "coredns"; + namespace = "kube-system"; + }; + }; coredns-cm = { apiVersion = "v1"; From 466beb02143f99815eef90ef8a69c91cd898a998 Mon Sep 17 00:00:00 2001 From: Johan Thomsen Date: Tue, 12 Feb 2019 16:48:23 +0100 Subject: [PATCH 5/9] nixos/kubernetes: let flannel use kubernetes as storage backend + isolate etcd on the master node by letting it listen only on loopback + enabling kubelet on master and taint master with NoSchedule The reason for the latter is that flannel requires all nodes to be "registered" in the cluster in order to setup the cluster network. This means that the kubelet is needed even at nodes on which we don't plan to schedule anything. 
--- .../services/cluster/kubernetes/apiserver.nix | 1 + .../services/cluster/kubernetes/default.nix | 10 ++++ .../services/cluster/kubernetes/flannel.nix | 52 +++++++++++++++++-- .../services/cluster/kubernetes/pki.nix | 23 ++++++-- nixos/tests/kubernetes/base.nix | 1 - 5 files changed, 78 insertions(+), 9 deletions(-) diff --git a/nixos/modules/services/cluster/kubernetes/apiserver.nix b/nixos/modules/services/cluster/kubernetes/apiserver.nix index 465d74d83c8b..81e45b417de3 100644 --- a/nixos/modules/services/cluster/kubernetes/apiserver.nix +++ b/nixos/modules/services/cluster/kubernetes/apiserver.nix @@ -411,6 +411,7 @@ in name = "etcd"; CN = top.masterAddress; hosts = [ + "etcd.local" "etcd.${top.addons.dns.clusterDomain}" top.masterAddress cfg.advertiseAddress diff --git a/nixos/modules/services/cluster/kubernetes/default.nix b/nixos/modules/services/cluster/kubernetes/default.nix index a5a59f4a5fc9..375e33e91b5a 100644 --- a/nixos/modules/services/cluster/kubernetes/default.nix +++ b/nixos/modules/services/cluster/kubernetes/default.nix @@ -189,6 +189,16 @@ in { services.kubernetes.addonManager.enable = mkDefault true; services.kubernetes.proxy.enable = mkDefault true; services.etcd.enable = true; # Cannot mkDefault because of flannel default options + services.kubernetes.kubelet = { + enable = mkDefault true; + taints = mkIf (!(elem "node" cfg.roles)) { + master = { + key = "node-role.kubernetes.io/master"; + value = "true"; + effect = "NoSchedule"; + }; + }; + }; }) diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix index 38dc1e2b47d4..fb70e6513d39 100644 --- a/nixos/modules/services/cluster/kubernetes/flannel.nix +++ b/nixos/modules/services/cluster/kubernetes/flannel.nix @@ -6,6 +6,9 @@ let top = config.services.kubernetes; cfg = top.flannel; + # we want flannel to use kubernetes itself as configuration backend, not direct etcd + storageBackend = "kubernetes"; + # needed for flannel 
to pass options to docker mkDockerOpts = pkgs.runCommand "mk-docker-opts" { buildInputs = [ pkgs.makeWrapper ]; @@ -29,6 +32,8 @@ in enable = mkDefault true; network = mkDefault top.clusterCidr; + inherit storageBackend; + nodeName = config.services.kubernetes.kubelet.hostname; }; services.kubernetes.kubelet = { @@ -69,11 +74,52 @@ in }; services.kubernetes.pki.certs = { - flannelEtcdClient = top.lib.mkCert { - name = "flannel-etcd-client"; - CN = "flannel-etcd-client"; + flannelClient = top.lib.mkCert { + name = "flannel-client"; + CN = "flannel-client"; action = "systemctl restart flannel.service"; }; }; + + # give flannel some kubernetes rbac permissions if applicable + services.kubernetes.addonManager.bootstrapAddons = mkIf ((storageBackend == "kubernetes") && (elem "RBAC" top.apiserver.authorizationMode)) { + + flannel-cr = { + apiVersion = "rbac.authorization.k8s.io/v1beta1"; + kind = "ClusterRole"; + metadata = { name = "flannel"; }; + rules = [{ + apiGroups = [ "" ]; + resources = [ "pods" ]; + verbs = [ "get" ]; + } + { + apiGroups = [ "" ]; + resources = [ "nodes" ]; + verbs = [ "list" "watch" ]; + } + { + apiGroups = [ "" ]; + resources = [ "nodes/status" ]; + verbs = [ "patch" ]; + }]; + }; + + flannel-crb = { + apiVersion = "rbac.authorization.k8s.io/v1beta1"; + kind = "ClusterRoleBinding"; + metadata = { name = "flannel"; }; + roleRef = { + apiGroup = "rbac.authorization.k8s.io"; + kind = "ClusterRole"; + name = "flannel"; + }; + subjects = [{ + kind = "User"; + name = "flannel-client"; + }]; + }; + + }; }; } diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix index 4587373d519e..38deca23a990 100644 --- a/nixos/modules/services/cluster/kubernetes/pki.nix +++ b/nixos/modules/services/cluster/kubernetes/pki.nix @@ -305,7 +305,7 @@ in ''} ${optionalString top.flannel.enable '' - while [ ! -f ${cfg.certs.flannelEtcdClient.cert} ]; do sleep 1; done + while [ !
-f ${cfg.certs.flannelClient.cert} ]; do sleep 1; done echo "Restarting flannel..." >&1 systemctl restart flannel ''} @@ -313,22 +313,35 @@ in echo "Node joined succesfully" '')]; + # isolate etcd on loopback at the master node + # easyCerts doesn't support multimaster clusters anyway atm. services.etcd = with cfg.certs.etcd; { + listenClientUrls = ["https://127.0.0.1:2379"]; + listenPeerUrls = ["https://127.0.0.1:2380"]; + advertiseClientUrls = ["https://etcd.local:2379"]; + initialCluster = ["${top.masterAddress}=https://etcd.local:2380"]; + initialAdvertisePeerUrls = ["https://etcd.local:2380"]; certFile = mkDefault cert; keyFile = mkDefault key; trustedCaFile = mkDefault caCert; }; + networking.extraHosts = mkIf (config.services.etcd.enable) '' + 127.0.0.1 etcd.${top.addons.dns.clusterDomain} etcd.local + ''; - services.flannel.etcd = with cfg.certs.flannelEtcdClient; { - certFile = mkDefault cert; - keyFile = mkDefault key; - caFile = mkDefault caCert; + services.flannel = with cfg.certs.flannelClient; { + kubeconfig = top.lib.mkKubeConfig "flannel" { + server = top.apiserverAddress; + certFile = cert; + keyFile = key; + }; }; services.kubernetes = { apiserver = mkIf top.apiserver.enable (with cfg.certs.apiServer; { etcd = with cfg.certs.apiserverEtcdClient; { + servers = ["https://etcd.local:2379"]; certFile = mkDefault cert; keyFile = mkDefault key; caFile = mkDefault caCert; diff --git a/nixos/tests/kubernetes/base.nix b/nixos/tests/kubernetes/base.nix index 3529f35f60e6..ec1a75e74c41 100644 --- a/nixos/tests/kubernetes/base.nix +++ b/nixos/tests/kubernetes/base.nix @@ -65,7 +65,6 @@ let } (optionalAttrs (any (role: role == "master") machine.roles) { networking.firewall.allowedTCPPorts = [ - 2379 2380 # etcd 443 # kubernetes apiserver ]; }) From 7028fac35baf085ba973754c3dfe573b0bc2823a Mon Sep 17 00:00:00 2001 From: Johan Thomsen Date: Thu, 14 Feb 2019 10:28:51 +0100 Subject: [PATCH 6/9] nixos/kubernetes: use system.path to handle dependency on flannel 
subnet.env The current postStart step on flannel causes flannel.service to sometimes hang, even when it's commanded to stop. --- .../services/cluster/kubernetes/flannel.nix | 14 ++++++++++---- nixos/modules/services/networking/flannel.nix | 17 +++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix index fb70e6513d39..55c737a5cf18 100644 --- a/nixos/modules/services/cluster/kubernetes/flannel.nix +++ b/nixos/modules/services/cluster/kubernetes/flannel.nix @@ -50,16 +50,22 @@ in systemd.services."mk-docker-opts" = { description = "Pre-Docker Actions"; - wantedBy = [ "flannel.service" ]; - before = [ "docker.service" ]; - after = [ "flannel.service" ]; path = with pkgs; [ gawk gnugrep ]; script = '' - mkdir -p /run/flannel ${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker + systemctl restart docker ''; serviceConfig.Type = "oneshot"; }; + + systemd.paths."flannel-subnet-env" = { + wantedBy = [ "flannel.service" ]; + pathConfig = { + PathModified = "/run/flannel/subnet.env"; + Unit = "mk-docker-opts.service"; + }; + }; + systemd.services.docker.serviceConfig.EnvironmentFile = "/run/flannel/docker"; # read environment variables generated by mk-docker-opts diff --git a/nixos/modules/services/networking/flannel.nix b/nixos/modules/services/networking/flannel.nix index 6c43573851b2..ec702cdc6ff4 100644 --- a/nixos/modules/services/networking/flannel.nix +++ b/nixos/modules/services/networking/flannel.nix @@ -161,7 +161,10 @@ in { FLANNELD_KUBECONFIG_FILE = cfg.kubeconfig; NODE_NAME = cfg.nodeName; }; - preStart = mkIf (cfg.storageBackend == "etcd") '' + preStart = '' + mkdir -p /run/flannel + touch /run/flannel/docker + '' + optionalString (cfg.storageBackend == "etcd") '' echo "setting network configuration" until ${pkgs.etcdctl.bin}/bin/etcdctl set /coreos.com/network/config '${builtins.toJSON networkConfig}' do @@ -169,13 +172,11 
@@ in { sleep 1 done ''; - postStart = '' - while [ ! -f /run/flannel/subnet.env ] - do - sleep 1 - done - ''; - serviceConfig.ExecStart = "${cfg.package}/bin/flannel"; + serviceConfig = { + ExecStart = "${cfg.package}/bin/flannel"; + Restart = "always"; + RestartSec = "10s"; + }; }; services.etcd.enable = mkDefault (cfg.storageBackend == "etcd" && cfg.etcd.endpoints == ["http://127.0.0.1:2379"]); From 6045068f6c1ad40bb4ac6264fa11fa6641d0c1bf Mon Sep 17 00:00:00 2001 From: Johan Thomsen Date: Thu, 14 Feb 2019 10:51:44 +0100 Subject: [PATCH 7/9] nixos/kubernetes: (test) Fix race-condition in test cases. docker load might fail due to dockerd restarting --- nixos/tests/kubernetes/dns.nix | 8 ++++---- nixos/tests/kubernetes/rbac.nix | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/nixos/tests/kubernetes/dns.nix b/nixos/tests/kubernetes/dns.nix index 42eafcfc1956..46bcb01a5265 100644 --- a/nixos/tests/kubernetes/dns.nix +++ b/nixos/tests/kubernetes/dns.nix @@ -78,10 +78,10 @@ let test = '' # prepare machine1 for test $machine1->waitUntilSucceeds("kubectl get node machine1.${domain} | grep -w Ready"); - $machine1->execute("docker load < ${redisImage}"); + $machine1->waitUntilSucceeds("docker load < ${redisImage}"); $machine1->waitUntilSucceeds("kubectl create -f ${redisPod}"); $machine1->waitUntilSucceeds("kubectl create -f ${redisService}"); - $machine1->execute("docker load < ${probeImage}"); + $machine1->waitUntilSucceeds("docker load < ${probeImage}"); $machine1->waitUntilSucceeds("kubectl create -f ${probePod}"); # check if pods are running @@ -105,10 +105,10 @@ let # prepare machines for test $machine1->waitUntilSucceeds("kubectl get node machine2.${domain} | grep -w Ready"); - $machine2->execute("docker load < ${redisImage}"); + $machine2->waitUntilSucceeds("docker load < ${redisImage}"); $machine1->waitUntilSucceeds("kubectl create -f ${redisPod}"); $machine1->waitUntilSucceeds("kubectl create -f ${redisService}"); - 
$machine2->execute("docker load < ${probeImage}"); + $machine2->waitUntilSucceeds("docker load < ${probeImage}"); $machine1->waitUntilSucceeds("kubectl create -f ${probePod}"); # check if pods are running diff --git a/nixos/tests/kubernetes/rbac.nix b/nixos/tests/kubernetes/rbac.nix index 91f97bed6818..3ce7adcd0d71 100644 --- a/nixos/tests/kubernetes/rbac.nix +++ b/nixos/tests/kubernetes/rbac.nix @@ -96,7 +96,7 @@ let test = '' $machine1->waitUntilSucceeds("kubectl get node machine1.my.zyx | grep -w Ready"); - $machine1->execute("docker load < ${kubectlImage}"); + $machine1->waitUntilSucceeds("docker load < ${kubectlImage}"); $machine1->waitUntilSucceeds("kubectl apply -f ${roServiceAccount}"); $machine1->waitUntilSucceeds("kubectl apply -f ${roRole}"); @@ -119,7 +119,7 @@ let $machine1->waitUntilSucceeds("kubectl get node machine2.my.zyx | grep -w Ready"); - $machine2->execute("docker load < ${kubectlImage}"); + $machine2->waitUntilSucceeds("docker load < ${kubectlImage}"); $machine1->waitUntilSucceeds("kubectl apply -f ${roServiceAccount}"); $machine1->waitUntilSucceeds("kubectl apply -f ${roRole}"); From 3a022054966fcae27f6c31a1bb4f292ec84ca379 Mon Sep 17 00:00:00 2001 From: Franz Pletz Date: Wed, 20 Feb 2019 20:52:36 +0100 Subject: [PATCH 8/9] nixos/kubernetes: bootstrap docker without networking Before flannel is ready there is a brief time where docker will be running with a default docker0 bridge. If kubernetes happens to spawn containers before flannel is ready, docker can't be restarted when flannel is ready because some containers are still running on the docker0 bridge with potentially different network addresses. Environment variables in `EnvironmentFile` override those defined via `Environment` in the systemd service config. 
Co-authored-by: Christian Albrecht --- nixos/modules/services/cluster/kubernetes/flannel.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix index 55c737a5cf18..6f97febf5ba7 100644 --- a/nixos/modules/services/cluster/kubernetes/flannel.nix +++ b/nixos/modules/services/cluster/kubernetes/flannel.nix @@ -66,7 +66,10 @@ in }; }; - systemd.services.docker.serviceConfig.EnvironmentFile = "/run/flannel/docker"; + systemd.services.docker = { + environment.DOCKER_OPTS = "-b none"; + serviceConfig.EnvironmentFile = "/run/flannel/docker"; + }; # read environment variables generated by mk-docker-opts virtualisation.docker.extraOptions = "$DOCKER_OPTS"; From 97a27fd2d27fa25dec8a527c4b578ab0af1a13c1 Mon Sep 17 00:00:00 2001 From: Jaka Hudoklin Date: Thu, 21 Feb 2019 00:26:11 +0100 Subject: [PATCH 9/9] nixos/kubernetes: fix flannel and kubelet startup --- .../services/cluster/kubernetes/flannel.nix | 2 +- .../services/cluster/kubernetes/kubelet.nix | 25 ++++++------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix index 6f97febf5ba7..93ee2fd65eeb 100644 --- a/nixos/modules/services/cluster/kubernetes/flannel.nix +++ b/nixos/modules/services/cluster/kubernetes/flannel.nix @@ -68,7 +68,7 @@ in systemd.services.docker = { environment.DOCKER_OPTS = "-b none"; - serviceConfig.EnvironmentFile = "/run/flannel/docker"; + serviceConfig.EnvironmentFile = "-/run/flannel/docker"; }; # read environment variables generated by mk-docker-opts diff --git a/nixos/modules/services/cluster/kubernetes/kubelet.nix b/nixos/modules/services/cluster/kubernetes/kubelet.nix index 51d1fd30959b..c94bb28bf7fb 100644 --- a/nixos/modules/services/cluster/kubernetes/kubelet.nix +++ b/nixos/modules/services/cluster/kubernetes/kubelet.nix @@ -244,12 
+244,12 @@ in (mkIf cfg.enable { services.kubernetes.kubelet.seedDockerImages = [infraContainer]; - systemd.services.kubelet-bootstrap = { - description = "Boostrap Kubelet"; - wantedBy = ["kubernetes.target"]; - after = ["docker.service" "network.target"]; - path = with pkgs; [ docker ]; - script = '' + systemd.services.kubelet = { + description = "Kubernetes Kubelet Service"; + wantedBy = [ "kubernetes.target" ]; + after = [ "network.target" "docker.service" "kube-apiserver.service" ]; + path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path; + preStart = '' ${concatMapStrings (img: '' echo "Seeding docker image: ${img}" docker load <${img} @@ -261,21 +261,12 @@ in ln -fs ${package}/bin/* /opt/cni/bin '') cfg.cni.packages} ''; - serviceConfig = { - Slice = "kubernetes.slice"; - Type = "oneshot"; - }; - }; - - systemd.services.kubelet = { - description = "Kubernetes Kubelet Service"; - wantedBy = [ "kubernetes.target" ]; - after = [ "network.target" "docker.service" "kube-apiserver.service" "kubelet-bootstrap.service" ]; - path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path; serviceConfig = { Slice = "kubernetes.slice"; CPUAccounting = true; MemoryAccounting = true; + Restart = "on-failure"; + RestartSec = "1000ms"; ExecStart = ''${top.package}/bin/kubelet \ --address=${cfg.address} \ --allow-privileged=${boolToString cfg.allowPrivileged} \