diff --git a/lib/maintainers.nix b/lib/maintainers.nix index a2aa88a807de..71b2a7a08bba 100644 --- a/lib/maintainers.nix +++ b/lib/maintainers.nix @@ -380,6 +380,7 @@ ledif = "Adam Fidel "; leemachin = "Lee Machin "; leenaars = "Michiel Leenaars "; + lejonet = "Daniel Kuehn "; leonardoce = "Leonardo Cecchi "; lethalman = "Luca Bruno "; lewo = "Antoine Eiche "; diff --git a/nixos/modules/misc/ids.nix b/nixos/modules/misc/ids.nix index c0c6a6ef9244..8d775ffc82d3 100644 --- a/nixos/modules/misc/ids.nix +++ b/nixos/modules/misc/ids.nix @@ -304,6 +304,7 @@ mighttpd2 = 285; hass = 286; monero = 287; + ceph = 288; # When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399! @@ -576,6 +577,7 @@ mighttpd2 = 285; hass = 286; monero = 287; + ceph = 288; # When adding a gid, make sure it doesn't match an existing # uid. Users and groups with the same name should have equal diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index 13a32b968dcb..3bb65c6b295a 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -439,6 +439,7 @@ ./services/network-filesystems/u9fs.nix ./services/network-filesystems/yandex-disk.nix ./services/network-filesystems/xtreemfs.nix + ./services/network-filesystems/ceph.nix ./services/networking/amuled.nix ./services/networking/aria2.nix ./services/networking/asterisk.nix diff --git a/nixos/modules/services/network-filesystems/ceph.nix b/nixos/modules/services/network-filesystems/ceph.nix new file mode 100644 index 000000000000..5de8ae79a246 --- /dev/null +++ b/nixos/modules/services/network-filesystems/ceph.nix @@ -0,0 +1,371 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + ceph = pkgs.ceph; + cfg = config.services.ceph; + # function that translates "camelCaseOptions" to "camel case options", credits to tilpner in #nixos@freenode + translateOption = replaceStrings upperChars (map (s: " ${s}") lowerChars); + generateDaemonList = (daemonType: daemons: extraServiceConfig: + mkMerge ( + map (daemon: + { "ceph-${daemonType}-${daemon}" = generateServiceFile daemonType daemon cfg.global.clusterName ceph extraServiceConfig; } + ) daemons + ) + ); + generateServiceFile = (daemonType: daemonId: clusterName: ceph: extraServiceConfig: { + enable = true; + description = "Ceph ${builtins.replaceStrings lowerChars upperChars daemonType} daemon ${daemonId}"; + after = [ "network-online.target" "local-fs.target" "time-sync.target" ] ++ optional (daemonType == "osd") "ceph-mon.target"; + wants = [ "network-online.target" "local-fs.target" "time-sync.target" ]; + partOf = [ "ceph-${daemonType}.target" ]; + wantedBy = [ "ceph-${daemonType}.target" ]; + + serviceConfig = { + LimitNOFILE = 1048576; + LimitNPROC = 1048576; + Environment = "CLUSTER=${clusterName}"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; + PrivateDevices = "yes"; + PrivateTmp = "true"; + ProtectHome = "true"; + ProtectSystem = "full"; + Restart = "on-failure"; + StartLimitBurst = "5"; + StartLimitInterval = "30min"; + ExecStart = "${ceph.out}/bin/${if daemonType == "rgw" then "radosgw" else "ceph-${daemonType}"} -f --cluster ${clusterName} --id ${if daemonType == "rgw" then "client.${daemonId}" else daemonId} --setuser ceph --setgroup ceph"; + } // extraServiceConfig + // optionalAttrs (daemonType == "osd") { ExecStartPre = "${ceph.out}/libexec/ceph/ceph-osd-prestart.sh --id ${daemonId} --cluster ${clusterName}"; }; + } // optionalAttrs (builtins.elem daemonType [ "mds" "mon" "rgw" "mgr" ]) { preStart = '' + 
daemonPath="/var/lib/ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}" + if [ ! -d ''$daemonPath ]; then + mkdir -m 755 -p ''$daemonPath + chown -R ceph:ceph ''$daemonPath + fi + ''; + } // optionalAttrs (daemonType == "osd") { path = [ pkgs.getopt ]; } + ); + generateTargetFile = (daemonType: + { + "ceph-${daemonType}" = { + description = "Ceph target allowing to start/stop all ceph-${daemonType} services at once"; + partOf = [ "ceph.target" ]; + before = [ "ceph.target" ]; + }; + } + ); +in +{ + options.services.ceph = { + # Ceph has a monolithic configuration file but different sections for + # each daemon, a separate client section and a global section + enable = mkEnableOption "Ceph global configuration"; + + global = { + fsid = mkOption { + type = types.str; + example = '' + 433a2193-4f8a-47a0-95d2-209d7ca2cca5 + ''; + description = '' + Filesystem ID, a generated uuid, its must be generated and set before + attempting to start a cluster + ''; + }; + + clusterName = mkOption { + type = types.str; + default = "ceph"; + description = '' + Name of cluster + ''; + }; + + monInitialMembers = mkOption { + type = with types; nullOr commas; + default = null; + example = '' + node0, node1, node2 + ''; + description = '' + List of hosts that will be used as monitors at startup. + ''; + }; + + monHost = mkOption { + type = with types; nullOr commas; + default = null; + example = '' + 10.10.0.1, 10.10.0.2, 10.10.0.3 + ''; + description = '' + List of hostname shortnames/IP addresses of the initial monitors. + ''; + }; + + maxOpenFiles = mkOption { + type = types.int; + default = 131072; + description = '' + Max open files for each OSD daemon. + ''; + }; + + authClusterRequired = mkOption { + type = types.enum [ "cephx" "none" ]; + default = "cephx"; + description = '' + Enables requiring daemons to authenticate with eachother in the cluster. + ''; + }; + + authServiceRequired = mkOption { + type = types.enum [ "cephx" "none" ]; + default = "cephx"; + description = '' + Enables requiring clients to authenticate with the cluster to access services in the cluster (e.g. radosgw, mds or osd). + ''; + }; + + authClientRequired = mkOption { + type = types.enum [ "cephx" "none" ]; + default = "cephx"; + description = '' + Enables requiring the cluster to authenticate itself to the client. + ''; + }; + + publicNetwork = mkOption { + type = with types; nullOr commas; + default = null; + example = '' + 10.20.0.0/24, 192.168.1.0/24 + ''; + description = '' + A comma-separated list of subnets that will be used as public networks in the cluster. + ''; + }; + + clusterNetwork = mkOption { + type = with types; nullOr commas; + default = null; + example = '' + 10.10.0.0/24, 192.168.0.0/24 + ''; + description = '' + A comma-separated list of subnets that will be used as cluster networks in the cluster. + ''; + }; + }; + + mgr = { + enable = mkEnableOption "Ceph MGR daemon"; + daemons = mkOption { + type = with types; listOf str; + default = []; + example = '' + [ "name1" "name2" ]; + ''; + description = '' + A list of names for manager daemons that should have a service created. The names correspond + to the id part in ceph i.e. [ "name1" ] would result in mgr.name1 + ''; + }; + extraConfig = mkOption { + type = with types; attrsOf str; + default = {}; + description = '' + Extra configuration to add to the global section for manager daemons. 
+ ''; + }; + }; + + mon = { + enable = mkEnableOption "Ceph MON daemon"; + daemons = mkOption { + type = with types; listOf str; + default = []; + example = '' + [ "name1" "name2" ]; + ''; + description = '' + A list of monitor daemons that should have a service created. The names correspond + to the id part in ceph i.e. [ "name1" ] would result in mon.name1 + ''; + }; + extraConfig = mkOption { + type = with types; attrsOf str; + default = {}; + description = '' + Extra configuration to add to the monitor section. + ''; + }; + }; + + osd = { + enable = mkEnableOption "Ceph OSD daemon"; + daemons = mkOption { + type = with types; listOf str; + default = []; + example = '' + [ "name1" "name2" ]; + ''; + description = '' + A list of OSD daemons that should have a service created. The names correspond + to the id part in ceph i.e. [ "name1" ] would result in osd.name1 + ''; + }; + extraConfig = mkOption { + type = with types; attrsOf str; + default = { + "osd journal size" = "10000"; + "osd pool default size" = "3"; + "osd pool default min size" = "2"; + "osd pool default pg num" = "200"; + "osd pool default pgp num" = "200"; + "osd crush chooseleaf type" = "1"; + }; + description = '' + Extra configuration to add to the OSD section. + ''; + }; + }; + + mds = { + enable = mkEnableOption "Ceph MDS daemon"; + daemons = mkOption { + type = with types; listOf str; + default = []; + example = '' + [ "name1" "name2" ]; + ''; + description = '' + A list of metadata service daemons that should have a service created. The names correspond + to the id part in ceph i.e. [ "name1" ] would result in mds.name1 + ''; + }; + extraConfig = mkOption { + type = with types; attrsOf str; + default = {}; + description = '' + Extra configuration to add to the MDS section. + ''; + }; + }; + + rgw = { + enable = mkEnableOption "Ceph RadosGW daemon"; + daemons = mkOption { + type = with types; listOf str; + default = []; + example = '' + [ "name1" "name2" ]; + ''; + description = '' + A list of rados gateway daemons that should have a service created. The names correspond + to the id part in ceph i.e. [ "name1" ] would result in client.name1. Unlike OSD, MGR or + MON daemons, radosgw daemons are not members of the cluster itself; they are Ceph daemons + that use the cluster as a backend. + ''; + }; + }; + + client = { + enable = mkEnableOption "Ceph client configuration"; + extraConfig = mkOption { + type = with types; attrsOf str; + default = {}; + example = '' + { + # This would create a section for a radosgw daemon named node0 and related + # configuration for it + "client.radosgw.node0" = { "some config option" = "true"; }; + }; + ''; + description = '' + Extra configuration to add to the client section. Configuration for rados gateways + would be added here, with their own sections, as shown in the example.
+ ''; + }; + }; + }; + + config = mkIf config.services.ceph.enable { + assertions = [ + { assertion = cfg.global.fsid != ""; + message = "fsid has to be set to a valid uuid for the cluster to function"; + } + { assertion = cfg.mgr.enable == true; + message = "ceph 12.x requires at least one MGR daemon to be enabled for the cluster to function"; + } + { assertion = cfg.mon.enable == true -> cfg.mon.daemons != []; + message = "you have to set the id of at least one MON if you're going to enable the monitor service"; + } + { assertion = cfg.mds.enable == true -> cfg.mds.daemons != []; + message = "you have to set the id of at least one MDS if you're going to enable the metadata service"; + } + { assertion = cfg.osd.enable == true -> cfg.osd.daemons != []; + message = "you have to set the id of at least one OSD if you're going to enable the OSD service"; + } + { assertion = cfg.mgr.enable == true -> cfg.mgr.daemons != []; + message = "you have to set the id of at least one MGR if you're going to enable the MGR service"; + } + ]; + + warnings = optional (cfg.global.monInitialMembers == null) + ''Not setting a list of members in monInitialMembers requires that you set the host variable for each mon daemon, or else the cluster won't function''; + + environment.etc."ceph/ceph.conf".text = let + # Translate camelCaseOptions to the expected camel case option for ceph.conf + translatedGlobalConfig = mapAttrs' (name: value: nameValuePair (translateOption name) value) cfg.global; + # Merge the extraConfig set for mgr daemons, as mgr daemons don't have their own section + globalAndMgrConfig = translatedGlobalConfig // optionalAttrs cfg.mgr.enable cfg.mgr.extraConfig; + # Remove all name-value pairs with null values from the attribute set to avoid making empty sections in the ceph.conf + globalConfig = mapAttrs' (name: value: nameValuePair (translateOption name) value) (filterAttrs (name: value: value != null) globalAndMgrConfig); + totalConfig = { + "global" = globalConfig; + } // optionalAttrs (cfg.mon.enable && cfg.mon.extraConfig != {}) { "mon" = cfg.mon.extraConfig; } + // optionalAttrs (cfg.mds.enable && cfg.mds.extraConfig != {}) { "mds" = cfg.mds.extraConfig; } + // optionalAttrs (cfg.osd.enable && cfg.osd.extraConfig != {}) { "osd" = cfg.osd.extraConfig; } + // optionalAttrs (cfg.client.enable && cfg.client.extraConfig != {}) cfg.client.extraConfig; + in + generators.toINI {} totalConfig; + + users.extraUsers = singleton { + name = "ceph"; + uid = config.ids.uids.ceph; + description = "Ceph daemon user"; + }; + + users.extraGroups = singleton { + name = "ceph"; + gid = config.ids.gids.ceph; + }; + + systemd.services = let + services = [] + ++ optional cfg.mon.enable (generateDaemonList "mon" cfg.mon.daemons { RestartSec = "10"; }) + ++ optional cfg.mds.enable (generateDaemonList "mds" cfg.mds.daemons { StartLimitBurst = "3"; }) + ++ optional cfg.osd.enable (generateDaemonList "osd" cfg.osd.daemons { StartLimitBurst = "30"; RestartSec = "20s"; }) + ++ optional cfg.rgw.enable (generateDaemonList "rgw" cfg.rgw.daemons { }) + ++ optional cfg.mgr.enable (generateDaemonList "mgr" cfg.mgr.daemons { StartLimitBurst = "3"; }); + in + mkMerge services; + + systemd.targets = let + targets = [ + { "ceph" = { description = "Ceph target allowing to start/stop all ceph service instances at once"; }; } + ] ++ optional cfg.mon.enable (generateTargetFile "mon") + ++ optional cfg.mds.enable (generateTargetFile "mds") + ++ optional cfg.osd.enable (generateTargetFile "osd") + ++ optional cfg.rgw.enable (generateTargetFile "rgw") + ++ optional cfg.mgr.enable (generateTargetFile "mgr"); + in + mkMerge
targets; + + systemd.tmpfiles.rules = [ + "d /run/ceph 0770 ceph ceph -" + ]; + }; +} diff --git a/nixos/release.nix b/nixos/release.nix index 558bbbf9a9d4..473b11313bef 100644 --- a/nixos/release.nix +++ b/nixos/release.nix @@ -230,6 +230,7 @@ in rec { tests.borgbackup = callTest tests/borgbackup.nix {}; tests.buildbot = callTest tests/buildbot.nix {}; tests.cadvisor = callTestOnTheseSystems ["x86_64-linux"] tests/cadvisor.nix {}; + tests.ceph = callTestOnTheseSystems ["x86_64-linux"] tests/ceph.nix {}; tests.chromium = (callSubTestsOnTheseSystems ["x86_64-linux"] tests/chromium.nix {}).stable; tests.cjdns = callTest tests/cjdns.nix {}; tests.cloud-init = callTest tests/cloud-init.nix {}; diff --git a/nixos/tests/ceph.nix b/nixos/tests/ceph.nix new file mode 100644 index 000000000000..b9993062c079 --- /dev/null +++ b/nixos/tests/ceph.nix @@ -0,0 +1,140 @@ +import ./make-test.nix ({pkgs, ...}: rec { + name = "All-in-one-basic-ceph-cluster"; + meta = with pkgs.stdenv.lib.maintainers; { + maintainers = [ lejonet ]; + }; + + nodes = { + aio = { config, pkgs, ... }: { + virtualisation = { + emptyDiskImages = [ 20480 20480 ]; + vlans = [ 1 ]; + }; + + networking = { + firewall.allowPing = true; + useDHCP = false; + interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [ + { address = "192.168.1.1"; prefixLength = 24; } + ]; + }; + + environment.systemPackages = with pkgs; [ + bash + sudo + ceph + xfsprogs + ]; + nixpkgs.config.packageOverrides = super: { + ceph = super.ceph.override({ nss = super.nss; libxfs = super.libxfs; libaio = super.libaio; jemalloc = super.jemalloc; }); + }; + + boot.kernelModules = [ "xfs" ]; + + services.ceph.enable = true; + services.ceph.global = { + fsid = "066ae264-2a5d-4729-8001-6ad265f50b03"; + monInitialMembers = "aio"; + monHost = "192.168.1.1"; + }; + + services.ceph.mon = { + enable = true; + daemons = [ "aio" ]; + }; + + services.ceph.mgr = { + enable = true; + daemons = [ "aio" ]; + }; + + services.ceph.osd = { + enable = true; + daemons = [ "0" "1" ]; + }; + }; + }; + + testScript = { nodes, ... }: '' + startAll; + + $aio->waitForUnit("network.target"); + + # Create the ceph-related directories + $aio->mustSucceed( + "mkdir -p /var/lib/ceph/mgr/ceph-aio/", + "mkdir -p /var/lib/ceph/mon/ceph-aio/", + "mkdir -p /var/lib/ceph/osd/ceph-{0..1}/", + "chown ceph:ceph -R /var/lib/ceph/" + ); + + # Bootstrap ceph-mon daemon + $aio->mustSucceed( + "mkdir -p /var/lib/ceph/bootstrap-osd && chown ceph:ceph /var/lib/ceph/bootstrap-osd", + "sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. 
--cap mon 'allow *'", + "ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'", + "ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring", + "monmaptool --create --add aio 192.168.1.1 --fsid 066ae264-2a5d-4729-8001-6ad265f50b03 /tmp/monmap", + "sudo -u ceph ceph-mon --mkfs -i aio --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring", + "touch /var/lib/ceph/mon/ceph-aio/done", + "systemctl start ceph-mon-aio" + ); + $aio->waitForUnit("ceph-mon-aio"); + + # Can't check ceph status until a mon is up + $aio->succeed("ceph -s | grep 'mon: 1 daemons'"); + + # Start the ceph-mgr daemon, it has no deps and hardly any setup + $aio->mustSucceed( + "ceph auth get-or-create mgr.aio mon 'allow profile mgr' osd 'allow *' mds 'allow *' > /var/lib/ceph/mgr/ceph-aio/keyring", + "systemctl start ceph-mgr-aio" + ); + $aio->waitForUnit("ceph-mgr-aio"); + $aio->waitUntilSucceeds("ceph -s | grep 'quorum aio'"); + + # Bootstrap both OSDs + $aio->mustSucceed( + "mkfs.xfs /dev/vdb", + "mkfs.xfs /dev/vdc", + "mount /dev/vdb /var/lib/ceph/osd/ceph-0", + "mount /dev/vdc /var/lib/ceph/osd/ceph-1", + "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-0/keyring --name osd.0 --add-key AQBCEJNa3s8nHRAANvdsr93KqzBznuIWm2gOGg==", + "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-1/keyring --name osd.1 --add-key AQBEEJNac00kExAAXEgy943BGyOpVH1LLlHafQ==", + "echo '{\"cephx_secret\": \"AQBCEJNa3s8nHRAANvdsr93KqzBznuIWm2gOGg==\"}' | ceph osd new 55ba2294-3e24-478f-bee0-9dca4c231dd9 -i -", + "echo '{\"cephx_secret\": \"AQBEEJNac00kExAAXEgy943BGyOpVH1LLlHafQ==\"}' | ceph osd new 5e97a838-85b6-43b0-8950-cb56d554d1e5 -i -" + ); + + # Initialize the OSDs with regular filestore + $aio->mustSucceed( + "ceph-osd -i 0 --mkfs --osd-uuid 55ba2294-3e24-478f-bee0-9dca4c231dd9", + "ceph-osd -i 1 --mkfs --osd-uuid 5e97a838-85b6-43b0-8950-cb56d554d1e5", + "chown -R ceph:ceph /var/lib/ceph/osd", + "systemctl start ceph-osd-0", + "systemctl start ceph-osd-1" + ); + + $aio->waitUntilSucceeds("ceph osd stat | grep '2 osds: 2 up, 2 in'"); + $aio->waitUntilSucceeds("ceph -s | grep 'mgr: aio(active)'"); + $aio->waitUntilSucceeds("ceph -s | grep 'HEALTH_OK'"); + + $aio->mustSucceed( + "ceph osd pool create aio-test 100 100", + "ceph osd pool ls | grep 'aio-test'", + "ceph osd pool rename aio-test aio-other-test", + "ceph osd pool ls | grep 'aio-other-test'", + "ceph -s | grep '1 pools, 100 pgs'", + "ceph osd getcrushmap -o crush", + "crushtool -d crush -o decrushed", + "sed 's/step chooseleaf firstn 0 type host/step chooseleaf firstn 0 type osd/' decrushed > modcrush", + "crushtool -c modcrush -o recrushed", + "ceph osd setcrushmap -i recrushed", + "ceph osd pool set aio-other-test size 2" + ); + $aio->waitUntilSucceeds("ceph -s | grep 'HEALTH_OK'"); + $aio->waitUntilSucceeds("ceph -s | grep '100 active+clean'"); + $aio->mustFail( + "ceph osd pool ls | grep 'aio-test'", + "ceph osd pool delete aio-other-test aio-other-test --yes-i-really-really-mean-it" + ); + ''; +})
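
Usage sketch (not part of the patch): a minimal all-in-one host using the options this module defines, modelled on the test node above. The fsid, hostname and address are placeholders; a real deployment generates its own fsid (e.g. with uuidgen).

  services.ceph = {
    enable = true;
    global = {
      fsid = "433a2193-4f8a-47a0-95d2-209d7ca2cca5"; # placeholder, generate your own
      monInitialMembers = "node0";
      monHost = "192.168.1.10";
    };
    # ceph 12.x requires at least one enabled mgr daemon (see the assertions in the module)
    mon = { enable = true; daemons = [ "node0" ]; };
    mgr = { enable = true; daemons = [ "node0" ]; };
    osd = { enable = true; daemons = [ "0" "1" ]; };
  };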
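With that configuration, the module renders /etc/ceph/ceph.conf via generators.toINI: translateOption expands the camelCase option names into Ceph's space-separated keys, null-valued options are filtered out, and the mgr extraConfig is merged into the global section. The [global] section should come out roughly as below (keys are emitted in attribute order, i.e. alphabetically; enabling OSDs additionally produces an [osd] section from the default osd.extraConfig).

  [global]
  auth client required = cephx
  auth cluster required = cephx
  auth service required = cephx
  cluster name = ceph
  fsid = 433a2193-4f8a-47a0-95d2-209d7ca2cca5
  max open files = 131072
  mon host = 192.168.1.10
  mon initial members = node0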