4a9d9928dc
The `nix.*` options, apart from options for setting up the daemon itself, currently provide a lot of setting mappings for the Nix daemon configuration. The scope of the mapping yields convenience, but the line where an option is considered essential is blurry. For instance, the `extra-sandbox-paths` mapping is provided without its primary consumer, and the corresponding `sandbox-paths` option is also not mapped. The current system increases the maintenance burden as maintainers have to closely follow upstream changes. In this case, there are two state versions of Nix which have to be maintained collectively, with different options available. This commit aims to follow the standard outlined in RFC 42[1] to implement a structural setting pattern. The Nix configuration is encoded at its core as key-value pairs, which map nicely to attribute sets, making it feasible to express in the Nix language itself. Some existing options are kept, such as `buildMachines` and `registry`, which present a simplified interface to managing the respective settings. The interface is exposed as `nix.settings`. Legacy configurations are mapped to their corresponding options under `nix.settings` for backwards compatibility. Various option settings in other NixOS modules and relevant tests have been updated to use structural settings for consistency. The generation and validation of the configuration file has been modified to use `writeTextFile` instead of `runCommand` for clarity. Note that validation is now mandatory as strict checking of options has been pushed down to the derivation level due to freeformType consuming unmatched options. Furthermore, validation cannot occur when cross-compiling due to current limitations. A new option `publicHostKey` was added to the `buildMachines` submodule corresponding to the base64 encoded public host key settings exposed in the builder syntax. 
The build machine generation was subsequently rewritten to use `concatStringsSep` for better performance by grouping concatenations. [1] - https://github.com/NixOS/rfcs/blob/master/rfcs/0042-config-option.md
156 lines
5.5 KiB
Nix
156 lines
5.5 KiB
Nix
{ config, lib, ... }:
|
|
|
|
with lib;
|
|
|
|
{
|
|
# Module metadata: the maintainer list for this module.
meta.maintainers = [ lib.maintainers.joachifm ];
|
|
|
|
# Registers a rename from the old `security.virtualization.*` spelling of the
# L1 data cache option to the current `security.virtualisation.*` spelling.
imports =
  let
    optionLeaf = "flushL1DataCache";
  in
  [
    (lib.mkRenamedOptionModule
      [ "security" "virtualization" optionLeaf ]
      [ "security" "virtualisation" optionLeaf ])
  ];
|
|
|
|
options = {
|
|
# Boolean switch (on by default). When turned off, the `config` section of
# this module sets the `user.max_user_namespaces` sysctl to 0.
security.allowUserNamespaces = lib.mkOption {
  default = true;
  type = lib.types.bool;
  description = ''
    Whether to allow creation of user namespaces.

    The motivation for disabling user namespaces is the potential
    presence of code paths where the kernel's permission checking
    logic fails to account for namespacing, instead permitting a
    namespaced process to act outside the namespace with the same
    privileges as it would have inside it. This is particularly
    damaging in the common case of running as root within the namespace.

    When user namespace creation is disallowed, attempting to create a
    user namespace fails with "no space left on device" (ENOSPC).
    root may re-enable user namespace creation at runtime.
  '';
};
|
|
|
|
# Boolean switch (off by default). When turned on, the `config` section of
# this module sets the `kernel.unprivileged_userns_clone` sysctl.
security.unprivilegedUsernsClone = lib.mkOption {
  default = false;
  type = lib.types.bool;
  description = ''
    When disabled, unprivileged users will not be able to create new namespaces.
    By default unprivileged user namespaces are disabled.
    This option only works in a hardened profile.
  '';
};
|
|
|
|
# Boolean switch (off by default). When turned on, the `config` section of
# this module adds "nohibernate" to the kernel parameters and sets the
# `kernel.kexec_load_disabled` sysctl.
security.protectKernelImage = lib.mkOption {
  default = false;
  type = lib.types.bool;
  description = ''
    Whether to prevent replacing the running kernel image.
  '';
};
|
|
|
|
# Boolean switch (on by default). When turned off, the `config` section of
# this module adds "nosmt" to the kernel parameters.
security.allowSimultaneousMultithreading = lib.mkOption {
  default = true;
  type = lib.types.bool;
  description = ''
    Whether to allow SMT/hyperthreading. Disabling SMT means that only
    physical CPU cores will be usable at runtime, potentially at
    significant performance cost.

    The primary motivation for disabling SMT is to mitigate the risk of
    leaking data between threads running on the same CPU core (due to
    e.g., shared caches). This attack vector is unproven.

    Disabling SMT is a supplement to the L1 data cache flushing mitigation
    (see <xref linkend="opt-security.virtualisation.flushL1DataCache"/>)
    versus malicious VM guests (SMT could "bring back" previously flushed
    data).
  '';
};
|
|
|
|
# Boolean switch (off by default). When turned on, the `config` section of
# this module adds "pti=on" to the kernel parameters.
security.forcePageTableIsolation = lib.mkOption {
  default = false;
  type = lib.types.bool;
  description = ''
    Whether to force-enable the Page Table Isolation (PTI) Linux kernel
    feature even on CPU models that claim to be safe from Meltdown.

    This hardening feature is most beneficial to systems that run untrusted
    workloads that rely on address space isolation for security.
  '';
};
|
|
|
|
# Nullable enum (null by default). A non-null value is interpolated by the
# `config` section of this module into the `kvm-intel.vmentry_l1d_flush=`
# kernel parameter.
security.virtualisation.flushL1DataCache = lib.mkOption {
  default = null;
  type = with lib.types; nullOr (enum [ "never" "cond" "always" ]);
  description = ''
    Whether the hypervisor should flush the L1 data cache before
    entering guests.
    See also <xref linkend="opt-security.allowSimultaneousMultithreading"/>.

    <variablelist>
    <varlistentry>
    <term><literal>null</literal></term>
    <listitem><para>uses the kernel default</para></listitem>
    </varlistentry>
    <varlistentry>
    <term><literal>"never"</literal></term>
    <listitem><para>disables L1 data cache flushing entirely.
    May be appropriate if all guests are trusted.</para></listitem>
    </varlistentry>
    <varlistentry>
    <term><literal>"cond"</literal></term>
    <listitem><para>flushes L1 data cache only for pre-determined
    code paths. May leak information about the host address space
    layout.</para></listitem>
    </varlistentry>
    <varlistentry>
    <term><literal>"always"</literal></term>
    <listitem><para>flushes L1 data cache every time the hypervisor
    enters the guest. May incur significant performance cost.
    </para></listitem>
    </varlistentry>
    </variablelist>
  '';
};
|
|
};
|
|
|
|
config = mkMerge [
|
|
# Applied only when `security.allowUserNamespaces` is false.
(mkIf (!config.security.allowUserNamespaces) {
  # Setting the number of allowed user namespaces to 0 effectively disables
  # the feature at runtime. Note that root may raise the limit again
  # at any time.
  boot.kernel.sysctl."user.max_user_namespaces" = 0;

  assertions = [
    {
      # NOTE(review): `nix.settings.sandbox` is declared outside this module;
      # per the NixOS option documentation it accepts a boolean or the string
      # "relaxed". The `->` operator only accepts booleans, so the setting is
      # compared against `false` first. Any enabled sandbox mode (true or
      # "relaxed") then fails this assertion with the message below instead
      # of aborting evaluation with a type error.
      assertion = (config.nix.settings.sandbox != false) -> config.security.allowUserNamespaces;
      message = "`nix.settings.sandbox = true` conflicts with `!security.allowUserNamespaces`.";
    }
  ];
})
|
|
|
|
# Applied only when `security.unprivilegedUsernsClone` is true. The sysctl is
# set with `mkDefault`, so a definition made elsewhere at normal priority
# takes precedence.
(lib.mkIf config.security.unprivilegedUsernsClone {
  boot.kernel.sysctl = {
    "kernel.unprivileged_userns_clone" = lib.mkDefault true;
  };
})
|
|
|
|
# Applied only when `security.protectKernelImage` is true.
(lib.mkIf config.security.protectKernelImage {
  boot = {
    # Disable hibernation (allows replacing the running kernel)
    kernelParams = [ "nohibernate" ];

    # Prevent replacing the running kernel image w/o reboot
    kernel.sysctl."kernel.kexec_load_disabled" = lib.mkDefault true;
  };
})
|
|
|
|
# Applied only when `security.allowSimultaneousMultithreading` is false:
# adds "nosmt" to the kernel parameters.
(lib.mkIf (!config.security.allowSimultaneousMultithreading) {
  boot = {
    kernelParams = [ "nosmt" ];
  };
})
|
|
|
|
# Applied only when `security.forcePageTableIsolation` is true:
# adds "pti=on" to the kernel parameters.
(lib.mkIf config.security.forcePageTableIsolation {
  boot = {
    kernelParams = [ "pti=on" ];
  };
})
|
|
|
|
# Applied only when `security.virtualisation.flushL1DataCache` is non-null:
# the chosen value is passed on as the `kvm-intel.vmentry_l1d_flush` kernel
# parameter.
(
  let
    l1dFlushMode = config.security.virtualisation.flushL1DataCache;
  in
  lib.mkIf (l1dFlushMode != null) {
    boot.kernelParams = [ "kvm-intel.vmentry_l1d_flush=${l1dFlushMode}" ];
  }
)
|
|
];
|
|
}
|