nixos/thanos: add module for the thanos service
This commit is contained in:
parent
4a6e47126c
commit
ebc65a5f21
@ -519,6 +519,7 @@
|
||||
./services/monitoring/systemhealth.nix
|
||||
./services/monitoring/teamviewer.nix
|
||||
./services/monitoring/telegraf.nix
|
||||
./services/monitoring/thanos.nix
|
||||
./services/monitoring/ups.nix
|
||||
./services/monitoring/uptime.nix
|
||||
./services/monitoring/vnstat.nix
|
||||
|
756
nixos/modules/services/monitoring/thanos.nix
Normal file
756
nixos/modules/services/monitoring/thanos.nix
Normal file
@ -0,0 +1,756 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
cfg = config.services.thanos;
|
||||
|
||||
# Declare an option that holds either a value of the given type or `null`.
# The option defaults to `null`.
nullOpt = type: description:
  mkOption {
    inherit description;
    default = null;
    type = types.nullOr type;
  };
|
||||
|
||||
# Render `--<opt>="<value>"` as a one-element list, or no argument at all
# when the value is null.
optionToArgs = opt: v:
  if v != null then [ ''--${opt}="${toString v}"'' ] else [ ];
|
||||
# Render the bare flag `--<opt>` when the boolean is true, nothing otherwise.
flagToArgs = opt: v:
  if v then [ ''--${opt}'' ] else [ ];
|
||||
# Render one `--<opt>="<value>"` argument per list element.
listToArgs = opt: vs:
  let render = value: ''--${opt}="${value}"'';
  in map render vs;
|
||||
# Render one `--<opt>=<key>=\"<value>\"` argument per attribute.
# The backslashes are emitted literally into the resulting argument string.
attrsToArgs = opt: kvs:
  let render = key: value: ''--${opt}=${key}=\"${value}\"'';
  in mapAttrsToList render kvs;
|
||||
|
||||
# Like mkParam, but appends a note to the description stating which default
# Thanos itself applies when the option is left at `null`.
mkParamDef = type: default: description:
  let
    defaultNote = ''

      Defaults to <literal>${toString default}</literal> in Thanos
      when set to <literal>null</literal>.
    '';
  in mkParam type (description + defaultNote);
|
||||
|
||||
# A nullable parameter: pairs the option declaration with the function that
# turns its value into `--name="value"` command line arguments.
mkParam = type: description: {
  option = nullOpt type description;
  toArgs = optionToArgs;
};
|
||||
|
||||
# A boolean parameter (default false) that is rendered as a bare `--flag`
# when enabled.
mkFlagParam = description: {
  option = mkOption {
    inherit description;
    default = false;
    type = types.bool;
  };
  toArgs = flagToArgs;
};
|
||||
|
||||
# A list-of-strings parameter. Each element is rendered as a separate
# `--<opt>="<value>"` argument; the option's own attribute path is ignored in
# favour of the explicitly given flag name `opt`.
mkListParam = opt: description: {
  option = mkOption {
    inherit description;
    default = [ ];
    type = types.listOf types.str;
  };
  toArgs = _path: values: listToArgs opt values;
};
|
||||
|
||||
# An attribute-set-of-strings parameter. Each key/value pair is rendered as a
# separate argument for the explicitly given flag name `opt`; the option's own
# attribute path is ignored.
mkAttrsParam = opt: description: {
  option = mkOption {
    inherit description;
    default = { };
    type = types.attrsOf types.str;
  };
  toArgs = _path: pairs: attrsToArgs opt pairs;
};
|
||||
|
||||
# A state directory parameter. The option value is a directory name that is
# prefixed with /var/lib before being passed to Thanos as the flag `opt`.
mkStateDirParam = opt: default: description: {
  option = mkOption {
    inherit default description;
    type = types.str;
  };
  toArgs = _path: stateDir: optionToArgs opt "/var/lib/${stateDir}";
};
|
||||
|
||||
# Convert an attribute set to a YAML file in the Nix store: the attributes are
# serialised to a JSON file first, which json2yaml (from remarshal) then
# converts to the derivation output.
toYAML = name: attrs:
  let
    jsonFile = builtins.toFile "${name}.json" (builtins.toJSON attrs);
    env = {
      json = jsonFile;
      nativeBuildInputs = [ pkgs.remarshal ];
      preferLocalBuild = true;
    };
  in pkgs.runCommandNoCC name env ''json2yaml -i $json -o $out'';
|
||||
|
||||
# Build the full command line for the Thanos sub-command `cmd`.
#
# Every parameter declared in params.<cmd> is paired with the value the user
# configured under services.thanos.<cmd>, and the parameter's toArgs function
# turns that pair into a (possibly empty) list of arguments. Arguments are
# joined with backslash-newline continuations.
thanos = cmd:
  let
    binary = "${cfg.package}/bin/thanos ${cmd}";
    separator = " \\\n ";

    # Attribute tree mirroring params.<cmd>, with each parameter replaced by
    # its rendered argument list.
    renderedTree = mapParamsRecursive
      (path: param:
        let
          opt = concatStringsSep "." path;
          v = getAttrFromPath path cfg."${cmd}";
        in param.toArgs opt v)
      params."${cmd}";

    args = concatLists (collect isList renderedTree);
  in
    if length args == 0
    then binary
    else binary + separator + concatStringsSep separator args;
|
||||
|
||||
# Recursively map over a parameter tree. Recursion continues into every
# attribute set that is not itself a parameter, i.e. one that lacks `toArgs`
# or lacks `option`.
mapParamsRecursive =
  mapAttrsRecursiveCond
    (attrs: !(attrs ? "toArgs") || !(attrs ? "option"));
|
||||
|
||||
# Project a parameter tree onto its option declarations.
paramsToOptions =
  mapParamsRecursive (_path: { option, ... }: option);
|
||||
|
||||
params = {
|
||||
|
||||
# Logging parameters.
log = {

  log.format = mkParam types.str ''
    Log format to use.
  '';

  log.level =
    let levels = [ "debug" "info" "warn" "error" "fatal" ];
    in mkParamDef (types.enum levels) "info" ''
      Log filtering level.
    '';
};
|
||||
|
||||
# Google Cloud Trace parameters.
gcloudtrace = {

  gcloudtrace.project = mkParam types.str ''
    GCP project to send Google Cloud Trace tracings to.

    If <literal>null</literal>, tracing will be disabled.
  '';

  # The angle brackets around "sample-factor" are written as XML entities:
  # descriptions are DocBook markup, so a raw <sample-factor> would be parsed
  # as an (unclosed) element.
  gcloudtrace.sample-factor = mkParamDef types.int 1 ''
    How often we send traces <literal>1/&lt;sample-factor&gt;</literal>.

    If <literal>0</literal> no trace will be sent periodically, unless
    forced by baggage item.
  '';
};
|
||||
|
||||
common = params.log // params.gcloudtrace // {
|
||||
|
||||
http-address = mkParamDef types.str "0.0.0.0:10902" ''
|
||||
Listen <literal>host:port</literal> for HTTP endpoints.
|
||||
'';
|
||||
|
||||
grpc-address = mkParamDef types.str "0.0.0.0:10901" ''
|
||||
Listen <literal>ip:port</literal> address for gRPC endpoints (StoreAPI).
|
||||
|
||||
Make sure this address is routable from other components if you use gossip,
|
||||
<option>grpc-advertise-address</option> is empty and you require cross-node connection.
|
||||
'';
|
||||
|
||||
grpc-server-tls-cert = mkParam types.str ''
|
||||
TLS Certificate for gRPC server, leave blank to disable TLS
|
||||
'';
|
||||
|
||||
grpc-server-tls-key = mkParam types.str ''
|
||||
TLS Key for the gRPC server, leave blank to disable TLS
|
||||
'';
|
||||
|
||||
grpc-server-tls-client-ca = mkParam types.str ''
|
||||
TLS CA to verify clients against.
|
||||
|
||||
If no client CA is specified, there is no client verification on server side.
|
||||
(tls.NoClientCert)
|
||||
'';
|
||||
};
|
||||
|
||||
objstore = cfg : {
|
||||
|
||||
objstore.config-file = {
|
||||
toArgs = _opt : path : optionToArgs "objstore.config-file" path;
|
||||
option = mkOption {
|
||||
type = with types; nullOr str;
|
||||
default = if cfg.objstore.config == null then null
|
||||
else toString (toYAML "objstore.yaml" cfg.objstore.config);
|
||||
defaultText = ''
|
||||
if config.services.thanos.<cmd>.objstore.config == null then null
|
||||
else toString (toYAML "objstore.yaml" config.services.thanos.<cmd>.objstore.config);
|
||||
'';
|
||||
description = ''
|
||||
Path to YAML file that contains object store configuration.
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
objstore.config =
|
||||
{
|
||||
toArgs = _opt : _attrs : [];
|
||||
option = nullOpt types.attrs ''
|
||||
Object store configuration.
|
||||
|
||||
When not <literal>null</literal> the attribute set gets converted to
|
||||
a YAML file and stored in the Nix store. The option
|
||||
<option>objstore.config-file</option> will default to its path.
|
||||
|
||||
If <option>objstore.config-file</option> is set this option has no effect.
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
sidecar = params.common // params.objstore cfg.sidecar // {
|
||||
|
||||
prometheus.url = mkParamDef types.str "http://localhost:9090" ''
|
||||
URL at which to reach Prometheus's API.
|
||||
|
||||
For better performance use local network.
|
||||
'';
|
||||
|
||||
tsdb.path = {
|
||||
toArgs = optionToArgs;
|
||||
option = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/${config.services.prometheus2.stateDir}/data";
|
||||
defaultText = "/var/lib/\${config.services.prometheus2.stateDir}/data";
|
||||
description = ''
|
||||
Data directory of TSDB.
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
reloader.config-file = mkParam types.str ''
|
||||
Config file watched by the reloader.
|
||||
'';
|
||||
|
||||
reloader.config-envsubst-file = mkParam types.str ''
|
||||
Output file for environment variable substituted config file.
|
||||
'';
|
||||
|
||||
reloader.rule-dirs = mkListParam "reloader.rule-dir" ''
|
||||
Rule directories for the reloader to refresh.
|
||||
'';
|
||||
|
||||
};
|
||||
|
||||
store = params.common // params.objstore cfg.store // {
|
||||
|
||||
stateDir = mkStateDirParam "data-dir" "thanos-store" ''
|
||||
Data directory relative to <literal>/var/lib</literal>
|
||||
in which to cache remote blocks.
|
||||
'';
|
||||
|
||||
index-cache-size = mkParamDef types.str "250MB" ''
|
||||
Maximum size of items held in the index cache.
|
||||
'';
|
||||
|
||||
chunk-pool-size = mkParamDef types.str "2GB" ''
|
||||
Maximum size of concurrently allocatable bytes for chunks.
|
||||
'';
|
||||
|
||||
store.grpc.series-sample-limit = mkParamDef types.int 0 ''
|
||||
Maximum amount of samples returned via a single Series call.
|
||||
|
||||
<literal>0</literal> means no limit.
|
||||
|
||||
NOTE: for efficiency we take 120 as the number of samples in chunk (it
|
||||
cannot be bigger than that), so the actual number of samples might be
|
||||
lower, even though the maximum could be hit.
|
||||
'';
|
||||
|
||||
store.grpc.series-max-concurrency = mkParamDef types.int 20 ''
|
||||
Maximum number of concurrent Series calls.
|
||||
'';
|
||||
|
||||
sync-block-duration = mkParamDef types.str "3m" ''
|
||||
Repeat interval for syncing the blocks between local and remote view.
|
||||
'';
|
||||
|
||||
block-sync-concurrency = mkParamDef types.int 20 ''
|
||||
Number of goroutines to use when syncing blocks from object storage.
|
||||
'';
|
||||
};
|
||||
|
||||
query = params.common // {
|
||||
|
||||
http-advertise-address = mkParam types.str ''
|
||||
Explicit (external) <literal>host:port</literal> address to advertise
|
||||
for HTTP QueryAPI in gossip cluster.
|
||||
|
||||
If <literal>null</literal>, the option <option>http-address</option>
|
||||
will be used.
|
||||
'';
|
||||
|
||||
grpc-client-tls-secure = mkFlagParam ''
|
||||
Use TLS when talking to the gRPC server
|
||||
'';
|
||||
|
||||
grpc-client-tls-cert = mkParam types.str ''
|
||||
TLS Certificates to use to identify this client to the server
|
||||
'';
|
||||
|
||||
grpc-client-tls-key = mkParam types.str ''
|
||||
TLS Key for the client's certificate
|
||||
'';
|
||||
|
||||
grpc-client-tls-ca = mkParam types.str ''
|
||||
TLS CA Certificates to use to verify gRPC servers
|
||||
'';
|
||||
|
||||
grpc-client-server-name = mkParam types.str ''
|
||||
Server name to verify the hostname on the returned gRPC certificates.
|
||||
See <link xlink:href="https://tools.ietf.org/html/rfc4366#section-3.1"/>
|
||||
'';
|
||||
|
||||
# Route prefix under which the API and UI endpoints are served.
web.route-prefix = mkParam types.str ''
  Prefix for API and UI endpoints.

  This allows thanos UI to be served on a sub-path. This option is
  analogous to <option>web.route-prefix</option> of Prometheus.
'';
|
||||
|
||||
web.external-prefix = mkParam types.str ''
|
||||
Static prefix for all HTML links and redirect URLs in the UI query web
|
||||
interface.
|
||||
|
||||
Actual endpoints are still served on / or the
|
||||
<option>web.route-prefix</option>. This allows thanos UI to be served
|
||||
behind a reverse proxy that strips a URL sub-path.
|
||||
'';
|
||||
|
||||
web.prefix-header = mkParam types.str ''
|
||||
Name of HTTP request header used for dynamic prefixing of UI links and
|
||||
redirects.
|
||||
|
||||
This option is ignored if the option
|
||||
<literal>web.external-prefix</literal> is set.
|
||||
|
||||
Security risk: enable this option only if a reverse proxy in front of
|
||||
thanos is resetting the header.
|
||||
|
||||
The setting <literal>web.prefix-header="X-Forwarded-Prefix"</literal>
|
||||
can be useful, for example, if Thanos UI is served via Traefik reverse
|
||||
proxy with <literal>PathPrefixStrip</literal> option enabled, which
|
||||
sends the stripped prefix value in <literal>X-Forwarded-Prefix</literal>
|
||||
header. This allows thanos UI to be served on a sub-path.
|
||||
'';
|
||||
|
||||
query.timeout = mkParamDef types.str "2m" ''
|
||||
Maximum time to process query by query node.
|
||||
'';
|
||||
|
||||
query.max-concurrent = mkParamDef types.int 20 ''
|
||||
Maximum number of queries processed concurrently by query node.
|
||||
'';
|
||||
|
||||
query.replica-label = mkParam types.str ''
|
||||
Label to treat as a replica indicator along which data is
|
||||
deduplicated.
|
||||
|
||||
Still you will be able to query without deduplication using
|
||||
<literal>dedup=false</literal> parameter.
|
||||
'';
|
||||
|
||||
selector-labels = mkAttrsParam "selector-label" ''
|
||||
Query selector labels that will be exposed in info endpoint.
|
||||
'';
|
||||
|
||||
store.addresses = mkListParam "store" ''
|
||||
Addresses of statically configured store API servers.
|
||||
|
||||
The scheme may be prefixed with <literal>dns+</literal> or
|
||||
<literal>dnssrv+</literal> to detect store API servers through
|
||||
respective DNS lookups.
|
||||
'';
|
||||
|
||||
store.sd-files = mkListParam "store.sd-files" ''
|
||||
Path to files that contain addresses of store API servers. The path
|
||||
can be a glob pattern.
|
||||
'';
|
||||
|
||||
store.sd-interval = mkParamDef types.str "5m" ''
|
||||
Refresh interval to re-read file SD files. It is used as a resync fallback.
|
||||
'';
|
||||
|
||||
store.sd-dns-interval = mkParamDef types.str "30s" ''
|
||||
Interval between DNS resolutions.
|
||||
'';
|
||||
|
||||
store.unhealthy-timeout = mkParamDef types.str "5m" ''
|
||||
Timeout before an unhealthy store is cleaned from the store UI page.
|
||||
'';
|
||||
|
||||
query.auto-downsampling = mkFlagParam ''
|
||||
Enable automatic adjustment (step / 5) to what source of data should
|
||||
be used in store gateways if no
|
||||
<literal>max_source_resolution</literal> param is specified.
|
||||
'';
|
||||
|
||||
query.partial-response = mkFlagParam ''
|
||||
Enable partial response for queries if no
|
||||
<literal>partial_response</literal> param is specified.
|
||||
'';
|
||||
|
||||
query.default-evaluation-interval = mkParamDef types.str "1m" ''
|
||||
Set default evaluation interval for sub queries.
|
||||
'';
|
||||
|
||||
store.response-timeout = mkParamDef types.str "0ms" ''
|
||||
If a Store doesn't send any data in this specified duration then a
|
||||
Store will be ignored and partial data will be returned if it's
|
||||
enabled. <literal>0</literal> disables timeout.
|
||||
'';
|
||||
};
|
||||
|
||||
rule = params.common // params.objstore cfg.rule // {
|
||||
|
||||
labels = mkAttrsParam "label" ''
|
||||
Labels to be applied to all generated metrics.
|
||||
|
||||
Similar to external labels for Prometheus,
|
||||
used to identify ruler and its blocks as unique source.
|
||||
'';
|
||||
|
||||
stateDir = mkStateDirParam "data-dir" "thanos-rule" ''
|
||||
Data directory relative to <literal>/var/lib</literal>.
|
||||
'';
|
||||
|
||||
rule-files = mkListParam "rule-file" ''
|
||||
Rule files that should be used by rule manager. Can be in glob format.
|
||||
'';
|
||||
|
||||
eval-interval = mkParamDef types.str "30s" ''
|
||||
The default evaluation interval to use.
|
||||
'';
|
||||
|
||||
tsdb.block-duration = mkParamDef types.str "2h" ''
|
||||
Block duration for TSDB block.
|
||||
'';
|
||||
|
||||
tsdb.retention = mkParamDef types.str "48h" ''
|
||||
Block retention time on local disk.
|
||||
'';
|
||||
|
||||
alertmanagers.urls = mkListParam "alertmanagers.url" ''
|
||||
Alertmanager replica URLs to push firing alerts.
|
||||
|
||||
Ruler claims success if push to at least one alertmanager from
|
||||
discovered succeeds. The scheme may be prefixed with
|
||||
<literal>dns+</literal> or <literal>dnssrv+</literal> to detect
|
||||
Alertmanager IPs through respective DNS lookups. The port defaults to
|
||||
<literal>9093</literal> or the SRV record's value. The URL path is
|
||||
used as a prefix for the regular Alertmanager API path.
|
||||
'';
|
||||
|
||||
alertmanagers.send-timeout = mkParamDef types.str "10s" ''
|
||||
Timeout for sending alerts to alertmanager.
|
||||
'';
|
||||
|
||||
alert.query-url = mkParam types.str ''
|
||||
The external Thanos Query URL that would be set in all alerts 'Source' field.
|
||||
'';
|
||||
|
||||
# Label names removed from alerts before they are sent to Alertmanager;
# rendered as repeated --alert.label-drop arguments.
alert.label-drop = mkListParam "alert.label-drop" ''
  Labels by name to drop before sending to alertmanager.

  This allows alert to be deduplicated on replica label.

  Similar to Prometheus alert relabelling.
'';
|
||||
|
||||
# Route prefix under which the API and UI endpoints are served.
web.route-prefix = mkParam types.str ''
  Prefix for API and UI endpoints.

  This allows thanos UI to be served on a sub-path.

  This option is analogous to <literal>--web.route-prefix</literal> of Prometheus.
'';
|
||||
|
||||
web.external-prefix = mkParam types.str ''
|
||||
Static prefix for all HTML links and redirect URLs in the UI query web
|
||||
interface.
|
||||
|
||||
Actual endpoints are still served on / or the
|
||||
<option>web.route-prefix</option>. This allows thanos UI to be served
|
||||
behind a reverse proxy that strips a URL sub-path.
|
||||
'';
|
||||
|
||||
web.prefix-header = mkParam types.str ''
|
||||
Name of HTTP request header used for dynamic prefixing of UI links and
|
||||
redirects.
|
||||
|
||||
This option is ignored if the option
|
||||
<option>web.external-prefix</option> is set.
|
||||
|
||||
Security risk: enable this option only if a reverse proxy in front of
|
||||
thanos is resetting the header.
|
||||
|
||||
The header <literal>X-Forwarded-Prefix</literal> can be useful, for
|
||||
example, if Thanos UI is served via Traefik reverse proxy with
|
||||
<literal>PathPrefixStrip</literal> option enabled, which sends the
|
||||
stripped prefix value in <literal>X-Forwarded-Prefix</literal>
|
||||
header. This allows thanos UI to be served on a sub-path.
|
||||
'';
|
||||
|
||||
query.addresses = mkListParam "query" ''
|
||||
Addresses of statically configured query API servers.
|
||||
|
||||
The scheme may be prefixed with <literal>dns+</literal> or
|
||||
<literal>dnssrv+</literal> to detect query API servers through
|
||||
respective DNS lookups.
|
||||
'';
|
||||
|
||||
query.sd-files = mkListParam "query.sd-files" ''
|
||||
Path to file that contain addresses of query peers.
|
||||
The path can be a glob pattern.
|
||||
'';
|
||||
|
||||
query.sd-interval = mkParamDef types.str "5m" ''
|
||||
Refresh interval to re-read file SD files. (used as a fallback)
|
||||
'';
|
||||
|
||||
query.sd-dns-interval = mkParamDef types.str "30s" ''
|
||||
Interval between DNS resolutions.
|
||||
'';
|
||||
};
|
||||
|
||||
compact = params.log // params.gcloudtrace // params.objstore cfg.compact // {
|
||||
|
||||
http-address = mkParamDef types.str "0.0.0.0:10902" ''
|
||||
Listen <literal>host:port</literal> for HTTP endpoints.
|
||||
'';
|
||||
|
||||
stateDir = mkStateDirParam "data-dir" "thanos-compact" ''
|
||||
Data directory relative to <literal>/var/lib</literal>
|
||||
in which to cache blocks and process compactions.
|
||||
'';
|
||||
|
||||
consistency-delay = mkParamDef types.str "30m" ''
|
||||
Minimum age of fresh (non-compacted) blocks before they are being
|
||||
processed. Malformed blocks older than the maximum of consistency-delay
|
||||
and 30m0s will be removed.
|
||||
'';
|
||||
|
||||
retention.resolution-raw = mkParamDef types.str "0d" ''
|
||||
How long to retain raw samples in bucket.
|
||||
|
||||
<literal>0d</literal> - disables this retention
|
||||
'';
|
||||
|
||||
retention.resolution-5m = mkParamDef types.str "0d" ''
|
||||
How long to retain samples of resolution 1 (5 minutes) in bucket.
|
||||
|
||||
<literal>0d</literal> - disables this retention
|
||||
'';
|
||||
|
||||
retention.resolution-1h = mkParamDef types.str "0d" ''
|
||||
How long to retain samples of resolution 2 (1 hour) in bucket.
|
||||
|
||||
<literal>0d</literal> - disables this retention
|
||||
'';
|
||||
|
||||
startAt = {
|
||||
toArgs = _opt : startAt : flagToArgs "wait" (startAt == null);
|
||||
option = nullOpt types.str ''
|
||||
When this option is set to a <literal>systemd.time</literal>
|
||||
specification the Thanos compactor will run at the specified period.
|
||||
|
||||
When this option is <literal>null</literal> the Thanos compactor service
|
||||
will run continuously. So it will not exit after all compactions have
|
||||
been processed but wait for new work.
|
||||
'';
|
||||
};
|
||||
|
||||
block-sync-concurrency = mkParamDef types.int 20 ''
|
||||
Number of goroutines to use when syncing block metadata from object storage.
|
||||
'';
|
||||
|
||||
compact.concurrency = mkParamDef types.int 1 ''
|
||||
Number of goroutines to use when compacting groups.
|
||||
'';
|
||||
};
|
||||
|
||||
# Parameters of the `thanos downsample` command: the logging, tracing and
# object store parameters plus its own state directory.
downsample =
  let
    shared = params.log // params.gcloudtrace // params.objstore cfg.downsample;
  in shared // {

    stateDir = mkStateDirParam "data-dir" "thanos-downsample" ''
      Data directory relative to <literal>/var/lib</literal>
      in which to cache blocks and process downsamplings.
    '';

  };
|
||||
|
||||
receive = params.common // params.objstore cfg.receive // {
|
||||
|
||||
remote-write.address = mkParamDef types.str "0.0.0.0:19291" ''
|
||||
Address to listen on for remote write requests.
|
||||
'';
|
||||
|
||||
stateDir = mkStateDirParam "tsdb.path" "thanos-receive" ''
|
||||
Data directory relative to <literal>/var/lib</literal> of TSDB.
|
||||
'';
|
||||
|
||||
labels = mkAttrsParam "labels" ''
|
||||
External labels to announce.
|
||||
|
||||
This flag will be removed in the future when handling multiple tsdb
|
||||
instances is added.
|
||||
'';
|
||||
|
||||
tsdb.retention = mkParamDef types.str "15d" ''
|
||||
How long to retain raw samples on local storage.
|
||||
|
||||
<literal>0d</literal> - disables this retention
|
||||
'';
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
# Produce a config fragment asserting that services.thanos.<cmd>.stateDir
# does not start with "/", since it is interpreted relative to /var/lib.
assertRelativeStateDir = cmd:
  let
    stateDir = cfg."${cmd}".stateDir;
    isAbsolute = hasPrefix "/" stateDir;
  in {
    assertions = [
      {
        assertion = !isAbsolute;
        message =
          "The option services.thanos.${cmd}.stateDir should not be an absolute directory."
          + " It should be a directory relative to /var/lib.";
      }
    ];
  };
|
||||
|
||||
in {
|
||||
|
||||
options.services.thanos = {
|
||||
|
||||
package = mkOption {
|
||||
type = types.package;
|
||||
default = pkgs.thanos;
|
||||
defaultText = "pkgs.thanos";
|
||||
description = ''
|
||||
The thanos package that should be used.
|
||||
'';
|
||||
};
|
||||
|
||||
sidecar = paramsToOptions params.sidecar // {
|
||||
enable = mkEnableOption
|
||||
"the Thanos sidecar for Prometheus server";
|
||||
};
|
||||
|
||||
# Options of the Thanos store component: every declared store parameter plus
# an `enable` switch.
store = paramsToOptions params.store // {
  # No trailing period here: mkEnableOption wraps the text as
  # "Whether to enable <name>." and supplies the final period itself.
  enable = mkEnableOption
    "the Thanos store node giving access to blocks in a bucket provider";
};
|
||||
|
||||
query = paramsToOptions params.query // {
|
||||
enable = mkEnableOption
|
||||
("the Thanos query node exposing PromQL enabled Query API " +
|
||||
"with data retrieved from multiple store nodes");
|
||||
};
|
||||
|
||||
rule = paramsToOptions params.rule // {
|
||||
enable = mkEnableOption
|
||||
("the Thanos ruler service which evaluates Prometheus rules against" +
|
||||
" given Query nodes, exposing Store API and storing old blocks in bucket");
|
||||
};
|
||||
|
||||
compact = paramsToOptions params.compact // {
|
||||
enable = mkEnableOption
|
||||
"the Thanos compactor which continuously compacts blocks in an object store bucket";
|
||||
};
|
||||
|
||||
downsample = paramsToOptions params.downsample // {
|
||||
enable = mkEnableOption
|
||||
"the Thanos downsampler which continuously downsamples blocks in an object store bucket";
|
||||
};
|
||||
|
||||
receive = paramsToOptions params.receive // {
|
||||
enable = mkEnableOption
|
||||
("the Thanos receiver which accept Prometheus remote write API requests " +
|
||||
"and write to local tsdb (EXPERIMENTAL, this may change drastically without notice)");
|
||||
};
|
||||
};
|
||||
|
||||
config = mkMerge [
|
||||
|
||||
(mkIf cfg.sidecar.enable {
|
||||
systemd.services.thanos-sidecar = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" "prometheus2.service" ];
|
||||
serviceConfig = {
|
||||
User = "prometheus";
|
||||
Restart = "always";
|
||||
ExecStart = thanos "sidecar";
|
||||
};
|
||||
};
|
||||
})
|
||||
|
||||
(mkIf cfg.store.enable (mkMerge [
|
||||
(assertRelativeStateDir "store")
|
||||
{
|
||||
systemd.services.thanos-store = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
DynamicUser = true;
|
||||
StateDirectory = cfg.store.stateDir;
|
||||
Restart = "always";
|
||||
ExecStart = thanos "store";
|
||||
};
|
||||
};
|
||||
}
|
||||
]))
|
||||
|
||||
(mkIf cfg.query.enable {
|
||||
systemd.services.thanos-query = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
DynamicUser = true;
|
||||
Restart = "always";
|
||||
ExecStart = thanos "query";
|
||||
};
|
||||
};
|
||||
})
|
||||
|
||||
(mkIf cfg.rule.enable (mkMerge [
|
||||
(assertRelativeStateDir "rule")
|
||||
{
|
||||
systemd.services.thanos-rule = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
DynamicUser = true;
|
||||
StateDirectory = cfg.rule.stateDir;
|
||||
Restart = "always";
|
||||
ExecStart = thanos "rule";
|
||||
};
|
||||
};
|
||||
}
|
||||
]))
|
||||
|
||||
(mkIf cfg.compact.enable (mkMerge [
|
||||
(assertRelativeStateDir "compact")
|
||||
{
|
||||
systemd.services.thanos-compact =
|
||||
let wait = cfg.compact.startAt == null; in {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Type = if wait then "simple" else "oneshot";
|
||||
Restart = if wait then "always" else "no";
|
||||
DynamicUser = true;
|
||||
StateDirectory = cfg.compact.stateDir;
|
||||
ExecStart = thanos "compact";
|
||||
};
|
||||
} // optionalAttrs (!wait) { inherit (cfg.compact) startAt; };
|
||||
}
|
||||
]))
|
||||
|
||||
(mkIf cfg.downsample.enable (mkMerge [
|
||||
(assertRelativeStateDir "downsample")
|
||||
{
|
||||
systemd.services.thanos-downsample = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
DynamicUser = true;
|
||||
StateDirectory = cfg.downsample.stateDir;
|
||||
Restart = "always";
|
||||
ExecStart = thanos "downsample";
|
||||
};
|
||||
};
|
||||
}
|
||||
]))
|
||||
|
||||
(mkIf cfg.receive.enable (mkMerge [
|
||||
(assertRelativeStateDir "receive")
|
||||
{
|
||||
systemd.services.thanos-receive = {
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
DynamicUser = true;
|
||||
StateDirectory = cfg.receive.stateDir;
|
||||
Restart = "always";
|
||||
ExecStart = thanos "receive";
|
||||
};
|
||||
};
|
||||
}
|
||||
]))
|
||||
|
||||
];
|
||||
}
|
@ -1,9 +1,44 @@
|
||||
import ./make-test.nix {
|
||||
let
|
||||
grpcPort = 19090;
|
||||
queryPort = 9090;
|
||||
minioPort = 9000;
|
||||
pushgwPort = 9091;
|
||||
|
||||
s3 = {
|
||||
accessKey = "BKIKJAA5BMMU2RHO6IBB";
|
||||
secretKey = "V7f1CwQqAcwo80UEIJEjc5gVQUSSx5ohQ9GSrr12";
|
||||
};
|
||||
|
||||
objstore.config = {
|
||||
type = "S3";
|
||||
config = {
|
||||
bucket = "thanos-bucket";
|
||||
endpoint = "s3:${toString minioPort}";
|
||||
region = "us-east-1";
|
||||
access_key = s3.accessKey;
|
||||
secret_key = s3.secretKey;
|
||||
insecure = true;
|
||||
signature_version2 = false;
|
||||
encrypt_sse = false;
|
||||
put_user_metadata = {};
|
||||
http_config = {
|
||||
idle_conn_timeout = "0s";
|
||||
insecure_skip_verify = false;
|
||||
};
|
||||
trace = {
|
||||
enable = false;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
in import ./make-test.nix {
|
||||
name = "prometheus-2";
|
||||
|
||||
nodes = {
|
||||
one = { pkgs, ... }: {
|
||||
prometheus = { pkgs, ... }: {
|
||||
virtualisation.diskSize = 2 * 1024;
|
||||
environment.systemPackages = [ pkgs.jq ];
|
||||
networking.firewall.allowedTCPPorts = [ grpcPort ];
|
||||
services.prometheus2 = {
|
||||
enable = true;
|
||||
scrapeConfigs = [
|
||||
@ -11,7 +46,7 @@ import ./make-test.nix {
|
||||
job_name = "prometheus";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "127.0.0.1:9090" ];
|
||||
targets = [ "127.0.0.1:${toString queryPort}" ];
|
||||
labels = { instance = "localhost"; };
|
||||
}
|
||||
];
|
||||
@ -21,7 +56,7 @@ import ./make-test.nix {
|
||||
scrape_interval = "1s";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "127.0.0.1:9091" ];
|
||||
targets = [ "127.0.0.1:${toString pushgwPort}" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
@ -35,33 +70,169 @@ import ./make-test.nix {
|
||||
expr: count(up{job="prometheus"})
|
||||
''
|
||||
];
|
||||
globalConfig = {
|
||||
external_labels = {
|
||||
some_label = "required by thanos";
|
||||
};
|
||||
};
|
||||
extraFlags = [
|
||||
# Required by thanos
|
||||
"--storage.tsdb.min-block-duration=5s"
|
||||
"--storage.tsdb.max-block-duration=5s"
|
||||
];
|
||||
};
|
||||
services.prometheus.pushgateway = {
|
||||
enable = true;
|
||||
web.listen-address = ":${toString pushgwPort}";
|
||||
persistMetrics = true;
|
||||
persistence.interval = "1s";
|
||||
stateDir = "prometheus-pushgateway";
|
||||
};
|
||||
services.thanos = {
|
||||
sidecar = {
|
||||
enable = true;
|
||||
grpc-address = "0.0.0.0:${toString grpcPort}";
|
||||
inherit objstore;
|
||||
};
|
||||
|
||||
# TODO: Add some tests for these services:
|
||||
#rule = {
|
||||
# enable = true;
|
||||
# http-address = "0.0.0.0:19194";
|
||||
# grpc-address = "0.0.0.0:19193";
|
||||
# query.addresses = [
|
||||
# "localhost:19191"
|
||||
# ];
|
||||
# labels = {
|
||||
# just = "some";
|
||||
# nice = "labels";
|
||||
# };
|
||||
#};
|
||||
#
|
||||
#receive = {
|
||||
# http-address = "0.0.0.0:19195";
|
||||
# enable = true;
|
||||
# labels = {
|
||||
# just = "some";
|
||||
# nice = "labels";
|
||||
# };
|
||||
#};
|
||||
};
|
||||
};
|
||||
|
||||
query = { pkgs, ... }: {
|
||||
environment.systemPackages = [ pkgs.jq ];
|
||||
services.thanos.query = {
|
||||
enable = true;
|
||||
http-address = "0.0.0.0:${toString queryPort}";
|
||||
store.addresses = [
|
||||
"prometheus:${toString grpcPort}"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
store = { pkgs, ... }: {
|
||||
environment.systemPackages = with pkgs; [ jq thanos ];
|
||||
services.thanos.store = {
|
||||
enable = true;
|
||||
http-address = "0.0.0.0:10902";
|
||||
grpc-address = "0.0.0.0:${toString grpcPort}";
|
||||
inherit objstore;
|
||||
sync-block-duration = "1s";
|
||||
};
|
||||
services.thanos.compact = {
|
||||
enable = true;
|
||||
http-address = "0.0.0.0:10903";
|
||||
inherit objstore;
|
||||
consistency-delay = "5s";
|
||||
};
|
||||
services.thanos.query = {
|
||||
enable = true;
|
||||
http-address = "0.0.0.0:${toString queryPort}";
|
||||
store.addresses = [
|
||||
"localhost:${toString grpcPort}"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
s3 = { pkgs, ... } : {
|
||||
# Minio requires at least 1GiB of free disk space to run.
|
||||
virtualisation.diskSize = 2 * 1024;
|
||||
networking.firewall.allowedTCPPorts = [ minioPort ];
|
||||
|
||||
services.minio = {
|
||||
enable = true;
|
||||
inherit (s3) accessKey secretKey;
|
||||
};
|
||||
|
||||
environment.systemPackages = [ pkgs.minio-client ];
|
||||
};
|
||||
};
|
||||
|
||||
testScript = ''
|
||||
startAll;
|
||||
$one->waitForUnit("prometheus2.service");
|
||||
$one->waitForOpenPort(9090);
|
||||
$one->succeed("curl -s http://127.0.0.1:9090/metrics");
|
||||
testScript = { nodes, ... } : ''
|
||||
# Before starting the other machines we first make sure that our S3 service is online
|
||||
# and has a bucket added for thanos:
|
||||
$s3->start;
|
||||
$s3->waitForUnit("minio.service");
|
||||
$s3->waitForOpenPort(${toString minioPort});
|
||||
$s3->succeed(
|
||||
"mc config host add minio " .
|
||||
"http://localhost:${toString minioPort} ${s3.accessKey} ${s3.secretKey} S3v4");
|
||||
$s3->succeed("mc mb minio/thanos-bucket");
|
||||
|
||||
# Let's test if pushing a metric to the pushgateway succeeds
|
||||
# and whether that metric gets ingested by prometheus.
|
||||
$one->waitForUnit("pushgateway.service");
|
||||
$one->succeed(
|
||||
# Now that s3 has started we can start the other machines:
|
||||
$prometheus->start;
|
||||
$query->start;
|
||||
$store->start;
|
||||
|
||||
# Check if prometheus responds to requests:
|
||||
$prometheus->waitForUnit("prometheus2.service");
|
||||
$prometheus->waitForOpenPort(${toString queryPort});
|
||||
$prometheus->succeed("curl -s http://127.0.0.1:${toString queryPort}/metrics");
|
||||
|
||||
# Let's test if pushing a metric to the pushgateway succeeds:
|
||||
$prometheus->waitForUnit("pushgateway.service");
|
||||
$prometheus->succeed(
|
||||
"echo 'some_metric 3.14' | " .
|
||||
"curl --data-binary \@- http://127.0.0.1:9091/metrics/job/some_job");
|
||||
$one->waitUntilSucceeds(
|
||||
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=some_metric' " .
|
||||
"| jq '.data.result[0].value[1]' | grep '\"3.14\"'");
|
||||
"curl --data-binary \@- http://127.0.0.1:${toString pushgwPort}/metrics/job/some_job");
|
||||
|
||||
# Now check whether that metric gets ingested by prometheus.
|
||||
# Since we'll check for the metric several times on different machines
|
||||
# we abstract the test using the following function:
|
||||
|
||||
# Function to check if the metric "some_metric" has been received and returns the correct value.
|
||||
local *Machine::waitForMetric = sub {
|
||||
my ($self) = @_;
|
||||
$self->waitUntilSucceeds(
|
||||
"curl -sf 'http://127.0.0.1:${toString queryPort}/api/v1/query?query=some_metric' " .
|
||||
"| jq '.data.result[0].value[1]' | grep '\"3.14\"'");
|
||||
};
|
||||
|
||||
$prometheus->waitForMetric;
|
||||
|
||||
# Let's test if the pushgateway persists metrics to the configured location.
|
||||
$one->waitUntilSucceeds("test -e /var/lib/prometheus-pushgateway/metrics");
|
||||
$prometheus->waitUntilSucceeds("test -e /var/lib/prometheus-pushgateway/metrics");
|
||||
|
||||
# Test thanos
|
||||
$prometheus->waitForUnit("thanos-sidecar.service");
|
||||
|
||||
# Test if the Thanos query service can correctly retrieve the metric that was sent above.
|
||||
$query->waitForUnit("thanos-query.service");
|
||||
$query->waitForMetric;
|
||||
|
||||
# Test if the Thanos sidecar has correctly uploaded its TSDB to S3, if the
|
||||
# Thanos storage service has correctly downloaded it from S3 and if the Thanos
|
||||
# query service running on $store can correctly retrieve the metric:
|
||||
$store->waitForUnit("thanos-store.service");
|
||||
$store->waitForMetric;
|
||||
|
||||
$store->waitForUnit("thanos-compact.service");
|
||||
|
||||
# Test if the Thanos bucket command is able to retrieve blocks from the S3 bucket
|
||||
# and check if the blocks have the correct labels:
|
||||
$store->succeed(
|
||||
"thanos bucket ls" .
|
||||
" --objstore.config-file=${nodes.store.config.services.thanos.store.objstore.config-file}" .
|
||||
" --output=json | jq .thanos.labels.some_label | grep 'required by thanos'");
|
||||
'';
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user