bees: init at 0.6.1

Introduce an extent-layer (as opposed to the existing file-level) deduplication
system for btrfs. This provides a means of finding similarities within
non-identical files, when they contain identical, aligned blocks.
This commit is contained in:
Charles Duffy 2018-10-14 00:40:37 -05:00
parent 6845ebbff1
commit 7df55477fd
No known key found for this signature in database
GPG Key ID: BB397BA4CAA75978
3 changed files with 294 additions and 0 deletions

View File

@ -0,0 +1,223 @@
#!@bash@/bin/bash
# The @name@ tokens on the lines above and below are placeholders; the Nix
# expression that packages this script runs substituteAll over it and passes
# attributes with these names (bash, coreutils, utillinux, btrfsProgs, bees).
PATH=@bash@/bin:@coreutils@/bin:@utillinux@/bin:@btrfsProgs@/bin:$PATH
beesd_bin=@bees@/lib/bees/bees
# PLEASE KEEP NIX-ISMS ABOVE THIS LINE TO EASE UPSTREAM MERGE
#!/usr/bin/env bash
# extglob is needed for the +(...) pattern used by readConfigFileIfExists when
# it strips trailing comments.
shopt -s extglob
# Upstream wrapper requires UUID to be used for configuration.
# However, when declaratively describing a host, we may not know its UUID, and
# shouldn't need to persist something that will differ between hosts built from
# the same configuration template.
# Thus, for using bees from NixOS, we have our own wrapper, which supports not
# just UUID but any specification permitted by findmnt
# If bees_debug is set to a non-empty value, trace every command, prefixing each
# trace line with the script's base name and the line number.
[[ $bees_debug ]] && { PS4=':${BASH_SOURCE##*/}:$LINENO+'; set -x; }
# Print command-line help to stderr, then exit with status 1.
#
# The heredoc delimiter is unquoted on purpose so that the script name and the
# effective config directory are interpolated. Anything that must be shown
# literally (the arithmetic examples) therefore needs its "$" escaped;
# otherwise bash would evaluate it and print a number instead of the example.
usage() {
  cat >&2 <<EOF
Usage: ${BASH_SOURCE##*/} run|cleanup config-name|fsSpec [idxSizeMB=...] [verbosity=...] [workDir=...] [-- daemon-options...]
fsSpec should be in a format recognized by findmnt. Alternately,
"config-name" may refer to a file that exists in ${bees_config_dir:-/etc/bees}
with a .conf extension; if that file does not specify UUID, findmnt will be
used in addition.
Note that while config files may presently use shell arithmetic, use of this
functionality is not encouraged going forward: Setting ''idxSizeMB=4096'' is
preferred over ''DB_SIZE=\$((1024*1024*1024*4))'' or ''DB_SIZE=\$(( AL16M * 256 ))'',
although both of these are presently supported.
If fsSpec contains a /, it is assumed to be a mount point to be looked up by
findmnt, not a config file name.
daemon-options are passed directly through to the daemon on startup, as
documented at https://github.com/Zygo/bees/blob/master/docs/options.md.
EOF
  exit 1
}
# Abort the script: write all arguments (joined by spaces) to stderr as a
# single line and exit with status 1.
die() {
  >&2 echo "$*"
  exit 1
}
# Names of the settings this wrapper understands. sandboxedConfigFileEval asks
# the sandboxed shell to report the value of each of these variables (together
# with the legacy names below) after a config file has been evaluated.
allConfigNames=( blockdev fsSpec home idxSize idxSizeMB mntDir runDir status verbosity workDir )
# Alternate names for configuration values; "bees_" will always be prepended
# (set_option translates a key found here to its value before storing it).
declare -A altConfigNames=(
# from original bees wrapper
[BEESHOME]=home
[BEESSTATUS]=status
[MNT_DIR]=mntDir
[UUID]=uuid
[WORK_DIR]=runDir
[DB_SIZE]=idxSize
)
# legacy bees config files can be arbitrary shell scripts, so we need to actually evaluate them
#
# Evaluates the config file named by $1 in a separate restricted bash (-r) with
# PATH pointing at /var/empty, then writes every non-empty known variable to
# stdout as a NUL-terminated "name=value" record. Anything the config file
# itself prints is redirected to stderr so it cannot corrupt that stream.
# AL128K and AL16M are provided to the config file as ready-made constants.
sandboxedConfigFileEval() {
  local bash_exe
  bash_exe=$(type -P bash) || exit
  # With "bash -c script arg...", the first argument after the script becomes
  # $0 and is NOT visited by "for var". The "_" placeholder occupies that slot
  # so that every real variable name lands in the positional parameters.
  PATH=/var/empty ENV='' BASH_ENV='' AL128K="$((128*1024))" AL16M="$((16*1024*1024))" "$bash_exe" -r ${bees_debug+-x} \
    -c 'eval "$(</dev/stdin)" >&2; for var; do [[ ${!var} ]] && printf "%q=%s\\0" "$var" "${!var}"; done' \
    _ "${!altConfigNames[@]}" "${allConfigNames[@]}" \
    <"$1"
}
# Load settings from the config file named by $1.
#
# Returns 1 without doing anything when the file is missing or empty.
# Otherwise the file is evaluated by sandboxedConfigFileEval and each
# NUL-terminated record it emits is handed to set_option. Whitespace followed
# by "#" and everything after it is stripped from a record first; records that
# end up empty are skipped, and records without "=" produce a warning on stderr.
readConfigFileIfExists() {
  local entry
  if [[ ! -s $1 ]]; then
    return 1
  fi
  while IFS= read -r -d '' entry; do
    # Drop a trailing comment (requires extglob for the +(...) pattern).
    entry=${entry%%+([[:space:]])"#"*}
    if [[ -z $entry ]]; then
      continue
    fi
    if [[ $entry != *=* ]]; then
      printf 'WARNING: Config file line not recognized: %q\n' "$entry" >&2
      continue
    fi
    set_option "$entry"
  done < <(sandboxedConfigFileEval "$1")
}
# Store one "key=value" setting in the global variable bees_<key>.
#
# The key is everything before the first "="; the value is everything after it
# (so the value itself may contain "="). Keys listed in altConfigNames are
# translated to their modern name before the variable is assigned.
set_option() {
  local name value
  name=${1%%=*}
  value=${1#*=}
  if [[ ${altConfigNames[$name]} ]]; then
    name=${altConfigNames[$name]}
  fi
  printf -v "bees_$name" %s "$value"
}
# Matches a whole string made of hex digits in 8-4-4-4-12 groups separated by
# dashes; _setup uses it to decide whether the fsSpec is a bare UUID.
uuid_re='^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}$'
# Shared code for setting configuration used by other operations.
#
# Takes the config-name/fsSpec as $1. Any key=value options from the command
# line have already been stored in bees_* globals by set_option before this
# runs; this function then reads the matching config file(s) (whose values
# replace those globals), resolves the filesystem UUID, and fills in defaults
# for every setting that is still unset.
#
# Exits the script if the filesystem cannot be identified, is not btrfs, its
# block device is missing, or the index size is not a multiple of 16MB.
_setup() {
  declare fstype
  local -a allConfFiles matchingConfFiles
  bees_fsSpec=$1; shift
  # Look for file-based configuration, additional to honoring configuration on the command line
  bees_config_dir="${bees_config_dir:-/etc/bees}"
  if [[ $bees_fsSpec =~ $uuid_re ]]; then
    bees_uuid=$bees_fsSpec
    # If our spec looks like a bare UUID, and no config file exists in the new
    # format, fall back to legacy config file search mechanism (grep; ewww).
    if ! readConfigFileIfExists "$bees_config_dir/UUID=$bees_fsSpec.conf"; then
      # Legacy approach to finding a config file: Grep for a *.conf file
      # containing the UUID within its text. Permitting spaces around the "="
      # appears to be a bug, but is retained for compatibility with the
      # original upstream script.
      allConfFiles=( "$bees_config_dir"/*.conf )
      # Without nullglob, a glob with no matches is left as the literal
      # pattern; discard it so grep is not run against a nonexistent file.
      [[ -e ${allConfFiles[0]} ]] || allConfFiles=( )
      if (( ${#allConfFiles[@]} )); then
        # in read or readarray with -d '', the NUL terminating the empty string is used as delimiter character.
        # The pattern must include the UUID itself (optionally quoted);
        # otherwise every file that assigns *any* UUID would match.
        # $bees_fsSpec matched uuid_re above, so it holds only hex digits and
        # dashes and is safe to embed in the regex.
        readarray -d '' -t matchingConfFiles < <(grep -E -l -Z "^[^#]*UUID[[:space:]]*=[[:space:]]*[\"']?$bees_fsSpec" "${allConfFiles[@]}")
      else
        matchingConfFiles=( )
      fi
      if (( ${#matchingConfFiles[@]} == 1 )); then
        # Exactly one configuration file exists in our target directory with a reference to the UUID given.
        bees_config_file=${matchingConfFiles[0]}
        readConfigFileIfExists "$bees_config_file"
        echo "NOTE: Please consider renaming $bees_config_file to $bees_config_dir/UUID=$bees_fsSpec.conf" >&2
        echo "      ...and passing UUID=$bees_fsSpec on startup." >&2
      elif (( ${#matchingConfFiles[@]} > 1 )); then
        # The legacy wrapper would silently use the first file and ignore
        # others, but... no.
        echo "ERROR: Passed a bare UUID, but multiple configuration files match it:" >&2
        printf ' - %q\n' "${matchingConfFiles[@]}" >&2
        die "Unable to continue."
      fi
    fi
  else
    # For a non-UUID fsSpec that is not a path, look only for a config file
    # exactly matching its text.
    #
    # (Passing a mount point as a fsSpec is only supported with the new
    # wrapper; all key=value pairs can be passed on the command line in this
    # mode, so config file support is not needed).
    [[ $bees_fsSpec = */* ]] || readConfigFileIfExists "$bees_config_dir/$bees_fsSpec.conf"
  fi
  [[ $bees_uuid ]] || {
    # if bees_uuid is not in our .conf file, look it up with findmnt
    read -r bees_uuid fstype < <(findmnt -n -o uuid,fstype "$bees_fsSpec") && [[ $fstype ]] \
      || die "Unable to look up filesystem $bees_fsSpec with findmnt"
    [[ $fstype = btrfs ]] || die "Device type is $fstype, not btrfs"
  }
  # Pick up a UUID-named config file now that the UUID is known.
  [[ $bees_uuid = */* ]] || readConfigFileIfExists "$bees_config_dir/UUID=$bees_uuid.conf"
  # Honor any values read from config files above; otherwise, set defaults.
  bees_workDir="${bees_workDir:-.beeshome}"
  bees_runDir="${bees_runDir:-/run/bees}"
  bees_mntDir="${bees_mntDir:-$bees_runDir/mnt/$bees_uuid}"
  bees_home="${bees_home:-$bees_mntDir/$bees_workDir}"
  bees_status="${bees_status:-${bees_runDir}/$bees_uuid.status}"
  bees_verbosity="${bees_verbosity:-6}"
  bees_idxSizeMB="${bees_idxSizeMB:-1024}"
  # An explicit idxSize (bytes) wins over idxSizeMB.
  bees_idxSize=${bees_idxSize:-"$(( bees_idxSizeMB * 1024 * 1024 ))"}
  bees_blockdev=${bees_blockdev:-"/dev/disk/by-uuid/$bees_uuid"}
  [[ -b $bees_blockdev ]] || die "Block device $bees_blockdev missing"
  (( bees_idxSize % (16 * 1024 * 1024) == 0 )) || die "DB size must be divisible by 16MB"
}
# "run" mode: prepare the filesystem and replace this process with the daemon.
#
# $1 is the config-name/fsSpec (passed to _setup); any remaining arguments are
# handed to the daemon unchanged, ahead of the mount directory. Steps: mount
# the filesystem's top-level subvolume at bees_mntDir if nothing is mounted
# there, make sure bees_home is a subvolume, size beeshash.dat to bees_idxSize,
# then exec the daemon. Any failing step exits the script.
do_run() {
  local db old_db_size new_db_size
  _setup "$1"; shift
  mkdir -p -- "$bees_mntDir" || exit
  # subvol id 5 is reserved for the root subvolume of a btrfs filesystem.
  mountpoint -q "$bees_mntDir" || mount -osubvolid=5 -- "$bees_blockdev" "$bees_mntDir" || exit
  if [[ -d $bees_home ]]; then
    btrfs subvolume show "$bees_home" >/dev/null 2>&1 || die "$bees_home exists but is not a subvolume"
  else
    btrfs subvolume create "$bees_home" || exit
    sync # workaround for Zygo/bees#93
  fi
  db=$bees_home/beeshash.dat
  # Stop here if the hash file cannot be created or inspected; otherwise an
  # empty size would reach the arithmetic comparison below.
  touch -- "$db" || exit
  old_db_size=$(stat -c %s -- "$db") || exit
  new_db_size=$bees_idxSize
  if (( old_db_size != new_db_size )); then
    # The hash file is being resized: remove the crawl-state file first
    # (presumably so scanning restarts against the resized table -- confirm
    # against the bees documentation).
    rm -f -- "$bees_home"/beescrawl."$bees_uuid".dat
    truncate -s "$new_db_size" -- "$db" || exit
  fi
  chmod 700 -- "$bees_home"
  # BEESSTATUS and BEESHOME are the only variables handled by the legacy
  # wrapper for which getenv() is called in C code.
  BEESSTATUS=$bees_status BEESHOME=$bees_home exec "${beesd_bin:-/lib/bees/bees}" \
    --verbose "$bees_verbosity" \
    "$@" "$bees_mntDir" || exit
}
# "cleanup" mode: lazily unmount the directory that "run" mounted.
#
# $1 is the config-name/fsSpec (passed to _setup). Note that the script exits
# with a non-zero status both when the unmount fails and when bees_mntDir is
# not currently a mount point.
do_cleanup() {
  _setup "$1"
  shift
  mountpoint -q "$bees_mntDir" || exit
  umount -l -- "$bees_mntDir" || exit
}
# Entry point. Needs at least a mode and a config-name/fsSpec; the mode must
# name an existing do_* function (currently "run" or "cleanup").
if (( $# < 2 )); then
  usage
fi
if ! declare -f "do_$1" >/dev/null 2>&1; then
  usage
fi
mode=$1
shift
# The first remaining argument (config-name|fsSpec) is passed through literally.
declare -a args=( "$1" )
shift
# Remaining arguments: key=value pairs become settings; "--" ends option
# parsing and everything after it is passed through untouched; anything else
# is passed through as-is.
while (( $# )); do
  case $1 in
    *=*)
      set_option "$1"
      ;;
    --)
      shift
      args+=( "$@" )
      break
      ;;
    *)
      args+=( "$1" )
      ;;
  esac
  shift
done
"do_$mode" "${args[@]}"

View File

@ -0,0 +1,69 @@
# Builds the bees daemon and wraps it, together with the bees-service-wrapper
# script, into a single "bees-service" output.
{ stdenv, runCommand, makeWrapper, fetchFromGitHub, bash, btrfs-progs, coreutils, pythonPackages, utillinux }:
let
version = "0.6.1";
sha256 = "0h7idclmhyp14mq6786x7f2237vqpn70gyi88ik4g70xl84yfgyh";
# The upstream bees build itself, fetched from the v${version} tag.
bees = stdenv.mkDerivation rec {
name = "bees-${version}";
inherit version;
src = fetchFromGitHub {
owner = "Zygo";
repo = "bees";
rev = "v${version}";
inherit sha256;
};
buildInputs = [
btrfs-progs # for btrfs/ioctl.h
utillinux # for uuid.h
];
nativeBuildInputs = [
pythonPackages.markdown # documentation build
];
# Shadow `git` for the duration of the build: `git describe` prints the
# package version, any other subcommand is forwarded to the real git.
# NOTE(review): presumably the upstream build derives its version string from
# `git describe`, which cannot work on a fetched tarball -- confirm.
preBuild = ''
git() { if [[ $1 = describe ]]; then echo ${version}; else command git "$@"; fi; }
export -f git
'';
# Remove the shim again once the build phase is over.
postBuild = ''
unset -f git
'';
# ETC_PREFIX is set here and, with a different value, in makeFlags below.
# NOTE(review): presumably the value here is what gets compiled in as the
# runtime config location while the makeFlags value only decides where files
# are installed -- confirm against stdenv's make flag ordering.
buildFlags = [
"ETC_PREFIX=/var/run/bees/configs"
];
makeFlags = [
"SHELL=bash"
"PREFIX=$(out)"
"ETC_PREFIX=$(out)/etc"
"BEES_VERSION=${version}"
"SYSTEMD_SYSTEM_UNIT_DIR=$(out)/etc/systemd/system"
];
meta = with stdenv.lib; {
homepage = "https://github.com/Zygo/bees";
description = "Block-oriented BTRFS deduplication service";
license = licenses.gpl3;
platforms = platforms.linux;
maintainers = with maintainers; [ chaduffy ];
longDescription = "Best-Effort Extent-Same: bees finds not just identical files, but also identical extents within files that differ";
};
};
in
# The final output: bin/bees-service-wrapper (this directory's wrapper script
# with its @name@ placeholders substituted) plus a bin/beesd symlink into the
# bees build. The attribute names below correspond to the placeholders the
# wrapper script uses (@bash@, @bees@, @coreutils@, @utillinux@, @btrfsProgs@).
runCommand "bees-service-${version}" {
inherit bash bees coreutils utillinux;
btrfsProgs = btrfs-progs; # needs to be a valid shell variable name
} ''
mkdir -p -- "$out/bin"
substituteAll ${./bees-service-wrapper} "$out"/bin/bees-service-wrapper
chmod +x "$out"/bin/bees-service-wrapper
ln -s ${bees}/bin/beesd "$out"/bin/beesd
''

View File

@ -21893,6 +21893,8 @@ with pkgs;
beep = callPackage ../misc/beep { };
bees = callPackage ../tools/filesystems/bees { };
blackbird = callPackage ../misc/themes/blackbird { };
bootil = callPackage ../development/libraries/bootil { };