storj/satellite/repair/priority.go

// Copyright (C) 2020 Storj Labs, Inc.
// See LICENSE for copying information.

package repair

import (
	"math"
)

// SegmentHealth scores how healthy a segment in the repair queue is. The
// score is the reciprocal of SegmentDanger for the same arguments, so a
// smaller score means a more endangered segment; segments with lower health
// should be repaired first.
func SegmentHealth(numHealthy, minPieces int, failureRate float64) float64 {
	danger := SegmentDanger(numHealthy, minPieces, failureRate)
	return 1.0 / danger
}

// SegmentDanger estimates the probability that a segment requiring minPieces
// pieces for reconstruction, and currently having numHealthy healthy pieces,
// will be lost during the next time period.
//
// The model rests on two assumptions:
//
// * Every node independently has a failureRate chance of going offline
//   during the next time period.
// * Failures of different nodes are uncorrelated. That is not strictly true
//   in practice (nodes can share an operator or network infrastructure, which
//   correlates their failures), but correlation is hard to model. The best
//   mitigation is to avoid storing pieces of one segment on related nodes.
//
// (The length of the "time period" is arbitrary: the danger value is
// expressed in terms of whatever period failureRate was measured over. One
// convenient way to picture it is "one repair worker iteration".)
//
// Under those assumptions the number of nodes holding this segment that go
// offline is binomially distributed:
//
//     X ~ Binom(numHealthy, failureRate)
//
// The segment is lost when more than (numHealthy - minPieces) of those nodes
// go offline, so the quantity of interest is
//
//     Pr[X > (numHealthy - minPieces)]
//
// which is the complement of the binomial CDF:
//
//     Pr[X > (numHealthy - minPieces)] = 1 - Pr[X <= (numHealthy - minPieces)]
//
// That complement is the value returned.
func SegmentDanger(numHealthy, minPieces int, failureRate float64) float64 {
	// The largest number of simultaneous node failures the segment survives.
	tolerable := numHealthy - minPieces
	survival := binomialCDF(float64(tolerable), float64(numHealthy), failureRate)
	return 1.0 - survival
}

// lnGamma returns the first result of math.Lgamma(x), the natural logarithm
// of the absolute value of Gamma(x).
func lnGamma(x float64) float64 {
	// The sign reported by math.Lgamma is deliberately dropped; callers in
	// this file have no use for it.
	logAbs, _ := math.Lgamma(x)
	return logAbs
}

// The following functions are based on code from
// Numerical Recipes in C, Second Edition, Section 6.4 (pp. 227-228).

// betaI evaluates the incomplete beta function I_x(a, b).
//
// It returns NaN when x lies outside the closed interval [0, 1].
func betaI(a, b, x float64) float64 {
	if x < 0.0 || x > 1.0 {
		return math.NaN()
	}

	// Factor multiplying the continued fraction. It stays zero at the
	// endpoints x == 0 and x == 1.
	var prefactor float64
	if x > 0.0 && x < 1.0 {
		prefactor = math.Exp(lnGamma(a+b) - lnGamma(a) - lnGamma(b) + a*math.Log(x) + b*math.Log(1.0-x))
	}

	// Below this point the continued fraction is evaluated directly; at or
	// above it, the symmetry relation I_x(a, b) = 1 - I_(1-x)(b, a) is used.
	crossover := (a + 1.0) / (a + b + 2.0)
	if x < crossover {
		return prefactor * betaCF(a, b, x) / a
	}
	return 1.0 - prefactor*betaCF(b, a, 1.0-x)/b
}

const (
	// maxIter bounds the number of iterations betaCF performs before giving
	// up. It is unlikely to be reached, as betaCF is expected to converge
	// quickly for typical values.
	maxIter = 100

	// epsilon is the convergence tolerance for betaCF: iteration stops once
	// the latest multiplicative correction differs from 1 by less than this.
	epsilon = 1.0e-14
)

// betaCF evaluates the continued fraction for the incomplete beta function
// by a modified Lentz's method.
//
// It returns the value of the continued fraction once successive corrections
// agree to within epsilon, or NaN if that has not happened after maxIter
// iterations (a or b too big, or maxIter too small).
func betaCF(a, b, x float64) float64 {
	// tiny replaces denominators that are zero or vanishingly small. It must
	// be small enough not to disturb ordinary values, yet large enough that
	// 1/tiny is still finite. math.SmallestNonzeroFloat64 does not qualify:
	// its reciprocal overflows to +Inf, which then turns the whole recurrence
	// into NaN.
	const tiny = 1.0e-30

	avoidZero := func(f float64) float64 {
		if math.Abs(f) < tiny {
			return tiny
		}
		return f
	}

	qab := a + b
	qap := a + 1.0
	qam := a - 1.0
	c := 1.0
	d := 1.0 / avoidZero(1.0-qab*x/qap)
	h := d

	for i := 1; i <= maxIter; i++ {
		m := float64(i)
		m2 := 2.0 * m

		// one step (the even one) of the recurrence
		aa := m * (b - m) * x / ((qam + m2) * (a + m2))
		d = 1.0 / avoidZero(1.0+aa*d)
		c = avoidZero(1.0 + aa/c)
		h *= d * c

		// next step of the recurrence (the odd one)
		aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2))
		d = 1.0 / avoidZero(1.0+aa*d)
		c = avoidZero(1.0 + aa/c)
		del := d * c
		h *= del

		// Converged once the last correction factor is indistinguishable
		// from 1 at the requested tolerance.
		if math.Abs(del-1.0) < epsilon {
			return h
		}
	}
	// a or b too big, or maxIter too small
	return math.NaN()
}

// binomialCDF evaluates the CDF of the binomial distribution Binom(n, p) at k,
// after rounding k down to an integer value.
//
// The result is NaN when k is negative or exceeds n, and 1 when k equals n.
// For k equal to 0 it is computed directly as (1-p)**(n-k); every other case
// goes through the incomplete beta function.
func binomialCDF(k, n, p float64) float64 {
	k = math.Floor(k)

	switch {
	case k < 0.0 || n < k:
		// k lies outside the support of the distribution
		return math.NaN()
	case k == n:
		return 1.0
	case k == 0:
		return math.Pow(1.0-p, n-k)
	default:
		return betaI(n-k, k+1.0, 1.0-p)
	}
}