import math def compute_fec_params(k, m, n): """ Returns the FEC encoding parameters k_e and m_e, where k_e is the number of shares required to reconstruct the file, and m_e is the number of shares to be deployed to the servers. 'k' is the numer of servers that must be available to reconstruct the file, 'm' is the maximum number of servers to which shares should be deployed, and 'n' is the number of servers available. In the nominal case, n >= k, and in that case this function returns parameters that ensure each server gets at least one share (to maximize parallelism available for retrieval), that any k-server subset of n has sufficient shares to reconstruct the file and that FEC expansion is minimal, consistent with the retrieval performance and availability goals. If n < k, this function ensures that enough shares are deployed that the file can be retrieved, providing as much redundancy as possible, but without much more FEC expansion than is allowed by the user's choice of k and m. """ assert k <= m assert n <= m if n <= k: return k, min(k * n, k * int(math.ceil(float(m) / k))) if n % k == 0: return n, n * n / k else: return n, n - k + 1 + n * (n // k) def compute_p_failure(k_e, m_e, n, p): """ Compute the probability that the file is lost given that 'm_e' shares are deployed to 'n' servers, that 'k_e' shares are required for recovery and that any given server fails with probability 'p' """ # Allocate shares to servers. share_lst = [ m_e // n ] * n m_e -= m_e // n * n for i in range(0, n): if m_e == 0: break share_lst[i] += 1 m_e -= 1 # Construct per-server share survival probability mass functions server_pmfs = [] for shares in share_lst: server_pmf = [ p ] + [ 0 ] * (shares - 1) + [ 1 - p ] server_pmfs.append(server_pmf) # Convolve per-server PMFs to produce aggregate share survival PMF pmf = reduce(convolve, server_pmfs) # Probability of file survival is the probability that at least # k_e shares survive. return 1 - sum(pmf[k_e:]) def convolve(list_a, list_b): """ Returns the discrete convolution of two lists. Given two random variables X and Y, the convolution of their probability mass functions Pr(X) and Pr(Y) is equal to the Pr(X+Y). """ n = len(list_a) m = len(list_b) result = [] for i in range(n + m - 1): sum = 0.0 lower = max(0, i - n + 1) upper = min(m - 1, i) for j in range(lower, upper+1): sum += list_a[i-j] * list_b[j] result.append(sum) return result if __name__ == "__main__": for n in range(1, 11): k_e, m_e = compute_fec_params(3, 10, n) p_fail = compute_p_failure(k_e, m_e, n, 0.001) s = "%2d server(s), %2d shares, %2d needed, %2.2f expansion, %0.2e p_fail" print s % (n, m_e, k_e, float(m_e) / float(k_e), p_fail)