Commit dbe0e61cb7208c7b078c1bb65d86e5ff72199172 - python-pyeclib

Added a new tool to help choose the best EC scheme, given a set of constraints. Kevin Greenan 10 years ago

3 changed file(s) with 244 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all

-2

src/c/pyeclib_c/pyeclib_c.c less more

80	80	case PYECC_RS_VAND:
81	81	default:
82	82	{
83		int max_symbols;
	83	long long max_symbols;
84	84
85	85	if (w != 8 && w != 16 && w != 32) {
86	86	return 0;
87	87	}
88		max_symbols = 1 << w;
	88	max_symbols = 1LL << w;
89	89	if ((k+m) > max_symbols) {
90	90	return 0;
91	91	}

+39

-9

src/python/pyeclib/core.py less more

24	24	import pyeclib_c
25	25	import math
26	26
	27	#
	28	# Generic ECPyECLibException
	29	#
	30	class ECPyECLibException(Exception):
	31	def __init__(self, error_str):
	32	self.error_str = error_str
	33	def __str__(self):
	34	return self.error_str
	35
27	36	class ECPyECLibDriver(object):
28	37	def __init__(self, k, m, type):
29	38	self.ec_rs_vand = "rs_vand"

36	45	self.ec_rs_cauchy_best_w = 4
37	46	self.k = k
38	47	self.m = m
	48
	49	#
	50	# Override the default wordsize (w) for Reed-Solomon, if specified
	51	#
	52	if type[:len(self.ec_rs_vand)] == self.ec_rs_vand and len(type) > len(self.ec_rs_vand):
	53	type_ary = type.split("_")
	54	if len(type_ary) != 3:
	55	raise ECPyECLibException("%s is not a valid EC type for PyECLib!" % type)
	56	self.ec_rs_vand_best_w = int(type_ary[2])
	57	type = self.ec_rs_vand
	58
	59	#
	60	# Override the default wordsize (w) for Cauchy, if specified
	61	#
	62	if type[:len(self.ec_rs_cauchy_orig)] == self.ec_rs_cauchy_orig and len(type) > len(self.ec_rs_cauchy_orig):
	63	type_ary = type.split("_")
	64	if len(type_ary) != 4:
	65	raise ECPyECLibException("%s is not a valid EC type for PyECLib!" % type)
	66	self.ec_rs_cauchy_best_w = int(type_ary[3])
	67	type = self.ec_rs_cauchy_orig
	68
39	69	if type in self.ec_types:
40	70	self.type = type
41	71	else:
42		raise ECDriverError("%s is not a valid EC type for PyECLib!")
	72	raise ECPyECLibException("%s is not a valid EC type for PyECLib!")
43	73
44	74	if self.type == self.ec_rs_vand:
45	75	self.w = self.ec_rs_vand_best_w

65	95	try:
66	96	ret_string = pyeclib_c.fragments_to_string(self.handle, fragment_payloads)
67	97	except:
68		raise ECDriverError("Error in ECPyECLibDriver.decode")
	98	raise ECPyECLibException("Error in ECPyECLibDriver.decode")
69	99
70	100	if ret_string is None:
71	101	(data_frags, parity_frags, missing_idxs) = pyeclib_c.get_fragment_partition(self.handle, fragment_payloads)

134	164	Stripe an arbitrary-sized string into k fragments
135	165	:param k: the number of data fragments to stripe
136	166	:param m: the number of parity fragments to stripe
137		:raises: ECDriverError if there is an error during encoding
	167	:raises: ECPyECLibException if there is an error during encoding
138	168	"""
139	169	self.k = k
140	170
141	171	if m != 0:
142		raise ECDriverError("This driver only supports m=0")
	172	raise ECPyECLibException("This driver only supports m=0")
143	173
144	174	self.m = m
145	175

148	178	Stripe an arbitrary-sized string into k fragments
149	179	:param bytes: the buffer to encode
150	180	:returns: a list of k buffers (data only)
151		:raises: ECDriverError if there is an error during encoding
	181	:raises: ECPyECLibException if there is an error during encoding
152	182	"""
153	183	# Main fragment size
154	184	fragment_size = math.ceil(len(bytes) / float(self.k))

171	201	Convert a k-fragment data stripe into a string
172	202	:param fragment_payloads: fragments (in order) to convert into a string
173	203	:returns: a string containing the original data
174		:raises: ECDriverError if there is an error during decoding
	204	:raises: ECPyECLibException if there is an error during decoding
175	205	"""
176	206
177	207	if len(fragment_payloads) != self.k:
178		raise ECDriverError("Decode requires %d fragments, %d fragments were given" % (len(fragment_payloads), self.k))
	208	raise ECPyECLibException("Decode requires %d fragments, %d fragments were given" % (len(fragment_payloads), self.k))
179	209
180	210	ret_string = ''
181	211

192	222	:param available_fragment_payloads: available fragments (in order)
193	223	:param missing_fragment_indexes: indexes of missing fragments
194	224	:returns: a string containing the original data
195		:raises: ECDriverError if there is an error during reconstruction
	225	:raises: ECPyECLibException if there is an error during reconstruction
196	226	"""
197	227	if len(available_fragment_payloads) != self.k:
198		raise ECDriverError("Reconstruction requires %d fragments, %d fragments were given" % (len(available_fragment_payloads), self.k))
	228	raise ECPyECLibException("Reconstruction requires %d fragments, %d fragments were given" % (len(available_fragment_payloads), self.k))
199	229
200	230	return available_fragment_payloads
201	231

+203

-0

tools/pyeclib_conf_tool.py less more

	0	# PyEClib Companion tool
	1	# Goal: When defining an EC pool, help cluster admin make an informed choice between available EC implementations. Generate sample swift.conf + swift-ring-builder hints.
	2	#
	3	# Suggested features:
	4	#
	5	# List the "EC types" - EC algorithms
	6	# List implementations of each EC type available on the platform (dumb-software-only, software with SIMD acceleration, specialized hardware, etc).
	7	# Benchmark each algorithm with possible implementation and display performance numbers.
	8	# Generate sample EC policy entry (for inclusion in swift.conf) for the best performing algorithm + implementation. (And optionally provide swift-ring-builder hints).
	9	# Suggested EC policy entry format:
	10	#
	11	# ======== swift.conf ============
	12	# [storage-policy:10]
	13	# type = erasure_coding
	14	# name = ec_rs_cauchy_orig_12_2
	15	# ec_type = rs_cauchy_orig
	16	# ec_k = 12
	17	# ec_m = 2
	18	# ============================
	19	#
	20	# (ec_type values are one of those available within PyEClib)
	21
	22	#
	23	# User input: Num data, num parity, average file size
	24	# Output: Ordered list of options and their corresponding conf entries (limit 10)
	25	#
	26
	27	import pyeclib
	28	from pyeclib.ec_iface import ECDriver
	29	import random
	30	import string
	31	import sys
	32	import os
	33	import argparse
	34	import time
	35	import math
	36
	37	class Timer:
	38	def __init__(self):
	39	self.start_time = 0
	40	self.end_time = 0
	41
	42	def reset(self):
	43	self.start_time = 0
	44	self.end_time = 0
	45
	46	def start(self):
	47	self.start_time = time.time()
	48
	49	def stop(self):
	50	self.end_time = time.time()
	51
	52	def curr_delta(self):
	53	return self.end_time - self.start_time
	54
	55	def stop_and_return(self):
	56	self.end_time = time.time()
	57	return self.curr_delta()
	58
	59	def nCr(n,r):
	60	f = math.factorial
	61	return f(n) / f(r) / f(n-r)
	62
	63	class ECScheme:
	64	def __init__(self, k, m, w, type):
	65	self.k = k
	66	self.m = m
	67	self.w = w
	68	self.type = type
	69
	70	def __str__(self):
	71	return "k=%d m=%d w=%d type=%s" % (self.k, self.m, self.w, self.type)
	72
	73	valid_flat_xor_3 = [(6,6), (7,6), (8,6), (9,6), (10,6), (11,6), (12,6), (13,6), (14,6), (15,6)]
	74	valid_flat_xor_4 = [(6,6), (7,6), (8,6), (9,6), (10,6), (11,6), (12,6), (13,6), (14,6), (15,6), (16,6), (17,6), (18,6), (19,6), (20,6)]
	75
	76	def get_viable_schemes(max_num_frags, minimum_rate, avg_stripe_size, fault_tolerance):
	77
	78	list_of_schemes = []
	79
	80	#
	81	# Get min_k from (minimum_rate * max_num_frags)
	82	#
	83	min_k = int(math.ceil(minimum_rate * max_num_frags))
	84
	85	#
	86	# Get min_m from the fault tolerance
	87	#
	88	min_m = fault_tolerance
	89
	90	#
	91	# Is not information theoretically possible
	92	#
	93	if (min_k + min_m) > max_num_frags:
	94	return list_of_schemes
	95
	96	#
	97	# Iterate over EC(k, max_num_frags-k) k \in [min_k, n-min_m]
	98	#
	99	for k in range(min_k, max_num_frags-min_m):
	100	#
	101	# RS(k, max_num_frags-k) is trivial, just add it (w=[8,16,32] for vand_rs)
	102	#
	103	for w in [8, 16, 32]:
	104	list_of_schemes.append(ECScheme(k, max_num_frags-k, w, "rs_vand_%d" % w))
	105
	106	for w in [4, 8]:
	107	list_of_schemes.append(ECScheme(k, max_num_frags-k, w, "rs_cauchy_orig_%d" % w))
	108
	109	#
	110	# The XOR codes are a little tricker (only check if fault_tolerance = 2 or 3)
	111	#
	112	# Constraint for 2: k <= (m choose 2)
	113	# Constraint for 3: k <= (m choose 3)
	114	#
	115	# The '3' flat_xor_3 (and '4' in flat_xor_4) refers to the Hamming distance,
	116	# which means the code guarantees the reconstruction of any 2 lost fragments
	117	# (or 3 in the case of flat_xor_4).
	118	#
	119	# So, only consider the XOR code if the fault_tolerance matches and
	120	# the additional constraint is met
	121	#
	122	if fault_tolerance == 2:
	123	max_k = nCr(max_num_frags-k, 2)
	124	if k <= max_k and (k, max_num_frags-k) in valid_flat_xor_3:
	125	list_of_schemes.append(ECScheme(k, max_num_frags-k, 0, "flat_xor_3"))
	126
	127	if fault_tolerance == 3:
	128	max_k = nCr(max_num_frags-k, 3)
	129	if k <= max_k and (k, max_num_frags-k) in valid_flat_xor_4:
	130	list_of_schemes.append(ECScheme(k, max_num_frags-k, 0, "flat_xor_4"))
	131
	132	return list_of_schemes
	133
	134
	135	parser = argparse.ArgumentParser(description='PyECLib tool to evaluate viable EC options, benchmark them and report results with the appropriate conf entries.')
	136	parser.add_argument('-n', type=int, help='max number of fragments')
	137	parser.add_argument('-f', type=int, help='fault tolerance')
	138	parser.add_argument('-r', type=float, help='minimum coding rate (num_data / num_data+num_parity)')
	139	parser.add_argument('-s', type=int, help='average stripe size')
	140	parser.add_argument('-l', type=int, help='set limit on number of entries returned (default = 10)', default=10, )
	141
	142	args = parser.parse_args(sys.argv[1:])
	143
	144	MB=1024*1024
	145
	146	# Generate a buffer of size 's'
	147	if args.s > 10*MB:
	148	print "s must be smaller than 10 MB."
	149	sys.exit(1)
	150
	151	# Instantiate the timer
	152	timer = Timer()
	153
	154	return_limit = args.l
	155
	156	schemes = get_viable_schemes(args.n, args.r, args.s, args.f)
	157
	158	# Results will be List[(type, throughput)]
	159	results = []
	160
	161	# Num iterations
	162	num_iterations=10
	163
	164	for scheme in schemes:
	165	print scheme
	166
	167	# Generate a new string for each test
	168	file_str = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(args.s))
	169
	170	try:
	171	ec_driver = ECDriver("pyeclib.core.ECPyECLibDriver", k=scheme.k, m=scheme.m, type=scheme.type)
	172	except Exception as e:
	173	print "Scheme %s is not defined (%s)." % (scheme, e)
	174	continue
	175
	176	timer.start()
	177
	178	for i in range(num_iterations):
	179	ec_driver.encode(file_str)
	180
	181	duration = timer.stop_and_return()
	182
	183	results.append((scheme, duration))
	184
	185	timer.reset()
	186
	187	results.sort(lambda x,y: (int)((1000x[1]) - (1000y[1])))
	188
	189	for i in range(len(results)):
	190	if i > return_limit:
	191	break
	192
	193	print "\n\nPerf Rank #%d:" % i
	194	print " ======== To Use this Policy, Copy and Paste Text (not including this header and footer) to Swift Conf ========"
	195	print " type = erasure_coding"
	196	print " name = %s_%d_%d" % (results[i][0].type, results[i][0].k, results[i][0].m)
	197	print " ec_type = %s" % results[i][0].type
	198	print " ec_k = %s" % results[i][0].k
	199	print " ec_m = %s" % results[i][0].m
	200	print " =============================================================================================================="
	201	results[i]
	202