Codebase list python-pyeclib / dbe0e61
Added a new tool to help choose the best EC scheme, given a set of constraints. Kevin Greenan 10 years ago
3 changed file(s) with 244 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
8080 case PYECC_RS_VAND:
8181 default:
8282 {
83 int max_symbols;
83 long long max_symbols;
8484
8585 if (w != 8 && w != 16 && w != 32) {
8686 return 0;
8787 }
88 max_symbols = 1 << w;
88 max_symbols = 1LL << w;
8989 if ((k+m) > max_symbols) {
9090 return 0;
9191 }
2424 import pyeclib_c
2525 import math
2626
27 #
28 # Generic ECPyECLibException
29 #
30 class ECPyECLibException(Exception):
31 def __init__(self, error_str):
32 self.error_str = error_str
33 def __str__(self):
34 return self.error_str
35
2736 class ECPyECLibDriver(object):
2837 def __init__(self, k, m, type):
2938 self.ec_rs_vand = "rs_vand"
3645 self.ec_rs_cauchy_best_w = 4
3746 self.k = k
3847 self.m = m
48
49 #
50 # Override the default wordsize (w) for Reed-Solomon, if specified
51 #
52 if type[:len(self.ec_rs_vand)] == self.ec_rs_vand and len(type) > len(self.ec_rs_vand):
53 type_ary = type.split("_")
54 if len(type_ary) != 3:
55 raise ECPyECLibException("%s is not a valid EC type for PyECLib!" % type)
56 self.ec_rs_vand_best_w = int(type_ary[2])
57 type = self.ec_rs_vand
58
59 #
60 # Override the default wordsize (w) for Cauchy, if specified
61 #
62 if type[:len(self.ec_rs_cauchy_orig)] == self.ec_rs_cauchy_orig and len(type) > len(self.ec_rs_cauchy_orig):
63 type_ary = type.split("_")
64 if len(type_ary) != 4:
65 raise ECPyECLibException("%s is not a valid EC type for PyECLib!" % type)
66 self.ec_rs_cauchy_best_w = int(type_ary[3])
67 type = self.ec_rs_cauchy_orig
68
3969 if type in self.ec_types:
4070 self.type = type
4171 else:
42 raise ECDriverError("%s is not a valid EC type for PyECLib!")
72 raise ECPyECLibException("%s is not a valid EC type for PyECLib!")
4373
4474 if self.type == self.ec_rs_vand:
4575 self.w = self.ec_rs_vand_best_w
6595 try:
6696 ret_string = pyeclib_c.fragments_to_string(self.handle, fragment_payloads)
6797 except:
68 raise ECDriverError("Error in ECPyECLibDriver.decode")
98 raise ECPyECLibException("Error in ECPyECLibDriver.decode")
6999
70100 if ret_string is None:
71101 (data_frags, parity_frags, missing_idxs) = pyeclib_c.get_fragment_partition(self.handle, fragment_payloads)
134164 Stripe an arbitrary-sized string into k fragments
135165 :param k: the number of data fragments to stripe
136166 :param m: the number of parity fragments to stripe
137 :raises: ECDriverError if there is an error during encoding
167 :raises: ECPyECLibException if there is an error during encoding
138168 """
139169 self.k = k
140170
141171 if m != 0:
142 raise ECDriverError("This driver only supports m=0")
172 raise ECPyECLibException("This driver only supports m=0")
143173
144174 self.m = m
145175
148178 Stripe an arbitrary-sized string into k fragments
149179 :param bytes: the buffer to encode
150180 :returns: a list of k buffers (data only)
151 :raises: ECDriverError if there is an error during encoding
181 :raises: ECPyECLibException if there is an error during encoding
152182 """
153183 # Main fragment size
154184 fragment_size = math.ceil(len(bytes) / float(self.k))
171201 Convert a k-fragment data stripe into a string
172202 :param fragment_payloads: fragments (in order) to convert into a string
173203 :returns: a string containing the original data
174 :raises: ECDriverError if there is an error during decoding
204 :raises: ECPyECLibException if there is an error during decoding
175205 """
176206
177207 if len(fragment_payloads) != self.k:
178 raise ECDriverError("Decode requires %d fragments, %d fragments were given" % (len(fragment_payloads), self.k))
208 raise ECPyECLibException("Decode requires %d fragments, %d fragments were given" % (len(fragment_payloads), self.k))
179209
180210 ret_string = ''
181211
192222 :param available_fragment_payloads: available fragments (in order)
193223 :param missing_fragment_indexes: indexes of missing fragments
194224 :returns: a string containing the original data
195 :raises: ECDriverError if there is an error during reconstruction
225 :raises: ECPyECLibException if there is an error during reconstruction
196226 """
197227 if len(available_fragment_payloads) != self.k:
198 raise ECDriverError("Reconstruction requires %d fragments, %d fragments were given" % (len(available_fragment_payloads), self.k))
228 raise ECPyECLibException("Reconstruction requires %d fragments, %d fragments were given" % (len(available_fragment_payloads), self.k))
199229
200230 return available_fragment_payloads
201231
0 # PyEClib Companion tool
1 # Goal: When defining an EC pool, help cluster admin make an informed choice between available EC implementations. Generate sample swift.conf + swift-ring-builder hints.
2 #
3 # Suggested features:
4 #
5 # List the "EC types" - EC algorithms
6 # List implementations of each EC type available on the platform (dumb-software-only, software with SIMD acceleration, specialized hardware, etc).
7 # Benchmark each algorithm with possible implementation and display performance numbers.
8 # Generate sample EC policy entry (for inclusion in swift.conf) for the best performing algorithm + implementation. (And optionally provide swift-ring-builder hints).
9 # Suggested EC policy entry format:
10 #
11 # ======== swift.conf ============
12 # [storage-policy:10]
13 # type = erasure_coding
14 # name = ec_rs_cauchy_orig_12_2
15 # ec_type = rs_cauchy_orig
16 # ec_k = 12
17 # ec_m = 2
18 # ============================
19 #
20 # (ec_type values are one of those available within PyEClib)
21
22 #
23 # User input: Num data, num parity, average file size
24 # Output: Ordered list of options and their corresponding conf entries (limit 10)
25 #
26
27 import pyeclib
28 from pyeclib.ec_iface import ECDriver
29 import random
30 import string
31 import sys
32 import os
33 import argparse
34 import time
35 import math
36
37 class Timer:
38 def __init__(self):
39 self.start_time = 0
40 self.end_time = 0
41
42 def reset(self):
43 self.start_time = 0
44 self.end_time = 0
45
46 def start(self):
47 self.start_time = time.time()
48
49 def stop(self):
50 self.end_time = time.time()
51
52 def curr_delta(self):
53 return self.end_time - self.start_time
54
55 def stop_and_return(self):
56 self.end_time = time.time()
57 return self.curr_delta()
58
59 def nCr(n,r):
60 f = math.factorial
61 return f(n) / f(r) / f(n-r)
62
63 class ECScheme:
64 def __init__(self, k, m, w, type):
65 self.k = k
66 self.m = m
67 self.w = w
68 self.type = type
69
70 def __str__(self):
71 return "k=%d m=%d w=%d type=%s" % (self.k, self.m, self.w, self.type)
72
73 valid_flat_xor_3 = [(6,6), (7,6), (8,6), (9,6), (10,6), (11,6), (12,6), (13,6), (14,6), (15,6)]
74 valid_flat_xor_4 = [(6,6), (7,6), (8,6), (9,6), (10,6), (11,6), (12,6), (13,6), (14,6), (15,6), (16,6), (17,6), (18,6), (19,6), (20,6)]
75
76 def get_viable_schemes(max_num_frags, minimum_rate, avg_stripe_size, fault_tolerance):
77
78 list_of_schemes = []
79
80 #
81 # Get min_k from (minimum_rate * max_num_frags)
82 #
83 min_k = int(math.ceil(minimum_rate * max_num_frags))
84
85 #
86 # Get min_m from the fault tolerance
87 #
88 min_m = fault_tolerance
89
90 #
91 # Is not information theoretically possible
92 #
93 if (min_k + min_m) > max_num_frags:
94 return list_of_schemes
95
96 #
97 # Iterate over EC(k, max_num_frags-k) k \in [min_k, n-min_m]
98 #
99 for k in range(min_k, max_num_frags-min_m):
100 #
101 # RS(k, max_num_frags-k) is trivial, just add it (w=[8,16,32] for vand_rs)
102 #
103 for w in [8, 16, 32]:
104 list_of_schemes.append(ECScheme(k, max_num_frags-k, w, "rs_vand_%d" % w))
105
106 for w in [4, 8]:
107 list_of_schemes.append(ECScheme(k, max_num_frags-k, w, "rs_cauchy_orig_%d" % w))
108
109 #
110 # The XOR codes are a little tricker (only check if fault_tolerance = 2 or 3)
111 #
112 # Constraint for 2: k <= (m choose 2)
113 # Constraint for 3: k <= (m choose 3)
114 #
115 # The '3' flat_xor_3 (and '4' in flat_xor_4) refers to the Hamming distance,
116 # which means the code guarantees the reconstruction of any 2 lost fragments
117 # (or 3 in the case of flat_xor_4).
118 #
119 # So, only consider the XOR code if the fault_tolerance matches and
120 # the additional constraint is met
121 #
122 if fault_tolerance == 2:
123 max_k = nCr(max_num_frags-k, 2)
124 if k <= max_k and (k, max_num_frags-k) in valid_flat_xor_3:
125 list_of_schemes.append(ECScheme(k, max_num_frags-k, 0, "flat_xor_3"))
126
127 if fault_tolerance == 3:
128 max_k = nCr(max_num_frags-k, 3)
129 if k <= max_k and (k, max_num_frags-k) in valid_flat_xor_4:
130 list_of_schemes.append(ECScheme(k, max_num_frags-k, 0, "flat_xor_4"))
131
132 return list_of_schemes
133
134
135 parser = argparse.ArgumentParser(description='PyECLib tool to evaluate viable EC options, benchmark them and report results with the appropriate conf entries.')
136 parser.add_argument('-n', type=int, help='max number of fragments')
137 parser.add_argument('-f', type=int, help='fault tolerance')
138 parser.add_argument('-r', type=float, help='minimum coding rate (num_data / num_data+num_parity)')
139 parser.add_argument('-s', type=int, help='average stripe size')
140 parser.add_argument('-l', type=int, help='set limit on number of entries returned (default = 10)', default=10, )
141
142 args = parser.parse_args(sys.argv[1:])
143
144 MB=1024*1024
145
146 # Generate a buffer of size 's'
147 if args.s > 10*MB:
148 print "s must be smaller than 10 MB."
149 sys.exit(1)
150
151 # Instantiate the timer
152 timer = Timer()
153
154 return_limit = args.l
155
156 schemes = get_viable_schemes(args.n, args.r, args.s, args.f)
157
158 # Results will be List[(type, throughput)]
159 results = []
160
161 # Num iterations
162 num_iterations=10
163
164 for scheme in schemes:
165 print scheme
166
167 # Generate a new string for each test
168 file_str = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(args.s))
169
170 try:
171 ec_driver = ECDriver("pyeclib.core.ECPyECLibDriver", k=scheme.k, m=scheme.m, type=scheme.type)
172 except Exception as e:
173 print "Scheme %s is not defined (%s)." % (scheme, e)
174 continue
175
176 timer.start()
177
178 for i in range(num_iterations):
179 ec_driver.encode(file_str)
180
181 duration = timer.stop_and_return()
182
183 results.append((scheme, duration))
184
185 timer.reset()
186
187 results.sort(lambda x,y: (int)((1000*x[1]) - (1000*y[1])))
188
189 for i in range(len(results)):
190 if i > return_limit:
191 break
192
193 print "\n\nPerf Rank #%d:" % i
194 print " ======== To Use this Policy, Copy and Paste Text (not including this header and footer) to Swift Conf ========"
195 print " type = erasure_coding"
196 print " name = %s_%d_%d" % (results[i][0].type, results[i][0].k, results[i][0].m)
197 print " ec_type = %s" % results[i][0].type
198 print " ec_k = %s" % results[i][0].k
199 print " ec_m = %s" % results[i][0].m
200 print " =============================================================================================================="
201 results[i]
202