Merge tag '1.2.1' into debian/unstable
Release 1.2.1
Ondřej Nový
7 years ago
0 | Kevin Greenan <kmgreen2@gmail.com> <kmg@box.com> | |
1 | Kevin Greenan <kmgreen2@gmail.com> <kmgreen@ubuntu.(none)> | |
2 | Kevin Greenan <kmgreen2@gmail.com> <kmgreen@Kevins-MacBook-Air-6.local> | |
3 | Tushar Gohad <tushar.gohad@intel.com> <tusharsg@gmail.com> | |
4 | Tushar Gohad <tushar.gohad@intel.com> <Tushar Gohad> | |
5 | Eric Lambert <eric_lambert@xyratex.com> <eric.lambert@seagate.com> | |
6 | Kota Tsuyuzaki <bloodeagle40234@gmail.com> <tsuyuzaki.kota@lab.ntt.co.jp> |
0 | Not in any particular order :^) | |
0 | Original Authors | |
1 | ---------------- | |
2 | Tushar Gohad (tushar.gohad@intel.com) | |
3 | Kevin Greenan (kmgreen2@gmail.com) | |
1 | 4 | |
2 | Kevin Greenan <kmgreen2@gmail.com, kmg@box.com> | |
3 | Tushar Gohad <tusharsg@gmail.com, tushar.gohad@intel.com> | |
4 | Eric Lambert <eric.d.lambert@gmail.com, eric.lambert@seagate.com> | |
5 | Mark Storer <Mark.Storer@evault.com> | |
6 | Kota Tsuyuzaki <bloodeagle40123@gmail.com, tsuyuzaki.kota@lab.ntt.co.jp> | |
7 | Pete Zaitcev <zaitcev@kotori.zaitcev.us> | |
8 | Victor Stinner <vstinner@redhat.com> | |
5 | Contributors | |
6 | ------------ | |
7 | Timur Alperovich (timuralp@swiftstack.com) | |
8 | Thiago da Silva (thiago@redhat.com) | |
9 | Eric Lambert (eric_lambert@xyratex.com) | |
10 | Davanum Srinivas (davanum@gmail.com) | |
11 | Victor Stinner (vstinner@redhat.com) | |
12 | Mark Storer (Mark.Storer@evault.com) | |
13 | Kota Tsuyuzaki (bloodeagle40123@gmail.com) | |
14 | Pete Zaitcev (zaitcev@kotori.zaitcev.us) | |
15 | Yuan Zhou (yuan.zhou@intel.com) |
0 | New in 1.2.1 | |
1 | ------------ | |
2 | ||
3 | * Eliminated spurious syslog messages and added a cleaner | |
4 | mechanism for querying all available backends on a system. | |
5 | ||
6 | * Moved source code hosting from Bitbucket to OpenStack infra. | |
7 | This is the first release with OpenStack infra. | |
8 | ||
0 | 9 | New in 1.2.0 |
1 | 10 | ------------ |
2 | 11 |
0 | This is v1.2 of PyECLib. This library provides a simple Python interface for | |
1 | implementing erasure codes and is known to work with Python v2.6, 2.7 and 3.x. | |
2 | ||
3 | To obtain the best possible performance, the library utilizes liberasurecode, | |
4 | which is a C based erasure code library. Please let us know if you have any | |
5 | issues building or installing (email: kmgreen2@gmail.com or tusharsg@gmail.com). | |
6 | ||
7 | PyECLib supports a variety of Erasure Coding backends including the standard Reed | |
8 | Solomon implementations provided by Jerasure [2], liberasurecode [3] and Intel | |
9 | ISA-L [4]. It also provides support for a flat XOR-based encoder and decoder | |
10 | (part of liberasurecode) - a class of HD Combination Codes based on "Flat | |
11 | XOR-based erasure codes in storage systems: Constructions, efficient recovery, | |
12 | and tradeoffs" in IEEE MSST 2010). These codes are well-suited to archival | |
13 | use-cases, have a simple construction and require a minimum number of | |
14 | participating disks during single-disk reconstruction (think XOR-based LRC code). | |
15 | ||
16 | Examples of using PyECLib are provided in the "tools" directory: | |
17 | ||
18 | Command-line encoder:: | |
19 | ||
20 | tools/pyeclib_encode.py | |
21 | ||
22 | Command-line decoder:: | |
23 | ||
24 | tools/pyeclib_decode.py | |
25 | ||
26 | Utility to determine what is needed to reconstruct missing fragments:: | |
27 | ||
28 | tools/pyeclib_fragments_needed.py | |
29 | ||
30 | ||
31 | PyECLib initialization:: | |
32 | ||
33 | ec_driver = ECDriver(k=<num_encoded_data_fragments>, | |
34 | m=<num_encoded_parity_fragments>, | |
35 | ec_type=<ec_scheme>) | |
36 | ||
37 | Supported ``ec_type`` values: | |
38 | ||
39 | * ``liberasurecode_rs_vand`` => Vandermonde Reed-Solomon encoding, software-only backend implemented by liberasurecode [3] | |
40 | * ``jerasure_rs_vand`` => Vandermonde Reed-Solomon encoding, based on Jerasure [1] | |
41 | * ``jerasure_rs_cauchy`` => Cauchy Reed-Solomon encoding (Jerasure variant), based on Jerasure [2] | |
42 | * ``flat_xor_hd_3``, ``flat_xor_hd_4`` => Flat-XOR based HD combination codes, liberasurecode [3] | |
43 | * ``isa_l_rs_vand`` => Intel Storage Acceleration Library (ISA-L) - SIMD accelerated Erasure Coding backends [4] | |
44 | * ``shss`` => NTT Lab Japan's Erasure Coding Library | |
45 | ||
46 | A configuration utility is provided to help compare available EC schemes in | |
47 | terms of performance and redundancy:: tools/pyeclib_conf_tool.py | |
48 | ||
49 | ||
50 | The Python API supports the following functions: | |
51 | ||
52 | - EC Encode | |
53 | ||
54 | Encode N bytes of a data object into k (data) + m (parity) fragments:: | |
55 | ||
56 | def encode(self, data_bytes) | |
57 | ||
58 | input: data_bytes - input data object (bytes) | |
59 | returns: list of fragments (bytes) | |
60 | throws: | |
61 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
62 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
63 | ECInvalidParameter - if invalid parameters were provided | |
64 | ECOutOfMemory - if the process has run out of memory | |
65 | ECDriverError - if an unknown error occurs | |
66 | ||
67 | - EC Decode | |
68 | ||
69 | Decode between k and k+m fragments into original object:: | |
70 | ||
71 | def decode(self, fragment_payloads) | |
72 | ||
73 | input: list of fragment_payloads (bytes) | |
74 | returns: decoded object (bytes) | |
75 | throws: | |
76 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
77 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
78 | ECInvalidParameter - if invalid parameters were provided | |
79 | ECOutOfMemory - if the process has run out of memory | |
80 | ECInsufficientFragments - if an insufficient set of fragments has been provided (e.g. not enough) | |
81 | ECInvalidFragmentMetadata - if the fragment headers appear to be corrupted | |
82 | ECDriverError - if an unknown error occurs | |
83 | ||
84 | ||
85 | *Note*: ``bytes`` is a synonym to ``str`` in Python 2.6, 2.7. | |
86 | In Python 3.x, ``bytes`` and ``str`` types are non-interchangeable and care | |
87 | needs to be taken when handling input to and output from the ``encode()`` and | |
88 | ``decode()`` routines. | |
89 | ||
90 | ||
91 | - EC Reconstruct | |
92 | ||
93 | Reconstruct "missing_fragment_indexes" using "available_fragment_payloads":: | |
94 | ||
95 | def reconstruct(self, available_fragment_payloads, missing_fragment_indexes) | |
96 | ||
97 | input: available_fragment_payloads - list of fragment payloads | |
98 | input: missing_fragment_indexes - list of indexes to reconstruct | |
99 | output: list of reconstructed fragments corresponding to missing_fragment_indexes | |
100 | throws: | |
101 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
102 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
103 | ECInvalidParameter - if invalid parameters were provided | |
104 | ECOutOfMemory - if the process has run out of memory | |
105 | ECInsufficientFragments - if an insufficient set of fragments has been provided (e.g. not enough) | |
106 | ECInvalidFragmentMetadata - if the fragment headers appear to be corrupted | |
107 | ECDriverError - if an unknown error occurs | |
108 | ||
109 | ||
110 | - Minimum parity fragments needed for durability guarantees:: | |
111 | ||
112 | def min_parity_fragments_needed(self) | |
113 | ||
114 | NOTE: Currently hard-coded to 1, so this can only be trusted for MDS codes, such as | |
115 | Reed-Solomon. | |
116 | ||
117 | output: minimum number of additional fragments needed to be synchronously written to tolerate | |
118 | the loss of any one fragment (similar guarantees to 2 out of 3 with 3x replication) | |
119 | throws: | |
120 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
121 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
122 | ECInvalidParameter - if invalid parameters were provided | |
123 | ECOutOfMemory - if the process has run out of memory | |
124 | ECDriverError - if an unknown error occurs | |
125 | ||
126 | ||
127 | - Fragments needed for EC Reconstruct | |
128 | ||
129 | Return the indexes of fragments needed to reconstruct "missing_fragment_indexes":: | |
130 | ||
131 | def fragments_needed(self, missing_fragment_indexes) | |
132 | ||
133 | input: list of missing_fragment_indexes | |
134 | output: list of fragments needed to reconstruct fragments listed in missing_fragment_indexes | |
135 | throws: | |
136 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
137 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
138 | ECInvalidParameter - if invalid parameters were provided | |
139 | ECOutOfMemory - if the process has run out of memory | |
140 | ECDriverError - if an unknown error occurs | |
141 | ||
142 | ||
143 | - Get EC Metadata | |
144 | ||
145 | Return an opaque header known by the underlying library or a formatted header (Python dict):: | |
146 | ||
147 | def get_metadata(self, fragment, formatted = 0) | |
148 | ||
149 | input: raw fragment payload | |
150 | input: boolean specifying if returned header is opaque buffer or formatted string | |
151 | output: fragment header (opaque or formatted) | |
152 | throws: | |
153 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
154 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
155 | ECInvalidParameter - if invalid parameters were provided | |
156 | ECOutOfMemory - if the process has run out of memory | |
157 | ECDriverError - if an unknown error occurs | |
158 | ||
159 | - Verify EC Stripe Consistency | |
160 | ||
161 | Use opaque buffers from get_metadata() to verify the consistency of a stripe:: | |
162 | ||
163 | def verify_stripe_metadata(self, fragment_metadata_list) | |
164 | ||
165 | input: list of opaque fragment headers | |
166 | output: formatted string containing the 'status' (0 is success) and 'reason' if verification fails | |
167 | throws: | |
168 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
169 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
170 | ECInvalidParameter - if invalid parameters were provided | |
171 | ECOutOfMemory - if the process has run out of memory | |
172 | ECDriverError - if an unknown error occurs | |
173 | ||
174 | ||
175 | - Get EC Segment Info | |
176 | ||
177 | Return a dict with the keys - segment_size, last_segment_size, fragment_size, last_fragment_size and num_segments:: | |
178 | ||
179 | def get_segment_info(self, data_len, segment_size) | |
180 | ||
181 | input: total data_len of the object to store | |
182 | input: target segment size used to segment the object into multiple EC stripes | |
183 | output: a dict with keys - segment_size, last_segment_size, fragment_size, last_fragment_size and num_segments | |
184 | throws: | |
185 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
186 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
187 | ECInvalidParameter - if invalid parameters were provided | |
188 | ECOutOfMemory - if the process has run out of memory | |
189 | ECDriverError - if an unknown error occurs | |
190 | ||
191 | ||
192 | - Get EC Segment Info given a list of ranges, data length and segment size:: | |
193 | ||
194 | def get_segment_info_byterange(self, ranges, data_len, segment_size) | |
195 | ||
196 | input: byte ranges | |
197 | input: total data_len of the object to store | |
198 | input: target segment size used to segment the object into multiple EC stripes | |
199 | output: (see below) | |
200 | throws: | |
201 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
202 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
203 | ECInvalidParameter - if invalid parameters were provided | |
204 | ECOutOfMemory - if the process has run out of memory | |
205 | ECDriverError - if an unknown error occurs | |
206 | ||
207 | Assume a range request is given for an object with segment size 3K and | |
208 | a 1 MB file:: | |
209 | ||
210 | Ranges = (0, 1), (1, 12), (10, 1000), (0, segment_size-1), | |
211 | (1, segment_size+1), (segment_size-1, 2*segment_size) | |
212 | ||
213 | This will return a map keyed on the ranges, where there is a recipe | |
214 | given for each range:: | |
215 | ||
216 | { | |
217 | (0, 1): {0: (0, 1)}, | |
218 | (10, 1000): {0: (10, 1000)}, | |
219 | (1, 12): {0: (1, 12)}, | |
220 | (0, 3071): {0: (0, 3071)}, | |
221 | (3071, 6144): {0: (3071, 3071), 1: (0, 3071), 2: (0, 0)}, | |
222 | (1, 3073): {0: (1, 3071), 1: (0,0)} | |
223 | } | |
224 | ||
225 | ||
226 | Quick Start | |
227 | ||
228 | Install pre-requisites: | |
229 | ||
230 | * Python 2.6, 2.7 or 3.x (including development packages), argparse, setuptools | |
231 | * liberasurecode v1.1.0 or greater [3] | |
232 | * Erasure code backend libraries, gf-complete and Jerasure [1],[2], ISA-L [4] etc | |
233 | ||
234 | Install PyECLib:: | |
235 | ||
236 | $ sudo python setup.py install | |
237 | ||
238 | Run test suite included:: | |
239 | ||
240 | $ ./.unittests | |
241 | ||
242 | If all of this works, then you should be good to go. If not, send us an email! | |
243 | ||
244 | If the test suite fails because it cannot find any of the shared libraries, | |
245 | then you probably need to add /usr/local/lib to the path searched when loading | |
246 | libraries. The best way to do this (on Linux) is to add '/usr/local/lib' to:: | |
247 | ||
248 | /etc/ld.so.conf | |
249 | ||
250 | and then make sure to run:: | |
251 | ||
252 | $ sudo ldconfig | |
253 | ||
254 | ||
255 | References | |
256 | ||
257 | [1] Jerasure, C library that supports erasure coding in storage applications, http://jerasure.org | |
258 | ||
259 | [2] Greenan, Kevin M et al, "Flat XOR-based erasure codes in storage systems", http://www.kaymgee.com/Kevin_Greenan/Publications_files/greenan-msst10.pdf | |
260 | ||
261 | [3] liberasurecode, C API abstraction layer for erasure coding backends, https://bitbucket.org/tsg-/liberasurecode | |
262 | ||
263 | [4] Intel(R) Storage Acceleration Library (Open Source Version), https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version | |
264 | ||
265 | [5] Kota Tsuyuzaki <tsuyuzaki.kota@lab.ntt.co.jp>, Ryuta Kon <kon.ryuta@po.ntts.co.jp>, "NTT SHSS Erasure Coding backend" | |
266 | ||
267 | -- | |
268 | 1.2 |
0 | This library provides a simple Python interface for implementing erasure codes | |
1 | and is known to work with Python v2.6, 2.7 and 3.x. | |
2 | ||
3 | To obtain the best possible performance, the library utilizes liberasurecode, | |
4 | which is a C based erasure code library. Please let us know if you have any | |
5 | issues building or installing (email: kmgreen2@gmail.com or tusharsg@gmail.com). | |
6 | ||
7 | PyECLib supports a variety of Erasure Coding backends including the standard Reed | |
8 | Solomon implementations provided by Jerasure [2], liberasurecode [3] and Intel | |
9 | ISA-L [4]. It also provides support for a flat XOR-based encoder and decoder | |
10 | (part of liberasurecode) - a class of HD Combination Codes based on "Flat | |
11 | XOR-based erasure codes in storage systems: Constructions, efficient recovery, | |
12 | and tradeoffs" in IEEE MSST 2010). These codes are well-suited to archival | |
13 | use-cases, have a simple construction and require a minimum number of | |
14 | participating disks during single-disk reconstruction (think XOR-based LRC code). | |
15 | ||
16 | Examples of using PyECLib are provided in the "tools" directory: | |
17 | ||
18 | Command-line encoder:: | |
19 | ||
20 | tools/pyeclib_encode.py | |
21 | ||
22 | Command-line decoder:: | |
23 | ||
24 | tools/pyeclib_decode.py | |
25 | ||
26 | Utility to determine what is needed to reconstruct missing fragments:: | |
27 | ||
28 | tools/pyeclib_fragments_needed.py | |
29 | ||
30 | ||
31 | PyECLib initialization:: | |
32 | ||
33 | ec_driver = ECDriver(k=<num_encoded_data_fragments>, | |
34 | m=<num_encoded_parity_fragments>, | |
35 | ec_type=<ec_scheme>) | |
36 | ||
37 | Supported ``ec_type`` values: | |
38 | ||
39 | * ``liberasurecode_rs_vand`` => Vandermonde Reed-Solomon encoding, software-only backend implemented by liberasurecode [3] | |
40 | * ``jerasure_rs_vand`` => Vandermonde Reed-Solomon encoding, based on Jerasure [1] | |
41 | * ``jerasure_rs_cauchy`` => Cauchy Reed-Solomon encoding (Jerasure variant), based on Jerasure [2] | |
42 | * ``flat_xor_hd_3``, ``flat_xor_hd_4`` => Flat-XOR based HD combination codes, liberasurecode [3] | |
43 | * ``isa_l_rs_vand`` => Intel Storage Acceleration Library (ISA-L) - SIMD accelerated Erasure Coding backends [4] | |
44 | * ``shss`` => NTT Lab Japan's Erasure Coding Library | |
45 | ||
46 | A configuration utility is provided to help compare available EC schemes in | |
47 | terms of performance and redundancy:: `tools/pyeclib_conf_tool.py` | |
48 | ||
49 | ||
50 | The Python API supports the following functions: | |
51 | ||
52 | - EC Encode | |
53 | ||
54 | Encode N bytes of a data object into k (data) + m (parity) fragments:: | |
55 | ||
56 | def encode(self, data_bytes) | |
57 | ||
58 | input: data_bytes - input data object (bytes) | |
59 | returns: list of fragments (bytes) | |
60 | throws: | |
61 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
62 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
63 | ECInvalidParameter - if invalid parameters were provided | |
64 | ECOutOfMemory - if the process has run out of memory | |
65 | ECDriverError - if an unknown error occurs | |
66 | ||
67 | - EC Decode | |
68 | ||
69 | Decode between k and k+m fragments into original object:: | |
70 | ||
71 | def decode(self, fragment_payloads) | |
72 | ||
73 | input: list of fragment_payloads (bytes) | |
74 | returns: decoded object (bytes) | |
75 | throws: | |
76 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
77 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
78 | ECInvalidParameter - if invalid parameters were provided | |
79 | ECOutOfMemory - if the process has run out of memory | |
80 | ECInsufficientFragments - if an insufficient set of fragments has been provided (e.g. not enough) | |
81 | ECInvalidFragmentMetadata - if the fragment headers appear to be corrupted | |
82 | ECDriverError - if an unknown error occurs | |
83 | ||
84 | ||
85 | *Note*: ``bytes`` is a synonym to ``str`` in Python 2.6, 2.7. | |
86 | In Python 3.x, ``bytes`` and ``str`` types are non-interchangeable and care | |
87 | needs to be taken when handling input to and output from the ``encode()`` and | |
88 | ``decode()`` routines. | |
89 | ||
90 | ||
91 | - EC Reconstruct | |
92 | ||
93 | Reconstruct "missing_fragment_indexes" using "available_fragment_payloads":: | |
94 | ||
95 | def reconstruct(self, available_fragment_payloads, missing_fragment_indexes) | |
96 | ||
97 | input: available_fragment_payloads - list of fragment payloads | |
98 | input: missing_fragment_indexes - list of indexes to reconstruct | |
99 | output: list of reconstructed fragments corresponding to missing_fragment_indexes | |
100 | throws: | |
101 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
102 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
103 | ECInvalidParameter - if invalid parameters were provided | |
104 | ECOutOfMemory - if the process has run out of memory | |
105 | ECInsufficientFragments - if an insufficient set of fragments has been provided (e.g. not enough) | |
106 | ECInvalidFragmentMetadata - if the fragment headers appear to be corrupted | |
107 | ECDriverError - if an unknown error occurs | |
108 | ||
109 | ||
110 | - Minimum parity fragments needed for durability guarantees:: | |
111 | ||
112 | def min_parity_fragments_needed(self) | |
113 | ||
114 | NOTE: Currently hard-coded to 1, so this can only be trusted for MDS codes, such as | |
115 | Reed-Solomon. | |
116 | ||
117 | output: minimum number of additional fragments needed to be synchronously written to tolerate | |
118 | the loss of any one fragment (similar guarantees to 2 out of 3 with 3x replication) | |
119 | throws: | |
120 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
121 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
122 | ECInvalidParameter - if invalid parameters were provided | |
123 | ECOutOfMemory - if the process has run out of memory | |
124 | ECDriverError - if an unknown error occurs | |
125 | ||
126 | ||
127 | - Fragments needed for EC Reconstruct | |
128 | ||
129 | Return the indexes of fragments needed to reconstruct "missing_fragment_indexes":: | |
130 | ||
131 | def fragments_needed(self, missing_fragment_indexes) | |
132 | ||
133 | input: list of missing_fragment_indexes | |
134 | output: list of fragments needed to reconstruct fragments listed in missing_fragment_indexes | |
135 | throws: | |
136 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
137 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
138 | ECInvalidParameter - if invalid parameters were provided | |
139 | ECOutOfMemory - if the process has run out of memory | |
140 | ECDriverError - if an unknown error occurs | |
141 | ||
142 | ||
143 | - Get EC Metadata | |
144 | ||
145 | Return an opaque header known by the underlying library or a formatted header (Python dict):: | |
146 | ||
147 | def get_metadata(self, fragment, formatted = 0) | |
148 | ||
149 | input: raw fragment payload | |
150 | input: boolean specifying if returned header is opaque buffer or formatted string | |
151 | output: fragment header (opaque or formatted) | |
152 | throws: | |
153 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
154 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
155 | ECInvalidParameter - if invalid parameters were provided | |
156 | ECOutOfMemory - if the process has run out of memory | |
157 | ECDriverError - if an unknown error occurs | |
158 | ||
159 | - Verify EC Stripe Consistency | |
160 | ||
161 | Use opaque buffers from get_metadata() to verify the consistency of a stripe:: | |
162 | ||
163 | def verify_stripe_metadata(self, fragment_metadata_list) | |
164 | ||
165 | input: list of opaque fragment headers | |
166 | output: formatted string containing the 'status' (0 is success) and 'reason' if verification fails | |
167 | throws: | |
168 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
169 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
170 | ECInvalidParameter - if invalid parameters were provided | |
171 | ECOutOfMemory - if the process has run out of memory | |
172 | ECDriverError - if an unknown error occurs | |
173 | ||
174 | ||
175 | - Get EC Segment Info | |
176 | ||
177 | Return a dict with the keys - segment_size, last_segment_size, fragment_size, last_fragment_size and num_segments:: | |
178 | ||
179 | def get_segment_info(self, data_len, segment_size) | |
180 | ||
181 | input: total data_len of the object to store | |
182 | input: target segment size used to segment the object into multiple EC stripes | |
183 | output: a dict with keys - segment_size, last_segment_size, fragment_size, last_fragment_size and num_segments | |
184 | throws: | |
185 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
186 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
187 | ECInvalidParameter - if invalid parameters were provided | |
188 | ECOutOfMemory - if the process has run out of memory | |
189 | ECDriverError - if an unknown error occurs | |
190 | ||
191 | ||
192 | - Get EC Segment Info given a list of ranges, data length and segment size:: | |
193 | ||
194 | def get_segment_info_byterange(self, ranges, data_len, segment_size) | |
195 | ||
196 | input: byte ranges | |
197 | input: total data_len of the object to store | |
198 | input: target segment size used to segment the object into multiple EC stripes | |
199 | output: (see below) | |
200 | throws: | |
201 | ECBackendInstanceNotAvailable - if the backend library cannot be found | |
202 | ECBackendNotSupported - if the backend is not supported by PyECLib (see ec_types above) | |
203 | ECInvalidParameter - if invalid parameters were provided | |
204 | ECOutOfMemory - if the process has run out of memory | |
205 | ECDriverError - if an unknown error occurs | |
206 | ||
207 | Assume a range request is given for an object with segment size 3K and | |
208 | a 1 MB file:: | |
209 | ||
210 | Ranges = (0, 1), (1, 12), (10, 1000), (0, segment_size-1), | |
211 | (1, segment_size+1), (segment_size-1, 2*segment_size) | |
212 | ||
213 | This will return a map keyed on the ranges, where there is a recipe | |
214 | given for each range:: | |
215 | ||
216 | { | |
217 | (0, 1): {0: (0, 1)}, | |
218 | (10, 1000): {0: (10, 1000)}, | |
219 | (1, 12): {0: (1, 12)}, | |
220 | (0, 3071): {0: (0, 3071)}, | |
221 | (3071, 6144): {0: (3071, 3071), 1: (0, 3071), 2: (0, 0)}, | |
222 | (1, 3073): {0: (1, 3071), 1: (0,0)} | |
223 | } | |
224 | ||
225 | ||
226 | Quick Start | |
227 | ||
228 | Install pre-requisites: | |
229 | ||
230 | * Python 2.6, 2.7 or 3.x (including development packages), argparse, setuptools | |
231 | * liberasurecode v1.1.0 or greater [3] | |
232 | * Erasure code backend libraries, gf-complete and Jerasure [1],[2], ISA-L [4] etc | |
233 | ||
234 | Install PyECLib:: | |
235 | ||
236 | $ sudo python setup.py install | |
237 | ||
238 | Run test suite included:: | |
239 | ||
240 | $ ./.unittests | |
241 | ||
242 | If all of this works, then you should be good to go. If not, send us an email! | |
243 | ||
244 | If the test suite fails because it cannot find any of the shared libraries, | |
245 | then you probably need to add /usr/local/lib to the path searched when loading | |
246 | libraries. The best way to do this (on Linux) is to add '/usr/local/lib' to:: | |
247 | ||
248 | /etc/ld.so.conf | |
249 | ||
250 | and then make sure to run:: | |
251 | ||
252 | $ sudo ldconfig | |
253 | ||
254 | ||
255 | References | |
256 | ||
257 | [1] Jerasure, C library that supports erasure coding in storage applications, http://jerasure.org | |
258 | ||
259 | [2] Greenan, Kevin M et al, "Flat XOR-based erasure codes in storage systems", http://www.kaymgee.com/Kevin_Greenan/Publications_files/greenan-msst10.pdf | |
260 | ||
261 | [3] liberasurecode, C API abstraction layer for erasure coding backends, https://bitbucket.org/tsg-/liberasurecode | |
262 | ||
263 | [4] Intel(R) Storage Acceleration Library (Open Source Version), https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version | |
264 | ||
265 | [5] Kota Tsuyuzaki <tsuyuzaki.kota@lab.ntt.co.jp>, Ryuta Kon <kon.ryuta@po.ntts.co.jp>, "NTT SHSS Erasure Coding backend" |
25 | 25 | from .enum import unique |
26 | 26 | from .utils import create_instance |
27 | 27 | from .utils import positive_int_value |
28 | from pyeclib_c import check_backend_available | |
28 | 29 | |
29 | 30 | |
30 | 31 | def PyECLibVersion(z, y, x): |
475 | 476 | def _PyECLibValidECTypes(): |
476 | 477 | available_ec_types = [] |
477 | 478 | for _type in ALL_EC_TYPES: |
478 | driver = None | |
479 | try: | |
480 | if _type is 'shss': | |
481 | _m = 4 | |
482 | else: | |
483 | _m = 5 | |
484 | driver = ECDriver(k=10, m=_m, ec_type=_type, validate=True) | |
485 | if driver: | |
486 | available_ec_types.append(_type) | |
487 | except: | |
488 | # ignore any errors, assume backend not available | |
479 | if _type.startswith('flat_xor_hd'): | |
480 | int_type = PyECLib_EC_Types.get_by_name('flat_xor_hd') | |
481 | else: | |
482 | int_type = PyECLib_EC_Types.get_by_name(_type) | |
483 | if not int_type: | |
489 | 484 | continue |
485 | if check_backend_available(int_type.value): | |
486 | available_ec_types.append(_type) | |
490 | 487 | return available_ec_types |
491 | 488 | |
492 | 489 |
49 | 49 | default_python_incdir = get_python_inc() |
50 | 50 | |
51 | 51 | |
52 | # utility routines | |
53 | def _read_file_as_str(name): | |
54 | with open(name, "rt") as f: | |
55 | s = f.readline().strip() | |
56 | return s | |
57 | ||
58 | ||
52 | # this is to be used only for library existence/version checks, | |
53 | # not for rpath handling | |
59 | 54 | def _find_library(name): |
60 | target_lib = None | |
61 | if os.name == 'posix' and sys.platform.startswith('linux'): | |
62 | from ctypes.util import _findLib_gcc | |
63 | target_lib = _findLib_gcc(name) | |
64 | else: | |
65 | target_lib = find_library(name) | |
66 | if target_lib: | |
55 | target_lib = find_library(name) | |
56 | if platform_str.find("Darwin") > -1: | |
67 | 57 | target_lib = os.path.abspath(target_lib) |
68 | 58 | if os.path.islink(target_lib): |
69 | 59 | p = os.readlink(target_lib) |
83 | 73 | library = library_basename + "-" + library_version |
84 | 74 | library_url = "https://bitbucket.org/tsg-/liberasurecode.git" |
85 | 75 | |
76 | found_path = _find_library("erasurecode") | |
77 | if found_path: | |
78 | if found_path.endswith(library_version) or \ | |
79 | found_path.find(library_version + ".") > -1: | |
80 | # call 1.x.x the only compatible version for now | |
81 | return | |
82 | ||
86 | 83 | if platform_str.find("Darwin") > -1: |
87 | 84 | liberasure_file = \ |
88 | 85 | library_basename + "." + library_version + ".dylib" |
89 | 86 | else: |
90 | 87 | liberasure_file = \ |
91 | 88 | library_basename + ".so." + library_version |
92 | ||
93 | found_path = _find_library("erasurecode") | |
94 | if found_path: | |
95 | if found_path.endswith(library_version) or \ | |
96 | found_path.find(library_version + ".") > -1: | |
97 | # call 1.x.x the only compatible version for now | |
98 | return | |
99 | 89 | |
100 | 90 | print("**************************************************************") |
101 | 91 | print("*** ") |
181 | 171 | sources=['src/c/pyeclib_c/pyeclib_c.c']) |
182 | 172 | |
183 | 173 | setup(name='PyECLib', |
184 | version='1.2.0', | |
174 | version='1.2.1', | |
185 | 175 | author='Kevin Greenan', |
186 | 176 | author_email='kmgreen2@gmail.com', |
187 | 177 | maintainer='Kevin Greenan and Tushar Gohad', |
1171 | 1171 | return ret_obj; |
1172 | 1172 | } |
1173 | 1173 | |
1174 | static PyObject* | |
1175 | pyeclib_c_check_backend_available(PyObject *self, PyObject *args) | |
1176 | { | |
1177 | const ec_backend_id_t backend_id; | |
1178 | ||
1179 | if (!PyArg_ParseTuple(args, "i", &backend_id)) { | |
1180 | pyeclib_c_seterr(-EINVALIDPARAMS, "pyeclib_c_check_backend_available ERROR: "); | |
1181 | return NULL; | |
1182 | } | |
1183 | ||
1184 | if (liberasurecode_backend_available(backend_id)) { | |
1185 | Py_RETURN_TRUE; | |
1186 | } | |
1187 | ||
1188 | Py_RETURN_FALSE; | |
1189 | } | |
1174 | 1190 | |
1175 | 1191 | static PyMethodDef PyECLibMethods[] = { |
1176 | 1192 | {"init", pyeclib_c_init, METH_VARARGS, "Initialize a new erasure encoder/decoder"}, |
1181 | 1197 | {"get_segment_info", pyeclib_c_get_segment_info, METH_VARARGS, "Return segment and fragment size information needed when encoding a segmented stream"}, |
1182 | 1198 | {"get_metadata", pyeclib_c_get_metadata, METH_VARARGS, "Get the integrity checking metadata for a fragment"}, |
1183 | 1199 | {"check_metadata", pyeclib_c_check_metadata, METH_VARARGS, "Check the integrity checking metadata for a set of fragments"}, |
1200 | {"check_backend_available", pyeclib_c_check_backend_available, METH_VARARGS, "Check if a backend is available"}, | |
1184 | 1201 | {NULL, NULL, 0, NULL} /* Sentinel */ |
1185 | 1202 | }; |
1186 | 1203 |
29 | 29 | from pyeclib.ec_iface import ECDriver |
30 | 30 | from pyeclib.ec_iface import ECDriverError |
31 | 31 | from pyeclib.ec_iface import ECInsufficientFragments |
32 | from pyeclib.ec_iface import ECInvalidFragmentMetadata | |
32 | 33 | from pyeclib.ec_iface import PyECLib_EC_Types |
33 | 34 | from pyeclib.ec_iface import ALL_EC_TYPES |
34 | 35 | from pyeclib.ec_iface import VALID_EC_TYPES |
536 | 537 | fragments[i] = corrupted_fragment |
537 | 538 | i += 1 |
538 | 539 | |
539 | self.assertRaises(ECInsufficientFragments, | |
540 | self.assertRaises(ECInvalidFragmentMetadata, | |
540 | 541 | pyeclib_driver.decode, |
541 | 542 | fragments[:], force_metadata_checks=True) |
542 | 543 |