Merge tag '2.30.0' into debian/victoria
ISA-L 2.30 release
Ondřej Nový
3 years ago
0 | kind: pipeline | |
1 | name: arm64-linux-gcc-5.4 | |
2 | ||
3 | platform: | |
4 | os: linux | |
5 | arch: arm64 | |
6 | ||
7 | steps: | |
8 | - name: arm64-linux-gcc-5.4 | |
9 | image: ubuntu:xenial | |
10 | environment: | |
11 | C_COMPILER: gcc | |
12 | commands: | |
13 | - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi | |
14 | - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi | |
15 | - apt-get -qq update | |
16 | - apt-get install -qq -y build-essential git indent libtool libz-dev yasm autoconf | |
17 | - if [ -n "$CC" ]; then $CC --version; fi | |
18 | - if [ -n "$AS" ]; then $AS --version; fi | |
19 | - ./tools/test_autorun.sh "$TEST_TYPE" | |
20 | ||
21 | --- | |
22 | kind: pipeline | |
23 | name: arm64-linux-gcc-4.7 | |
24 | ||
25 | platform: | |
26 | os: linux | |
27 | arch: arm64 | |
28 | ||
29 | steps: | |
30 | - name: arm64-linux-gcc-4.7 | |
31 | image: ubuntu:xenial | |
32 | environment: | |
33 | C_COMPILER: gcc-4.7 | |
34 | commands: | |
35 | - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi | |
36 | - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi | |
37 | - apt-get -qq update | |
38 | - apt-get install -qq -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf | |
39 | - add-apt-repository -y ppa:ubuntu-toolchain-r/test | |
40 | - apt-get -qq update | |
41 | - apt-get install -qq -y g++-4.7 | |
42 | - if [ -n "$CC" ]; then $CC --version; fi | |
43 | - if [ -n "$AS" ]; then $AS --version; fi | |
44 | - ./tools/test_autorun.sh "$TEST_TYPE" | |
45 | ||
46 | --- | |
47 | kind: pipeline | |
48 | name: arm64-linux-gcc-6 | |
49 | ||
50 | platform: | |
51 | os: linux | |
52 | arch: arm64 | |
53 | ||
54 | steps: | |
55 | - name: arm64-linux-gcc-6 | |
56 | image: debian:9 | |
57 | environment: | |
58 | C_COMPILER: gcc | |
59 | commands: | |
60 | - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi | |
61 | - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi | |
62 | - apt-get -q update | |
63 | - apt-get install -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf | |
64 | - if [ -n "$CC" ]; then $CC --version; fi | |
65 | - if [ -n "$AS" ]; then $AS --version; fi | |
66 | - ./tools/test_autorun.sh "$TEST_TYPE" | |
67 | ||
68 | --- | |
69 | kind: pipeline | |
70 | name: arm64-linux-extended-tests | |
71 | ||
72 | platform: | |
73 | os: linux | |
74 | arch: arm64 | |
75 | ||
76 | steps: | |
77 | - name: arm64-linux-extended-tests | |
78 | image: ubuntu:xenial | |
79 | environment: | |
80 | TEST_TYPE: ext | |
81 | commands: | |
82 | - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi | |
83 | - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi | |
84 | - apt-get -qq update | |
85 | - apt-get install -qq -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf | |
86 | - if [ -n "$CC" ]; then $CC --version; fi | |
87 | - if [ -n "$AS" ]; then $AS --version; fi | |
88 | - ./tools/test_autorun.sh "$TEST_TYPE" |
12 | 12 | include \ |
13 | 13 | README.md \ |
14 | 14 | CONTRIBUTING.md \ |
15 | Release_notes.txt | |
15 | Release_notes.txt \ | |
16 | doc/test.md \ | |
17 | doc/build.md | |
16 | 18 | |
17 | 19 | EXCLUDE = include/test.h include/types.h include/unaligned.h |
18 | 20 | EXCLUDE_PATTERNS = */include/*_multibinary.h |
50 | 50 | |
51 | 51 | # LIB version info not necessarily the same as package version |
52 | 52 | LIBISAL_CURRENT=2 |
53 | LIBISAL_REVISION=29 | |
53 | LIBISAL_REVISION=30 | |
54 | 54 | LIBISAL_AGE=0 |
55 | 55 | |
56 | 56 | lib_LTLIBRARIES = libisal.la |
116 | 116 | @echo Completed run: $< |
117 | 117 | |
118 | 118 | # Support for yasm/nasm/gas |
119 | if INTEL_CET_ENABLED | |
120 | export CET_LD=$(LD) | |
121 | endif | |
119 | 122 | if USE_YASM |
123 | if INTEL_CET_ENABLED | |
124 | as_filter = ${srcdir}/tools/yasm-cet-filter.sh | |
125 | else | |
120 | 126 | as_filter = ${srcdir}/tools/yasm-filter.sh |
121 | 127 | endif |
128 | endif | |
122 | 129 | if USE_NASM |
130 | if INTEL_CET_ENABLED | |
131 | as_filter = ${srcdir}/tools/nasm-cet-filter.sh | |
132 | else | |
123 | 133 | as_filter = ${srcdir}/tools/nasm-filter.sh |
134 | endif | |
124 | 135 | endif |
125 | 136 | if CPU_AARCH64 |
126 | 137 | as_filter = $(CC) -D__ASSEMBLY__ |
128 | 139 | |
129 | 140 | CCAS = $(as_filter) |
130 | 141 | EXTRA_DIST += tools/yasm-filter.sh tools/nasm-filter.sh |
142 | EXTRA_DIST += tools/yasm-cet-filter.sh tools/nasm-cet-filter.sh | |
131 | 143 | |
132 | 144 | AM_CFLAGS = ${my_CFLAGS} ${INCLUDE} $(src_include) ${D} |
133 | 145 | if CPU_AARCH64 |
0 | 0 | ######################################################################## |
1 | # Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
1 | # Copyright(c) 2011-2017 Intel Corporation All rights reserved. | |
2 | 2 | # |
3 | 3 | # Redistribution and use in source and binary forms, with or without |
4 | 4 | # modification, are permitted provided that the following conditions |
26 | 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 | ######################################################################## |
28 | 28 | |
29 | objs = \ | |
29 | # This file can be auto-regenerated by running: make -f Makefile.unx Makefile.nmake | |
30 | ||
31 | objs = \ | |
30 | 32 | bin\ec_base.obj \ |
33 | bin\raid_base.obj \ | |
34 | bin\crc_base.obj \ | |
35 | bin\crc64_base.obj \ | |
36 | bin\igzip.obj \ | |
37 | bin\hufftables_c.obj \ | |
38 | bin\igzip_base.obj \ | |
39 | bin\igzip_icf_base.obj \ | |
40 | bin\adler32_base.obj \ | |
41 | bin\flatten_ll.obj \ | |
42 | bin\encode_df.obj \ | |
43 | bin\igzip_icf_body.obj \ | |
44 | bin\huff_codes.obj \ | |
45 | bin\igzip_inflate.obj \ | |
46 | bin\mem_zero_detect_base.obj \ | |
31 | 47 | bin\ec_highlevel_func.obj \ |
32 | bin\ec_multibinary.obj \ | |
33 | bin\gf_2vect_dot_prod_avx.obj \ | |
34 | bin\gf_2vect_dot_prod_avx2.obj \ | |
35 | bin\gf_2vect_dot_prod_avx512.obj \ | |
36 | bin\gf_2vect_dot_prod_sse.obj \ | |
37 | bin\gf_2vect_mad_avx.obj \ | |
38 | bin\gf_2vect_mad_avx2.obj \ | |
39 | bin\gf_2vect_mad_avx512.obj \ | |
40 | bin\gf_2vect_mad_sse.obj \ | |
41 | bin\gf_3vect_dot_prod_avx.obj \ | |
42 | bin\gf_3vect_dot_prod_avx2.obj \ | |
43 | bin\gf_3vect_dot_prod_avx512.obj \ | |
44 | bin\gf_3vect_dot_prod_sse.obj \ | |
45 | bin\gf_3vect_mad_avx.obj \ | |
46 | bin\gf_3vect_mad_avx2.obj \ | |
47 | bin\gf_3vect_mad_avx512.obj \ | |
48 | bin\gf_3vect_mad_sse.obj \ | |
49 | bin\gf_4vect_dot_prod_avx.obj \ | |
50 | bin\gf_4vect_dot_prod_avx2.obj \ | |
51 | bin\gf_4vect_dot_prod_avx512.obj \ | |
52 | bin\gf_4vect_dot_prod_sse.obj \ | |
53 | bin\gf_4vect_mad_avx.obj \ | |
54 | bin\gf_4vect_mad_avx2.obj \ | |
55 | bin\gf_4vect_mad_avx512.obj \ | |
56 | bin\gf_4vect_mad_sse.obj \ | |
57 | bin\gf_5vect_dot_prod_avx.obj \ | |
58 | bin\gf_5vect_dot_prod_avx2.obj \ | |
59 | bin\gf_5vect_dot_prod_sse.obj \ | |
60 | bin\gf_5vect_mad_avx.obj \ | |
61 | bin\gf_5vect_mad_avx2.obj \ | |
62 | bin\gf_5vect_mad_sse.obj \ | |
63 | bin\gf_6vect_dot_prod_avx.obj \ | |
64 | bin\gf_6vect_dot_prod_avx2.obj \ | |
65 | bin\gf_6vect_dot_prod_sse.obj \ | |
66 | bin\gf_6vect_mad_avx.obj \ | |
67 | bin\gf_6vect_mad_avx2.obj \ | |
68 | bin\gf_6vect_mad_sse.obj \ | |
48 | bin\gf_vect_mul_sse.obj \ | |
49 | bin\gf_vect_mul_avx.obj \ | |
50 | bin\gf_vect_dot_prod_sse.obj \ | |
69 | 51 | bin\gf_vect_dot_prod_avx.obj \ |
70 | 52 | bin\gf_vect_dot_prod_avx2.obj \ |
53 | bin\gf_2vect_dot_prod_sse.obj \ | |
54 | bin\gf_3vect_dot_prod_sse.obj \ | |
55 | bin\gf_4vect_dot_prod_sse.obj \ | |
56 | bin\gf_5vect_dot_prod_sse.obj \ | |
57 | bin\gf_6vect_dot_prod_sse.obj \ | |
58 | bin\gf_2vect_dot_prod_avx.obj \ | |
59 | bin\gf_3vect_dot_prod_avx.obj \ | |
60 | bin\gf_4vect_dot_prod_avx.obj \ | |
61 | bin\gf_5vect_dot_prod_avx.obj \ | |
62 | bin\gf_6vect_dot_prod_avx.obj \ | |
63 | bin\gf_2vect_dot_prod_avx2.obj \ | |
64 | bin\gf_3vect_dot_prod_avx2.obj \ | |
65 | bin\gf_4vect_dot_prod_avx2.obj \ | |
66 | bin\gf_5vect_dot_prod_avx2.obj \ | |
67 | bin\gf_6vect_dot_prod_avx2.obj \ | |
68 | bin\gf_vect_mad_sse.obj \ | |
69 | bin\gf_2vect_mad_sse.obj \ | |
70 | bin\gf_3vect_mad_sse.obj \ | |
71 | bin\gf_4vect_mad_sse.obj \ | |
72 | bin\gf_5vect_mad_sse.obj \ | |
73 | bin\gf_6vect_mad_sse.obj \ | |
74 | bin\gf_vect_mad_avx.obj \ | |
75 | bin\gf_2vect_mad_avx.obj \ | |
76 | bin\gf_3vect_mad_avx.obj \ | |
77 | bin\gf_4vect_mad_avx.obj \ | |
78 | bin\gf_5vect_mad_avx.obj \ | |
79 | bin\gf_6vect_mad_avx.obj \ | |
80 | bin\gf_vect_mad_avx2.obj \ | |
81 | bin\gf_2vect_mad_avx2.obj \ | |
82 | bin\gf_3vect_mad_avx2.obj \ | |
83 | bin\gf_4vect_mad_avx2.obj \ | |
84 | bin\gf_5vect_mad_avx2.obj \ | |
85 | bin\gf_6vect_mad_avx2.obj \ | |
86 | bin\ec_multibinary.obj \ | |
71 | 87 | bin\gf_vect_dot_prod_avx512.obj \ |
72 | bin\gf_vect_dot_prod_sse.obj \ | |
73 | bin\gf_vect_mad_avx.obj \ | |
74 | bin\gf_vect_mad_avx2.obj \ | |
88 | bin\gf_2vect_dot_prod_avx512.obj \ | |
89 | bin\gf_3vect_dot_prod_avx512.obj \ | |
90 | bin\gf_4vect_dot_prod_avx512.obj \ | |
91 | bin\gf_5vect_dot_prod_avx512.obj \ | |
92 | bin\gf_6vect_dot_prod_avx512.obj \ | |
75 | 93 | bin\gf_vect_mad_avx512.obj \ |
76 | bin\gf_vect_mad_sse.obj \ | |
77 | bin\gf_vect_mul_avx.obj \ | |
78 | bin\gf_vect_mul_sse.obj \ | |
94 | bin\gf_2vect_mad_avx512.obj \ | |
95 | bin\gf_3vect_mad_avx512.obj \ | |
96 | bin\gf_4vect_mad_avx512.obj \ | |
97 | bin\gf_5vect_mad_avx512.obj \ | |
98 | bin\gf_6vect_mad_avx512.obj \ | |
99 | bin\xor_gen_sse.obj \ | |
100 | bin\pq_gen_sse.obj \ | |
101 | bin\xor_check_sse.obj \ | |
79 | 102 | bin\pq_check_sse.obj \ |
80 | 103 | bin\pq_gen_avx.obj \ |
104 | bin\xor_gen_avx.obj \ | |
81 | 105 | bin\pq_gen_avx2.obj \ |
106 | bin\xor_gen_avx512.obj \ | |
82 | 107 | bin\pq_gen_avx512.obj \ |
83 | bin\pq_gen_sse.obj \ | |
84 | bin\raid_base.obj \ | |
85 | 108 | bin\raid_multibinary.obj \ |
86 | bin\xor_check_sse.obj \ | |
87 | bin\xor_gen_avx.obj \ | |
88 | bin\xor_gen_avx512.obj \ | |
89 | bin\xor_gen_sse.obj \ | |
90 | 109 | bin\crc16_t10dif_01.obj \ |
91 | 110 | bin\crc16_t10dif_by4.obj \ |
92 | 111 | bin\crc16_t10dif_02.obj \ |
112 | bin\crc16_t10dif_by16_10.obj \ | |
93 | 113 | bin\crc16_t10dif_copy_by4.obj \ |
94 | 114 | bin\crc16_t10dif_copy_by4_02.obj \ |
95 | 115 | bin\crc32_ieee_01.obj \ |
96 | 116 | bin\crc32_ieee_02.obj \ |
97 | 117 | bin\crc32_ieee_by4.obj \ |
118 | bin\crc32_ieee_by16_10.obj \ | |
119 | bin\crc32_iscsi_01.obj \ | |
98 | 120 | bin\crc32_iscsi_00.obj \ |
99 | bin\crc32_iscsi_01.obj \ | |
100 | bin\crc64_base.obj \ | |
121 | bin\crc32_iscsi_by16_10.obj \ | |
122 | bin\crc_multibinary.obj \ | |
123 | bin\crc64_multibinary.obj \ | |
124 | bin\crc64_ecma_refl_by8.obj \ | |
125 | bin\crc64_ecma_refl_by16_10.obj \ | |
101 | 126 | bin\crc64_ecma_norm_by8.obj \ |
102 | bin\crc64_ecma_refl_by8.obj \ | |
127 | bin\crc64_ecma_norm_by16_10.obj \ | |
128 | bin\crc64_iso_refl_by8.obj \ | |
129 | bin\crc64_iso_refl_by16_10.obj \ | |
103 | 130 | bin\crc64_iso_norm_by8.obj \ |
104 | bin\crc64_iso_refl_by8.obj \ | |
131 | bin\crc64_iso_norm_by16_10.obj \ | |
132 | bin\crc64_jones_refl_by8.obj \ | |
133 | bin\crc64_jones_refl_by16_10.obj \ | |
105 | 134 | bin\crc64_jones_norm_by8.obj \ |
106 | bin\crc64_jones_refl_by8.obj \ | |
107 | bin\crc64_multibinary.obj \ | |
108 | bin\crc_base.obj \ | |
109 | bin\adler32_base.obj \ | |
110 | bin\crc_multibinary.obj \ | |
111 | bin\huff_codes.obj \ | |
112 | bin\hufftables_c.obj \ | |
113 | bin\igzip.obj \ | |
114 | bin\igzip_base.obj \ | |
135 | bin\crc64_jones_norm_by16_10.obj \ | |
136 | bin\crc32_gzip_refl_by8.obj \ | |
137 | bin\crc32_gzip_refl_by8_02.obj \ | |
138 | bin\crc32_gzip_refl_by16_10.obj \ | |
115 | 139 | bin\igzip_body.obj \ |
116 | bin\igzip_decode_block_stateless_01.obj \ | |
117 | bin\igzip_decode_block_stateless_04.obj \ | |
118 | 140 | bin\igzip_finish.obj \ |
119 | bin\flatten_ll.obj \ | |
120 | bin\encode_df.obj \ | |
121 | bin\encode_df_04.obj \ | |
122 | bin\proc_heap.obj \ | |
123 | 141 | bin\igzip_icf_body_h1_gr_bt.obj \ |
124 | 142 | bin\igzip_icf_finish.obj \ |
125 | bin\igzip_icf_base.obj \ | |
126 | bin\igzip_inflate.obj \ | |
127 | bin\igzip_inflate_multibinary.obj \ | |
143 | bin\rfc1951_lookup.obj \ | |
144 | bin\adler32_sse.obj \ | |
145 | bin\adler32_avx2_4.obj \ | |
128 | 146 | bin\igzip_multibinary.obj \ |
129 | 147 | bin\igzip_update_histogram_01.obj \ |
130 | 148 | bin\igzip_update_histogram_04.obj \ |
131 | bin\rfc1951_lookup.obj \ | |
132 | bin\crc32_gzip_refl_by8.obj \ | |
133 | bin\crc32_gzip_refl_by8_02.obj \ | |
134 | bin\crc32_gzip_refl_by16_10.obj \ | |
135 | bin\adler32_sse.obj \ | |
136 | bin\adler32_avx2_4.obj \ | |
149 | bin\igzip_decode_block_stateless_01.obj \ | |
150 | bin\igzip_decode_block_stateless_04.obj \ | |
151 | bin\igzip_inflate_multibinary.obj \ | |
152 | bin\encode_df_04.obj \ | |
153 | bin\encode_df_06.obj \ | |
154 | bin\proc_heap.obj \ | |
137 | 155 | bin\igzip_deflate_hash.obj \ |
156 | bin\igzip_gen_icf_map_lh1_06.obj \ | |
138 | 157 | bin\igzip_gen_icf_map_lh1_04.obj \ |
139 | bin\igzip_gen_icf_map_lh1_06.obj \ | |
140 | 158 | bin\igzip_set_long_icf_fg_04.obj \ |
141 | 159 | bin\igzip_set_long_icf_fg_06.obj \ |
142 | bin\igzip_icf_body.obj \ | |
143 | 160 | bin\mem_zero_detect_avx.obj \ |
144 | bin\mem_zero_detect_base.obj \ | |
145 | bin\mem_multibinary.obj \ | |
146 | bin\mem_zero_detect_sse.obj | |
147 | ||
148 | INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ -Imem/ | |
149 | LINKFLAGS = /nologo | |
150 | CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D) | |
151 | AFLAGS = -f win64 $(INCLUDES) $(D) | |
152 | CC = icl | |
153 | AS = yasm | |
161 | bin\mem_zero_detect_sse.obj \ | |
162 | bin\mem_multibinary.obj | |
163 | ||
164 | INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iprograms/ -Imem/ -Iinclude/ -Itests/fuzz/ -Iexamples/ec/ | |
165 | # Modern asm feature level; consider upgrading nasm/yasm before decreasing AS_FEATURE_LEVEL | |
166 | FEAT_FLAGS = -DHAVE_AS_KNOWS_AVX512 -DAS_FEATURE_LEVEL=10 | |
167 | CFLAGS_REL = -O2 -DNDEBUG /Z7 /MD /Gy | |
168 | CFLAGS_DBG = -Od -DDEBUG /Z7 /MDd | |
169 | LINKFLAGS = -nologo -incremental:no -debug | |
170 | CFLAGS = $(CFLAGS_REL) -nologo -D_USE_MATH_DEFINES $(FEAT_FLAGS) $(INCLUDES) $(D) | |
171 | AFLAGS = -f win64 $(FEAT_FLAGS) $(INCLUDES) $(D) | |
172 | CC = cl | |
173 | # or CC = icl -Qstd=c99 | |
174 | AS = nasm | |
154 | 175 | |
155 | 176 | lib: bin static dll |
156 | 177 | static: bin isa-l_static.lib |
164 | 185 | << |
165 | 186 | |
166 | 187 | isa-l.dll: $(objs) |
167 | link -out:$@ -dll -def:isa-l.def @<< | |
188 | link -out:$@ -dll -def:isa-l.def $(LINKFLAGS) @<< | |
168 | 189 | $? |
169 | 190 | << |
170 | 191 | |
188 | 209 | {igzip}.asm.obj: |
189 | 210 | $(AS) $(AFLAGS) -o $@ $? |
190 | 211 | |
212 | {programs}.c.obj: | |
213 | $(CC) $(CFLAGS) /c -Fo$@ $? | |
214 | {programs}.asm.obj: | |
215 | $(AS) $(AFLAGS) -o $@ $? | |
216 | ||
191 | 217 | {mem}.c.obj: |
192 | 218 | $(CC) $(CFLAGS) /c -Fo$@ $? |
193 | 219 | {mem}.asm.obj: |
194 | 220 | $(AS) $(AFLAGS) -o $@ $? |
195 | 221 | |
222 | ||
196 | 223 | # Examples |
197 | ex = xor_example.exe crc_simple_test.exe crc64_example.exe igzip_example.exe igzip_sync_flush_example.exe | |
224 | ex = \ | |
225 | xor_example.exe \ | |
226 | crc_simple_test.exe \ | |
227 | crc64_example.exe \ | |
228 | igzip_example.exe \ | |
229 | igzip_sync_flush_example.exe \ | |
230 | ec_simple_example.exe \ | |
231 | ec_piggyback_example.exe | |
232 | ||
198 | 233 | ex: lib $(ex) |
199 | 234 | |
200 | 235 | $(ex): $(@B).obj |
214 | 249 | pq_check_test.exe \ |
215 | 250 | crc16_t10dif_test.exe \ |
216 | 251 | crc16_t10dif_copy_test.exe \ |
252 | crc64_funcs_test.exe \ | |
217 | 253 | crc32_funcs_test.exe \ |
218 | crc64_funcs_test.exe \ | |
254 | igzip_rand_test.exe \ | |
219 | 255 | igzip_wrapper_hdr_test.exe \ |
220 | igzip_rand_test.exe \ | |
256 | checksum32_funcs_test.exe \ | |
221 | 257 | mem_zero_detect_test.exe |
222 | 258 | |
223 | 259 | checks: lib $(checks) |
243 | 279 | gf_vect_dot_prod_1tbl.exe \ |
244 | 280 | erasure_code_perf.exe \ |
245 | 281 | erasure_code_base_perf.exe \ |
246 | erasure_code_sse_perf.exe \ | |
247 | 282 | erasure_code_update_perf.exe \ |
248 | 283 | xor_gen_perf.exe \ |
249 | 284 | pq_gen_perf.exe \ |
250 | 285 | crc16_t10dif_perf.exe \ |
286 | crc16_t10dif_copy_perf.exe \ | |
287 | crc16_t10dif_op_perf.exe \ | |
251 | 288 | crc32_ieee_perf.exe \ |
252 | 289 | crc32_iscsi_perf.exe \ |
253 | igzip_perf.exe \ | |
254 | igzip_sync_flush_perf.exe \ | |
290 | crc64_funcs_perf.exe \ | |
255 | 291 | crc32_gzip_refl_perf.exe \ |
292 | adler32_perf.exe \ | |
256 | 293 | mem_zero_detect_perf.exe |
257 | 294 | |
258 | 295 | perfs: lib $(perfs) |
259 | 296 | $(perfs): $(@B).obj |
297 | ||
298 | progs = \ | |
299 | igzip.exe | |
300 | ||
301 | progs: lib $(progs) | |
302 | igzip.exe: programs\igzip_cli.obj | |
303 | link /out:$@ $(LINKFLAGS) isa-l.lib $? | |
260 | 304 | |
261 | 305 | clean: |
262 | 306 | -if exist *.obj del *.obj |
263 | 307 | -if exist bin\*.obj del bin\*.obj |
264 | 308 | -if exist isa-l_static.lib del isa-l_static.lib |
265 | 309 | -if exist *.exe del *.exe |
310 | -if exist *.pdb del *.pdb | |
266 | 311 | -if exist isa-l.lib del isa-l.lib |
267 | 312 | -if exist isa-l.dll del isa-l.dll |
313 | -if exist isa-l.exp del isa-l.exp | |
268 | 314 | |
269 | 315 | zlib.lib: |
316 | igzip_perf.exe: zlib.lib | |
270 | 317 | igzip_inflate_test.exe: zlib.lib |
50 | 50 | lib_name := bin/isa-l.a |
51 | 51 | |
52 | 52 | include make.inc |
53 | include tools/gen_nmake.mk | |
53 | 54 | |
54 | 55 | VPATH = . $(units) include tests/fuzz examples/ec |
1 | 1 | ================================================= |
2 | 2 | |
3 | 3 | [![Build Status](https://travis-ci.org/intel/isa-l.svg?branch=master)](https://travis-ci.org/intel/isa-l) |
4 | [![Package on conda-forge](https://img.shields.io/conda/v/conda-forge/isa-l.svg)](https://anaconda.org/conda-forge/isa-l) | |
4 | 5 | |
5 | 6 | ISA-L is a collection of optimized low-level functions targeting storage |
6 | 7 | applications. ISA-L includes: |
60 | 61 | |
61 | 62 | nmake -f Makefile.nmake |
62 | 63 | |
64 | or see [details on setting up the environment here](doc/build.md). | |
65 | ||
63 | 66 | ### Other make targets |
64 | 67 | Other targets include: |
65 | 68 | * `make check` : create and run tests |
0 | v2.29 Intel Intelligent Storage Acceleration Library Release Notes | |
0 | v2.30 Intel Intelligent Storage Acceleration Library Release Notes | |
1 | 1 | ================================================================== |
2 | 2 | |
3 | 3 | RELEASE NOTE CONTENTS |
14 | 14 | |
15 | 15 | 2. FIXED ISSUES |
16 | 16 | --------------- |
17 | v2.30 | |
18 | ||
19 | * Intel CET support. | |
20 | * Windows nasm support fix. | |
21 | ||
17 | 22 | v2.28 |
18 | 23 | |
19 | 24 | * Fix documentation on gf_vect_mad(). Min length listed as 32 instead of |
108 | 113 | |
109 | 114 | 3. CHANGE LOG & FEATURES ADDED |
110 | 115 | ------------------------------ |
116 | v2.30 | |
117 | ||
118 | * Igzip compression enhancements. | |
119 | - New functions for dictionary acceleration. Split dictionary processing and | |
120 | resetting can greatly accelerate the performance of compressing many small | |
121 | files with a dictionary. | |
122 | - New static level 0 header decode tables. Accelerates decompressing small | |
123 | files that are level 0 compressed by skipping the known header parsing. | |
124 | - New feature for igzip cli tool: support for concatenated .gz files. On | |
125 | decompression, igzip will process a series of independent, concatenated .gz | |
126 | files into one output stream. | |
127 | ||
128 | * CRC Improvements | |
129 | - New vclmul version of crc32_iscsi(). | |
130 | - Updates for aarch64. | |
131 | ||
111 | 132 | v2.29 |
112 | 133 | |
113 | 134 | * CRC Improvements |
2 | 2 | |
3 | 3 | AC_PREREQ(2.69) |
4 | 4 | AC_INIT([libisal], |
5 | [2.29.0], | |
5 | [2.30.0], | |
6 | 6 | [sg.support.isal@intel.com], |
7 | 7 | [isa-l], |
8 | 8 | [http://01.org/storage-acceleration-library]) |
50 | 50 | |
51 | 51 | # Check for programs |
52 | 52 | AC_PROG_CC_STDC |
53 | AC_PROG_LD | |
53 | 54 | AC_USE_SYSTEM_EXTENSIONS |
54 | 55 | AM_SILENT_RULES([yes]) |
55 | 56 | LT_INIT |
67 | 68 | |
68 | 69 | # If this build is for x86, look for yasm and nasm |
69 | 70 | if test x"$is_x86" = x"yes"; then |
71 | AC_MSG_CHECKING([whether Intel CET is enabled]) | |
72 | AC_TRY_COMPILE([],[ | |
73 | #ifndef __CET__ | |
74 | # error CET is not enabled | |
75 | #endif], | |
76 | [AC_MSG_RESULT([yes]) | |
77 | intel_cet_enabled=yes], | |
78 | [AC_MSG_RESULT([no]) | |
79 | intel_cet_enabled=no]) | |
80 | ||
81 | ||
70 | 82 | # Pick an assembler yasm or nasm |
71 | 83 | if test x"$AS" = x""; then |
72 | 84 | # Check for yasm and yasm features |
248 | 260 | AM_CONDITIONAL(DARWIN, test "x" = "y") |
249 | 261 | fi |
250 | 262 | |
263 | AM_CONDITIONAL(INTEL_CET_ENABLED, [test x"$intel_cet_enabled" = x"yes"]) | |
251 | 264 | |
252 | 265 | # Check for header files |
253 | 266 | AC_CHECK_HEADERS([limits.h stdint.h stdlib.h string.h]) |
49 | 49 | crc/crc32_ieee_by16_10.asm \ |
50 | 50 | crc/crc32_iscsi_01.asm \ |
51 | 51 | crc/crc32_iscsi_00.asm \ |
52 | crc/crc32_iscsi_by16_10.asm \ | |
52 | 53 | crc/crc_multibinary.asm \ |
53 | 54 | crc/crc64_multibinary.asm \ |
54 | 55 | crc/crc64_ecma_refl_by8.asm \ |
0 | 0 | ######################################################################## |
1 | # Copyright(c) 2019 Arm Corporation All rights reserved. | |
1 | # Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | 2 | # |
3 | 3 | # Redistribution and use in source and binary forms, with or without |
4 | 4 | # modification, are permitted provided that the following conditions |
33 | 33 | lsrc_aarch64 += \ |
34 | 34 | crc/aarch64/crc16_t10dif_pmull.S \ |
35 | 35 | crc/aarch64/crc16_t10dif_copy_pmull.S \ |
36 | crc/aarch64/crc32_iscsi_refl_pmull.S \ | |
37 | crc/aarch64/crc32_iscsi_refl_hw_fold.S \ | |
38 | 36 | crc/aarch64/crc32_ieee_norm_pmull.S \ |
39 | crc/aarch64/crc32_gzip_refl_pmull.S \ | |
40 | crc/aarch64/crc32_gzip_refl_hw_fold.S \ | |
41 | 37 | crc/aarch64/crc64_ecma_refl_pmull.S \ |
42 | 38 | crc/aarch64/crc64_ecma_norm_pmull.S \ |
43 | 39 | crc/aarch64/crc64_iso_refl_pmull.S \ |
44 | 40 | crc/aarch64/crc64_iso_norm_pmull.S \ |
45 | 41 | crc/aarch64/crc64_jones_refl_pmull.S \ |
46 | 42 | crc/aarch64/crc64_jones_norm_pmull.S |
43 | ||
44 | # CRC32/CRC32C implementations selected per micro-architecture (default and Neoverse N1 variants) | |
45 | lsrc_aarch64 += \ | |
46 | crc/aarch64/crc32_iscsi_refl_pmull.S \ | |
47 | crc/aarch64/crc32_gzip_refl_pmull.S \ | |
48 | crc/aarch64/crc32_iscsi_3crc_fold.S \ | |
49 | crc/aarch64/crc32_gzip_refl_3crc_fold.S \ | |
50 | crc/aarch64/crc32_iscsi_crc_ext.S \ | |
51 | crc/aarch64/crc32_gzip_refl_crc_ext.S \ | |
52 | crc/aarch64/crc32_mix_default.S \ | |
53 | crc/aarch64/crc32c_mix_default.S \ | |
54 | crc/aarch64/crc32_mix_neoverse_n1.S \ | |
55 | crc/aarch64/crc32c_mix_neoverse_n1.S | |
56 |
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | ||
30 | ||
31 | ||
32 | .macro crc32_hw_common poly_type | |
33 | ||
34 | .ifc \poly_type,crc32 | |
35 | mvn wCRC,wCRC | |
36 | .endif | |
37 | cbz LEN, .zero_length_ret | |
38 | tbz BUF, 0, .align_short | |
39 | ldrb wdata,[BUF],1 | |
40 | sub LEN,LEN,1 | |
41 | crc32_u8 wCRC,wCRC,wdata | |
42 | .align_short: | |
43 | tst BUF,2 | |
44 | ccmp LEN,1,0,ne | |
45 | bhi .align_short_2 | |
46 | tst BUF,4 | |
47 | ccmp LEN,3,0,ne | |
48 | bhi .align_word | |
49 | ||
50 | .align_finish: | |
51 | ||
52 | cmp LEN, 63 | |
53 | bls .loop_16B | |
54 | .loop_64B: | |
55 | ldp data0, data1, [BUF],#16 | |
56 | prfm pldl2keep,[BUF,2048] | |
57 | sub LEN,LEN,#64 | |
58 | ldp data2, data3, [BUF],#16 | |
59 | prfm pldl1keep,[BUF,256] | |
60 | cmp LEN,#64 | |
61 | crc32_u64 wCRC, wCRC, data0 | |
62 | crc32_u64 wCRC, wCRC, data1 | |
63 | ldp data0, data1, [BUF],#16 | |
64 | crc32_u64 wCRC, wCRC, data2 | |
65 | crc32_u64 wCRC, wCRC, data3 | |
66 | ldp data2, data3, [BUF],#16 | |
67 | crc32_u64 wCRC, wCRC, data0 | |
68 | crc32_u64 wCRC, wCRC, data1 | |
69 | crc32_u64 wCRC, wCRC, data2 | |
70 | crc32_u64 wCRC, wCRC, data3 | |
71 | bge .loop_64B | |
72 | ||
73 | .loop_16B: | |
74 | cmp LEN, 15 | |
75 | bls .less_16B | |
76 | ldp data0, data1, [BUF],#16 | |
77 | sub LEN,LEN,#16 | |
78 | cmp LEN,15 | |
79 | crc32_u64 wCRC, wCRC, data0 | |
80 | crc32_u64 wCRC, wCRC, data1 | |
81 | bls .less_16B | |
82 | ldp data0, data1, [BUF],#16 | |
83 | sub LEN,LEN,#16 | |
84 | cmp LEN,15 | |
85 | crc32_u64 wCRC, wCRC, data0 | |
86 | crc32_u64 wCRC, wCRC, data1 | |
87 | bls .less_16B | |
88 | ldp data0, data1, [BUF],#16 | |
89 | sub LEN,LEN,#16 //MUST less than 16B | |
90 | crc32_u64 wCRC, wCRC, data0 | |
91 | crc32_u64 wCRC, wCRC, data1 | |
92 | .less_16B: | |
93 | cmp LEN, 7 | |
94 | bls .less_8B | |
95 | ldr data0, [BUF], 8 | |
96 | sub LEN, LEN, #8 | |
97 | crc32_u64 wCRC, wCRC, data0 | |
98 | .less_8B: | |
99 | cmp LEN, 3 | |
100 | bls .less_4B | |
101 | ldr wdata, [BUF], 4 | |
102 | sub LEN, LEN, #4 | |
103 | crc32_u32 wCRC, wCRC, wdata | |
104 | .less_4B: | |
105 | cmp LEN, 1 | |
106 | bls .less_2B | |
107 | ldrh wdata, [BUF], 2 | |
108 | sub LEN, LEN, #2 | |
109 | crc32_u16 wCRC, wCRC, wdata | |
110 | .less_2B: | |
111 | cbz LEN, .zero_length_ret | |
112 | ldrb wdata, [BUF] | |
113 | crc32_u8 wCRC, wCRC, wdata | |
114 | .zero_length_ret: | |
115 | .ifc \poly_type,crc32 | |
116 | mvn w0, wCRC | |
117 | .else | |
118 | mov w0, wCRC | |
119 | .endif | |
120 | ret | |
121 | .align_short_2: | |
122 | ldrh wdata, [BUF], 2 | |
123 | sub LEN, LEN, 2 | |
124 | tst BUF, 4 | |
125 | crc32_u16 wCRC, wCRC, wdata | |
126 | ccmp LEN, 3, 0, ne | |
127 | bls .align_finish | |
128 | .align_word: | |
129 | ldr wdata, [BUF], 4 | |
130 | sub LEN, LEN, #4 | |
131 | crc32_u32 wCRC, wCRC, wdata | |
132 | b .align_finish | |
133 | .endm | |
134 | ||
135 | .macro crc32_3crc_fold poly_type | |
136 | .ifc \poly_type,crc32 | |
137 | mvn wCRC,wCRC | |
138 | .endif | |
139 | cbz LEN, .zero_length_ret | |
140 | tbz BUF, 0, .align_short | |
141 | ldrb wdata,[BUF],1 | |
142 | sub LEN,LEN,1 | |
143 | crc32_u8 wCRC,wCRC,wdata | |
144 | .align_short: | |
145 | tst BUF,2 | |
146 | ccmp LEN,1,0,ne | |
147 | bhi .align_short_2 | |
148 | tst BUF,4 | |
149 | ccmp LEN,3,0,ne | |
150 | bhi .align_word | |
151 | ||
152 | .align_finish: | |
153 | cmp LEN,1023 | |
154 | adr const_adr, .Lconstants | |
155 | bls 1f | |
156 | ldp dconst0,dconst1,[const_adr] | |
157 | 2: | |
158 | ldr crc0_data0,[ptr_crc0],8 | |
159 | prfm pldl2keep,[ptr_crc0,3*1024-8] | |
160 | mov crc1,0 | |
161 | mov crc2,0 | |
162 | add ptr_crc1,ptr_crc0,336 | |
163 | add ptr_crc2,ptr_crc0,336*2 | |
164 | crc32_u64 crc0,crc0,crc0_data0 | |
165 | .set offset,0 | |
166 | .set ptr_offset,8 | |
167 | .rept 5 | |
168 | ldp crc0_data0,crc0_data1,[ptr_crc0],16 | |
169 | ldp crc1_data0,crc1_data1,[ptr_crc1],16 | |
170 | .set offset,offset+64 | |
171 | .set ptr_offset,ptr_offset+16 | |
172 | prfm pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset] | |
173 | crc32_u64 crc0,crc0,crc0_data0 | |
174 | crc32_u64 crc0,crc0,crc0_data1 | |
175 | ldp crc2_data0,crc2_data1,[ptr_crc2],16 | |
176 | crc32_u64 crc1,crc1,crc1_data0 | |
177 | crc32_u64 crc1,crc1,crc1_data1 | |
178 | crc32_u64 crc2,crc2,crc2_data0 | |
179 | crc32_u64 crc2,crc2,crc2_data1 | |
180 | .endr | |
181 | .set l1_offset,0 | |
182 | .rept 10 | |
183 | ldp crc0_data0,crc0_data1,[ptr_crc0],16 | |
184 | ldp crc1_data0,crc1_data1,[ptr_crc1],16 | |
185 | .set offset,offset+64 | |
186 | .set ptr_offset,ptr_offset+16 | |
187 | prfm pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset] | |
188 | prfm pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset] | |
189 | .set l1_offset,l1_offset+64 | |
190 | crc32_u64 crc0,crc0,crc0_data0 | |
191 | crc32_u64 crc0,crc0,crc0_data1 | |
192 | ldp crc2_data0,crc2_data1,[ptr_crc2],16 | |
193 | crc32_u64 crc1,crc1,crc1_data0 | |
194 | crc32_u64 crc1,crc1,crc1_data1 | |
195 | crc32_u64 crc2,crc2,crc2_data0 | |
196 | crc32_u64 crc2,crc2,crc2_data1 | |
197 | .endr | |
198 | ||
199 | .rept 6 | |
200 | ldp crc0_data0,crc0_data1,[ptr_crc0],16 | |
201 | ldp crc1_data0,crc1_data1,[ptr_crc1],16 | |
202 | .set ptr_offset,ptr_offset+16 | |
203 | prfm pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset] | |
204 | .set l1_offset,l1_offset+64 | |
205 | crc32_u64 crc0,crc0,crc0_data0 | |
206 | crc32_u64 crc0,crc0,crc0_data1 | |
207 | ldp crc2_data0,crc2_data1,[ptr_crc2],16 | |
208 | crc32_u64 crc1,crc1,crc1_data0 | |
209 | crc32_u64 crc1,crc1,crc1_data1 | |
210 | crc32_u64 crc2,crc2,crc2_data0 | |
211 | crc32_u64 crc2,crc2,crc2_data1 | |
212 | .endr | |
213 | ldr crc2_data0,[ptr_crc2] | |
214 | fmov dtmp0,xcrc0 | |
215 | fmov dtmp1,xcrc1 | |
216 | crc32_u64 crc2,crc2,crc2_data0 | |
217 | add ptr_crc0,ptr_crc0,1024-(336+8) | |
218 | pmull vtmp0.1q,vtmp0.1d,vconst0.1d | |
219 | sub LEN,LEN,1024 | |
220 | pmull vtmp1.1q,vtmp1.1d,vconst1.1d | |
221 | cmp LEN,1024 | |
222 | fmov xcrc0,dtmp0 | |
223 | fmov xcrc1,dtmp1 | |
224 | crc32_u64 crc0,wzr,xcrc0 | |
225 | crc32_u64 crc1,wzr,xcrc1 | |
226 | ||
227 | eor crc0,crc0,crc2 | |
228 | eor crc0,crc0,crc1 | |
229 | ||
230 | bhs 2b | |
231 | 1: | |
232 | cmp LEN, 63 | |
233 | bls .loop_16B | |
234 | .loop_64B: | |
235 | ldp data0, data1, [BUF],#16 | |
236 | sub LEN,LEN,#64 | |
237 | ldp data2, data3, [BUF],#16 | |
238 | cmp LEN,#64 | |
239 | crc32_u64 wCRC, wCRC, data0 | |
240 | crc32_u64 wCRC, wCRC, data1 | |
241 | ldp data0, data1, [BUF],#16 | |
242 | crc32_u64 wCRC, wCRC, data2 | |
243 | crc32_u64 wCRC, wCRC, data3 | |
244 | ldp data2, data3, [BUF],#16 | |
245 | crc32_u64 wCRC, wCRC, data0 | |
246 | crc32_u64 wCRC, wCRC, data1 | |
247 | crc32_u64 wCRC, wCRC, data2 | |
248 | crc32_u64 wCRC, wCRC, data3 | |
249 | bge .loop_64B | |
250 | ||
251 | .loop_16B: | |
252 | cmp LEN, 15 | |
253 | bls .less_16B | |
254 | ldp data0, data1, [BUF],#16 | |
255 | sub LEN,LEN,#16 | |
256 | cmp LEN,15 | |
257 | crc32_u64 wCRC, wCRC, data0 | |
258 | crc32_u64 wCRC, wCRC, data1 | |
259 | bls .less_16B | |
260 | ldp data0, data1, [BUF],#16 | |
261 | sub LEN,LEN,#16 | |
262 | cmp LEN,15 | |
263 | crc32_u64 wCRC, wCRC, data0 | |
264 | crc32_u64 wCRC, wCRC, data1 | |
265 | bls .less_16B | |
266 | ldp data0, data1, [BUF],#16 | |
267 | sub LEN,LEN,#16 //MUST less than 16B | |
268 | crc32_u64 wCRC, wCRC, data0 | |
269 | crc32_u64 wCRC, wCRC, data1 | |
270 | .less_16B: | |
271 | cmp LEN, 7 | |
272 | bls .less_8B | |
273 | ldr data0, [BUF], 8 | |
274 | sub LEN, LEN, #8 | |
275 | crc32_u64 wCRC, wCRC, data0 | |
276 | .less_8B: | |
277 | cmp LEN, 3 | |
278 | bls .less_4B | |
279 | ldr wdata, [BUF], 4 | |
280 | sub LEN, LEN, #4 | |
281 | crc32_u32 wCRC, wCRC, wdata | |
282 | .less_4B: | |
283 | cmp LEN, 1 | |
284 | bls .less_2B | |
285 | ldrh wdata, [BUF], 2 | |
286 | sub LEN, LEN, #2 | |
287 | crc32_u16 wCRC, wCRC, wdata | |
288 | .less_2B: | |
289 | cbz LEN, .zero_length_ret | |
290 | ldrb wdata, [BUF] | |
291 | crc32_u8 wCRC, wCRC, wdata | |
292 | .zero_length_ret: | |
293 | .ifc \poly_type,crc32 | |
294 | mvn w0, wCRC | |
295 | .else | |
296 | mov w0, wCRC | |
297 | .endif | |
298 | ret | |
299 | .align_short_2: | |
300 | ldrh wdata, [BUF], 2 | |
301 | sub LEN, LEN, 2 | |
302 | tst BUF, 4 | |
303 | crc32_u16 wCRC, wCRC, wdata | |
304 | ccmp LEN, 3, 0, ne | |
305 | bls .align_finish | |
306 | .align_word: | |
307 | ldr wdata, [BUF], 4 | |
308 | sub LEN, LEN, #4 | |
309 | crc32_u32 wCRC, wCRC, wdata | |
310 | b .align_finish | |
311 | .Lconstants: | |
312 | .ifc \poly_type,crc32 | |
313 | .quad 0xb486819b | |
314 | .quad 0x76278617 | |
315 | .else | |
316 | .quad 0xe417f38a | |
317 | .quad 0x8f158014 | |
318 | .endif | |
319 | ||
320 | .endm |
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | ||
30 | ||
31 | ||
/*
 * crc32_hw_common poly_type
 *
 * Expands to the complete body of a CRC routine that relies only on the
 * crc32_u8 / crc32_u16 / crc32_u32 / crc32_u64 wrapper macros.  The file
 * that invokes it has to provide those wrappers and the register aliases
 * wCRC, BUF, LEN, wdata and data0-data3.
 *
 *   poly_type   if it is the literal word crc32 the running value is
 *               bit-inverted on entry and again on exit; with any other
 *               word the value is used as is.
 *
 * Result is left in w0.  LEN == 0 returns wCRC exactly as it was passed in.
 * LEN is treated as unsigned throughout, so every length test uses an
 * unsigned condition (bls / bhi / bhs).
 */
.macro crc32_hw_common poly_type
	cbz	LEN, .zero_length_ret
	.ifc \poly_type,crc32
	mvn	wCRC, wCRC
	.endif

	// Head: consume 1, 2 and 4 bytes while BUF has bit 0, 1, 2 set
	// and enough input is left for that step.
	tbz	BUF, 0, .align_short
	ldrb	wdata, [BUF], 1
	sub	LEN, LEN, 1
	crc32_u8	wCRC, wCRC, wdata
.align_short:
	tst	BUF, 2
	ccmp	LEN, 1, 0, ne		// flags forced to 0 when bit 1 is clear
	bhi	.align_short_2
	tst	BUF, 4
	ccmp	LEN, 3, 0, ne
	bhi	.align_word

.align_finish:
	cmp	LEN, 63
	bls	.loop_16B

	// Main loop: 64 bytes per pass, loads interleaved with CRC steps.
.loop_64B:
	ldp	data0, data1, [BUF], #16
	sub	LEN, LEN, #64
	ldp	data2, data3, [BUF], #16
	cmp	LEN, #64		// flags consumed by the bhs below
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	ldp	data0, data1, [BUF], #16
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	ldp	data2, data3, [BUF], #16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	bhs	.loop_64B		// unsigned: LEN >= 64

	// Fewer than 64 bytes left: at most three 16-byte steps, unrolled.
.loop_16B:
	cmp	LEN, 15
	bls	.less_16B
	ldp	data0, data1, [BUF], #16
	sub	LEN, LEN, #16
	cmp	LEN, 15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls	.less_16B
	ldp	data0, data1, [BUF], #16
	sub	LEN, LEN, #16
	cmp	LEN, 15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls	.less_16B
	ldp	data0, data1, [BUF], #16
	sub	LEN, LEN, #16		// LEN is now below 16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1

	// Tail: 8, 4, 2 and 1 byte steps.
.less_16B:
	cmp	LEN, 7
	bls	.less_8B
	ldr	data0, [BUF], 8
	sub	LEN, LEN, #8
	crc32_u64	wCRC, wCRC, data0
.less_8B:
	cmp	LEN, 3
	bls	.less_4B
	ldr	wdata, [BUF], 4
	sub	LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
.less_4B:
	cmp	LEN, 1
	bls	.less_2B
	ldrh	wdata, [BUF], 2
	sub	LEN, LEN, #2
	crc32_u16	wCRC, wCRC, wdata
.less_2B:
	cbz	LEN, .finish_exit
	ldrb	wdata, [BUF]
	crc32_u8	wCRC, wCRC, wdata
.finish_exit:
	.ifc \poly_type,crc32
	mvn	w0, wCRC
	.else
	mov	w0, wCRC
	.endif
	ret

	// Zero-length input: no inversion was applied, return the seed.
.zero_length_ret:
	mov	w0, wCRC
	ret

	// Out-of-line alignment steps; both rejoin at .align_finish.
.align_short_2:
	ldrh	wdata, [BUF], 2
	sub	LEN, LEN, 2
	tst	BUF, 4
	crc32_u16	wCRC, wCRC, wdata
	ccmp	LEN, 3, 0, ne
	bls	.align_finish
.align_word:
	ldr	wdata, [BUF], 4
	sub	LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
	b	.align_finish

.endm
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | ||
/*
 * declare_var_vector_reg name, reg
 *
 * Creates four aliases for SIMD register number `reg`, one per register
 * view: name_v (v<reg>), name_q (q<reg>), name_d (d<reg>), name_s (s<reg>).
 */
.macro declare_var_vector_reg name:req,reg:req
	\name\()_v	.req	v\reg
	\name\()_q	.req	q\reg
	\name\()_d	.req	d\reg
	\name\()_s	.req	s\reg
.endm
/* Vectors filled from the constant table by crc32_common_mix (v20-v25). */
declare_var_vector_reg	k1k2,20
declare_var_vector_reg	k3k4,21
declare_var_vector_reg	poly,22
declare_var_vector_reg	k5k0,23
declare_var_vector_reg	mask,24
declare_var_vector_reg	fold_poly,25

/* Working vectors v0-v7. */
declare_var_vector_reg	tmp0,0
declare_var_vector_reg	tmp1,1
declare_var_vector_reg	tmp2,2
declare_var_vector_reg	tmp3,3
declare_var_vector_reg	tmp4,4
declare_var_vector_reg	tmp5,5
declare_var_vector_reg	tmp6,6
declare_var_vector_reg	tmp7,7

/* 64-byte input chunk loaded by pmull_fold (v16-v19). */
declare_var_vector_reg	pmull_data0,16
declare_var_vector_reg	pmull_data1,17
declare_var_vector_reg	pmull_data2,18
declare_var_vector_reg	pmull_data3,19

/* Vector that crc32_common_mix clears with movi before use. */
vzr		.req	v26

/* General-purpose registers used inside the 2048-byte loop. */
const_addr	.req	x3
crc_blk_ptr	.req	x4
pmull_blk_ptr	.req	x5
crc_data0	.req	x6
crc_data1	.req	x7
crc_data2	.req	x9
crc_data3	.req	x10
xPmull		.req	x11
wPmull		.req	w11

/*
 * Registers used by the scalar tail.  They share x4-x7 with crc_blk_ptr,
 * pmull_blk_ptr, crc_data0 and crc_data1 above.
 */
data0		.req	x4
data1		.req	x5
data2		.req	x6
data3		.req	x7
wdata		.req	w4
73 | ||
/*
 * pmull_fold
 *
 * One interleaved step of the mixed kernel:
 *   - vector side: multiplies both 64-bit halves of tmp0-tmp3 by k1k2
 *     (pmull2 for the upper half, pmull for the lower half), XORs the two
 *     products together and then XORs in the next 64 bytes read from
 *     pmull_blk_ptr;
 *   - scalar side: runs twelve crc32_u64 steps on crc_data0-crc_data3 and
 *     issues six 16-byte loads from crc_blk_ptr.
 * Both pointers are post-incremented.  tmp4-tmp7 and pmull_data0-3 are
 * overwritten.
 */
.macro pmull_fold

	// upper halves -> tmp4..tmp7
	pmull2	tmp4_v.1q, tmp0_v.2d, k1k2_v.2d
	pmull2	tmp5_v.1q, tmp1_v.2d, k1k2_v.2d
	pmull2	tmp6_v.1q, tmp2_v.2d, k1k2_v.2d
	pmull2	tmp7_v.1q, tmp3_v.2d, k1k2_v.2d

	// lower halves, in place
	pmull	tmp0_v.1q, tmp0_v.1d, k1k2_v.1d
	pmull	tmp1_v.1q, tmp1_v.1d, k1k2_v.1d
	pmull	tmp2_v.1q, tmp2_v.1d, k1k2_v.1d
	pmull	tmp3_v.1q, tmp3_v.1d, k1k2_v.1d
	ld1	{pmull_data0_v.16b-pmull_data3_v.16b},[pmull_blk_ptr],#64
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16

	// combine the two products
	eor	tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
	eor	tmp1_v.16b, tmp1_v.16b, tmp5_v.16b
	eor	tmp2_v.16b, tmp2_v.16b, tmp6_v.16b
	eor	tmp3_v.16b, tmp3_v.16b, tmp7_v.16b

	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16

	// mix in the freshly loaded 64 bytes (pmull_data0-3 are v16-v19)
	eor	tmp0_v.16b, tmp0_v.16b, pmull_data0_v.16b
	eor	tmp1_v.16b, tmp1_v.16b, pmull_data1_v.16b
	eor	tmp2_v.16b, tmp2_v.16b, pmull_data2_v.16b
	eor	tmp3_v.16b, tmp3_v.16b, pmull_data3_v.16b
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
.endm
115 | ||
116 | ||
117 | ||
/*
 * crc32_common_mix poly_type
 *
 * Expands to the body of a CRC routine that processes the input in blocks
 * of MIX_BLK_SIZE (2048) bytes.  Inside one block the first 512 bytes are
 * folded with pmull/pmull2 while the remaining 1536 bytes, starting at
 * BUF+512, go through crc32_u64; the two instruction streams are
 * interleaved.  At the end of a block the vector result is reduced,
 * multiplied by fold_poly, passed through crc32_u64 with a zero seed and
 * XORed into wCRC.  Whatever is left after the last full block is handled
 * by a scalar 64/16/8/4/2/1-byte tail.
 *
 *   poly_type   if it is the literal word crc32 the running value is
 *               bit-inverted on entry and again on exit.
 *
 * The invoking file must provide the crc32_u* wrappers, the aliases wCRC,
 * BUF and LEN, and a label .Lconstants holding the constant table that is
 * loaded into k1k2, k3k4, poly, k5k0, mask and fold_poly.
 *
 * LEN is treated as unsigned, so both loop-back branches use bhs.
 * The flags for `bhs loop_2048` are set by the cmp near the top of the
 * loop; nothing in between may write the condition flags.
 */
.macro crc32_common_mix poly_type
	.set	MIX_BLK_SIZE,2048

	.ifc \poly_type,crc32
	mvn	wCRC,wCRC
	.endif
	cmp	LEN,MIX_BLK_SIZE-1
	adr	const_addr, .Lconstants
	bls	start_final
	ld1	{k1k2_v.16b,k3k4_v.16b,poly_v.16b},[const_addr],#48
	movi	vzr.16b, #0
	ld1	{k5k0_v.8b,mask_v.8b,fold_poly_v.8b},[const_addr]

loop_2048:
	ld1	{tmp0_v.16b-tmp3_v.16b}, [BUF]
	add	pmull_blk_ptr,BUF,0x40
	add	crc_blk_ptr, BUF,512
	// inject the running CRC into the low 32 bits of the vector stream
	mov	tmp4_v.16b,vzr.16b
	fmov	tmp4_s, wCRC
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	eor	tmp0_v.16b,tmp0_v.16b,tmp4_v.16b
	mov	wCRC, 0			// scalar stream restarts from zero
	sub	LEN,LEN,MIX_BLK_SIZE
	cmp	LEN,MIX_BLK_SIZE	// consumed by bhs loop_2048
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16

	.rept	7
	pmull_fold
	.endr

	/* Folding cache line into 128bit */
	pmull2	tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	pmull	tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	pmull2	tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	pmull	tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp2_v.16b
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	pmull2	tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	pmull	tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp3_v.16b
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16


	/**
	 * perform the last 64 bit fold, also
	 * adds 32 zeroes to the input stream
	 */
	ext	tmp1_v.16b, tmp0_v.16b, tmp0_v.16b, #8
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	pmull2	tmp1_v.1q, tmp1_v.2d, k3k4_v.2d
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	ext	tmp0_v.16b, tmp0_v.16b, vzr.16b, #8
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16

	/* final 32-bit fold */
	ext	tmp1_v.16b, tmp0_v.16b, vzr.16b, #4
	and	tmp0_v.16b, tmp0_v.16b, mask_v.16b
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	pmull	tmp0_v.1q, tmp0_v.1d, k5k0_v.1d
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp1_v.16b

	/**
	 * Finish up with the bit-reversed barrett
	 * reduction 64 ==> 32 bits
	 */
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	and	tmp1_v.16b, tmp0_v.16b, mask_v.16b
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	ext	tmp1_v.16b, vzr.16b, tmp1_v.16b, #8
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	pmull2	tmp1_v.1q, tmp1_v.2d, poly_v.2d
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	and	tmp1_v.16b, tmp1_v.16b, mask_v.16b
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	pmull	tmp1_v.1q, tmp1_v.1d, poly_v.1d
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	eor	tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	// move 32-bit lane 1 of the reduced value into an otherwise zero tmp4
	mov	tmp4_v.16b,vzr.16b
	mov	tmp4_v.s[0], tmp0_v.s[1]
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	ldp	crc_data0,crc_data1,[crc_blk_ptr],16
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3
	ldp	crc_data2,crc_data3,[crc_blk_ptr],16

	crc32_u64	wCRC,wCRC,crc_data0
	crc32_u64	wCRC,wCRC,crc_data1
	crc32_u64	wCRC,wCRC,crc_data2
	crc32_u64	wCRC,wCRC,crc_data3

	// merge the vector-side result into the scalar-side CRC
	pmull	tmp4_v.1q, tmp4_v.1d, fold_poly_v.1d
	add	BUF,BUF,MIX_BLK_SIZE
	fmov	xPmull, tmp4_d
	crc32_u64	wPmull, wzr, xPmull
	eor	wCRC, wPmull, wCRC
	bhs	loop_2048		// unsigned: LEN >= MIX_BLK_SIZE

	// Scalar tail for the remaining LEN < MIX_BLK_SIZE bytes.
start_final:
	cmp	LEN, 63
	bls	.loop_16B
.loop_64B:
	ldp	data0, data1, [BUF],#16
	sub	LEN,LEN,#64
	ldp	data2, data3, [BUF],#16
	cmp	LEN,#64
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	ldp	data0, data1, [BUF],#16
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	ldp	data2, data3, [BUF],#16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	bhs	.loop_64B		// unsigned: LEN >= 64

.loop_16B:
	cmp	LEN, 15
	bls	.less_16B
	ldp	data0, data1, [BUF],#16
	sub	LEN,LEN,#16
	cmp	LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls	.less_16B
	ldp	data0, data1, [BUF],#16
	sub	LEN,LEN,#16
	cmp	LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls	.less_16B
	ldp	data0, data1, [BUF],#16
	sub	LEN,LEN,#16		// LEN is now below 16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
.less_16B:
	cmp	LEN, 7
	bls	.less_8B
	ldr	data0, [BUF], 8
	sub	LEN, LEN, #8
	crc32_u64	wCRC, wCRC, data0
.less_8B:
	cmp	LEN, 3
	bls	.less_4B
	ldr	wdata, [BUF], 4
	sub	LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
.less_4B:
	cmp	LEN, 1
	bls	.less_2B
	ldrh	wdata, [BUF], 2
	sub	LEN, LEN, #2
	crc32_u16	wCRC, wCRC, wdata
.less_2B:
	cbz	LEN, .finish_exit
	ldrb	wdata, [BUF]
	crc32_u8	wCRC, wCRC, wdata
.finish_exit:
	.ifc \poly_type,crc32
	mvn	w0, wCRC
	.else
	mov	w0, wCRC
	.endif
	ret
.endm
431 |
0 | ######################################################################## | |
1 | # Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | # | |
3 | # Redistribution and use in source and binary forms, with or without | |
4 | # modification, are permitted provided that the following conditions | |
5 | # are met: | |
6 | # * Redistributions of source code must retain the above copyright | |
7 | # notice, this list of conditions and the following disclaimer. | |
8 | # * Redistributions in binary form must reproduce the above copyright | |
9 | # notice, this list of conditions and the following disclaimer in | |
10 | # the documentation and/or other materials provided with the | |
11 | # distribution. | |
12 | # * Neither the name of Arm Corporation nor the names of its | |
13 | # contributors may be used to endorse or promote products derived | |
14 | # from this software without specific prior written permission. | |
15 | # | |
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | ######################################################################### | |
28 | #include "crc32_aarch64_common.h" | |
/* Enable the CRC32 and crypto (pmull) instructions for this file. */
	.arch	armv8-a+crc+crypto
	.text
	.align	6

/*
 * Width-specific wrappers: each one forwards its three operands unchanged
 * to the crc32x / crc32w / crc32h / crc32b instruction.
 */
.macro crc32_u64 dst,src,data
	crc32x	\dst,\src,\data
.endm

.macro crc32_u32 dst,src,data
	crc32w	\dst,\src,\data
.endm

.macro crc32_u16 dst,src,data
	crc32h	\dst,\src,\data
.endm

.macro crc32_u8 dst,src,data
	crc32b	\dst,\src,\data
.endm

/*
 * declare_var_vector_reg name, reg
 *
 * Aliases SIMD register number `reg` under the four prefixed names
 * v<name>, q<name>, d<name> and s<name>.
 */
.macro declare_var_vector_reg name:req,reg:req
	v\name	.req	v\reg
	q\name	.req	q\reg
	d\name	.req	d\reg
	s\name	.req	s\reg
.endm
50 | ||
/* Incoming arguments: running CRC in w0, buffer in x1, length in x2. */
wCRC		.req	w0
BUF		.req	x1
LEN		.req	x2

/* Three CRC accumulators; crc0 is the same register as wCRC. */
crc0		.req	w0
xcrc0		.req	x0
crc1		.req	w3
xcrc1		.req	x3
crc2		.req	w4

/* Constant-table pointer; shares x3 with crc1/xcrc1. */
const_adr	.req	x3

/* One read pointer per stream; ptr_crc0 is the same register as BUF. */
ptr_crc0	.req	x1
ptr_crc1	.req	x6
ptr_crc2	.req	x7

/* Two 64-bit data registers per stream. */
crc0_data0	.req	x9
crc0_data1	.req	x10
crc1_data0	.req	x11
crc1_data1	.req	x12
crc2_data0	.req	x13
crc2_data1	.req	x14

/*
 * Registers for the scalar 64/16/8/4/2/1-byte tail.  They reuse x3-x6,
 * which also back crc1, crc2, const_adr and ptr_crc1 above.
 */
wdata		.req	w3
data0		.req	x3
data1		.req	x4
data2		.req	x5
data3		.req	x6

/* SIMD registers: vtmp0/vtmp1 are v0/v1, vconst0/vconst1 are v2/v3. */
declare_var_vector_reg	tmp0,0
declare_var_vector_reg	tmp1,1
declare_var_vector_reg	const0,2
declare_var_vector_reg	const1,3
82 | /** | |
83 | uint32_t crc32_gzip_refl_3crc_fold( | |
84 | uint32_t wCRC, | |
85 | const unsigned char *BUF, | |
86 | uint64_t LEN | |
87 | ); | |
88 | */ | |
89 | ||
/*
 * Exported entry point.  The whole body comes from the crc32_3crc_fold
 * macro, expanded with poly_type = crc32.
 */
	.type	crc32_gzip_refl_3crc_fold, %function
	.global	crc32_gzip_refl_3crc_fold
crc32_gzip_refl_3crc_fold:
	crc32_3crc_fold	crc32
	.size	crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
/* Only the CRC32 extension is needed by this file. */
	.arch	armv8-a+crc
	.text
	.align	6


#include "crc32_aarch64_common.h"

/* Incoming arguments: running CRC in w0, buffer in x1, length in x2. */
wCRC	.req	w0
BUF	.req	x1
LEN	.req	x2

/* Scratch registers: one 32-bit word and four 64-bit words. */
wdata	.req	w3
data0	.req	x4
data1	.req	x5
data2	.req	x6
data3	.req	x7

/*
 * Width-specific wrappers: each one forwards its three operands unchanged
 * to the crc32x / crc32w / crc32h / crc32b instruction.
 */
.macro crc32_u64 dst,src,data
	crc32x	\dst,\src,\data
.endm

.macro crc32_u32 dst,src,data
	crc32w	\dst,\src,\data
.endm

.macro crc32_u16 dst,src,data
	crc32h	\dst,\src,\data
.endm

.macro crc32_u8 dst,src,data
	crc32b	\dst,\src,\data
.endm
56 | ||
57 | /** | |
58 | * uint32_t crc32_gzip_refl_crc_ext(uint32_t wCRC, | |
59 | * const unsigned char *BUF, uint64_t LEN); | |
60 | */ | |
/*
 * Exported entry point.  The whole body comes from the crc32_hw_common
 * macro, expanded with poly_type = crc32.
 */
	.type	crc32_gzip_refl_crc_ext, %function
	.global	crc32_gzip_refl_crc_ext
crc32_gzip_refl_crc_ext:
	crc32_hw_common	crc32
	.size	crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext
0 | ######################################################################## | |
1 | # Copyright(c) 2019 Arm Corporation All rights reserved. | |
2 | # | |
3 | # Redistribution and use in source and binary forms, with or without | |
4 | # modification, are permitted provided that the following conditions | |
5 | # are met: | |
6 | # * Redistributions of source code must retain the above copyright | |
7 | # notice, this list of conditions and the following disclaimer. | |
8 | # * Redistributions in binary form must reproduce the above copyright | |
9 | # notice, this list of conditions and the following disclaimer in | |
10 | # the documentation and/or other materials provided with the | |
11 | # distribution. | |
12 | # * Neither the name of Arm Corporation nor the names of its | |
13 | # contributors may be used to endorse or promote products derived | |
14 | # from this software without specific prior written permission. | |
15 | # | |
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | ######################################################################### | |
28 | ||
/* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */

	.arch	armv8-a+crc+crypto
	.text
	.align	3
	.global	crc32_gzip_refl_hw_fold
	.type	crc32_gzip_refl_hw_fold, %function

/* Arguments; the seed and the running CRC share w0. */
w_seed		.req	w0
w_crc		.req	w0
x_buf		.req	x1
x_len		.req	x2

/*
 * NOTE(review): both names below alias x10, so only one of the two values
 * can be live at a time -- confirm against the rest of the routine.
 */
x_buf_loop_end	.req	x10
x_buf_iter	.req	x10

/* Scratch register, 64-bit and 32-bit views of x15. */
x_tmp		.req	x15
w_tmp		.req	w15

/* The two 64-bit constants, held in v3 (c0) and v1 (c1). */
d_c0		.req	d3
v_c0		.req	v3
d_c1		.req	d1
v_c1		.req	v1
// crc32_gzip_refl_hw_fold: computes a CRC over buf[0..len) with the
// crc32x/crc32w/crc32h/crc32b instructions. The seed is bit-inverted on
// entry and the result is bit-inverted again before returning in w0.
// Input of 1024 bytes or more is consumed in 1024-byte blocks, each split
// into three streams that are processed in parallel and merged with pmull;
// the remainder is handled 8, 4, 2 and 1 bytes at a time.
52 | crc32_gzip_refl_hw_fold: | |
53 | mvn w_seed, w_seed | |
54 | cmp x_len, 1023 | |
// Default tail pointer = buf; overwritten below when the block loop runs.
55 | mov x_buf_iter, x_buf | |
// Unsigned compare: fewer than 1024 bytes skips the block loop entirely.
56 | bls .loop_fold_end | |
57 | ||
// x_buf_loop_end = buf + 1024 * floor(len / 1024)
58 | sub x_buf_loop_end, x_len, #1024 | |
59 | and x_buf_loop_end, x_buf_loop_end, -1024 | |
60 | add x_buf_loop_end, x_buf_loop_end, 1024 | |
61 | add x_buf_loop_end, x_buf, x_buf_loop_end | |
62 | ||
// c0 = 0xb486819b, the multiplier applied to crc0 when merging.
// NOTE(review): presumably derived from the stream length - confirm.
63 | mov x_tmp, 0x819b | |
64 | movk x_tmp, 0xb486, lsl 16 | |
65 | fmov d_c0, x_tmp | |
66 | ||
// c1 = 0x76278617, the multiplier applied to crc1 when merging.
67 | mov x_tmp, 0x8617 | |
68 | movk x_tmp, 0x7627, lsl 16 | |
69 | fmov d_c1, x_tmp | |
70 | ||
// Aliases for the block loop. w_crc0 is w0, i.e. the running CRC itself.
71 | x_in64 .req x3 | |
72 | w_crc0 .req w0 | |
73 | w_crc1 .req w4 | |
74 | w_crc2 .req w5 | |
75 | ||
76 | d_crc0 .req d4 | |
77 | d_crc1 .req d5 | |
78 | v_crc0 .req v4 | |
79 | v_crc1 .req v5 | |
80 | .align 3 | |
// One iteration per 1024-byte block starting at x_buf.
81 | .loop_fold: | |
// x9 = end of stream 0 (buf + 336); streams 1 and 2 start from zero.
82 | add x9, x_buf, 336 | |
83 | mov x_in64, x_buf | |
84 | mov w_crc1, 0 | |
85 | mov w_crc2, 0 | |
86 | ||
87 | .align 3 | |
// 42 iterations (336 / 8): each consumes 8 bytes from the three streams at
// block offsets +0, +336 and +672.
88 | .loop_for: | |
89 | ldr x8, [x_in64] | |
90 | ldr x7, [x_in64, 336] | |
91 | ldr x6, [x_in64, 672] | |
92 | ||
93 | add x_in64, x_in64, 8 | |
94 | cmp x_in64, x9 | |
95 | ||
96 | crc32x w_crc0, w_crc0, x8 | |
97 | crc32x w_crc1, w_crc1, x7 | |
98 | crc32x w_crc2, w_crc2, x6 | |
99 | bne .loop_for | |
100 | ||
// Carry-less multiply crc0 (zero-extended to 64 bits) by c0.
101 | uxtw x_tmp, w_crc0 | |
102 | fmov d_crc0, x_tmp | |
103 | pmull v_crc0.1q, v_crc0.1d, v_c0.1d | |
104 | ||
// Carry-less multiply crc1 (zero-extended to 64 bits) by c1.
105 | uxtw x_tmp, w_crc1 | |
106 | fmov d_crc1, x_tmp | |
107 | pmull v_crc1.1q, v_crc1.1d, v_c1.1d | |
108 | ||
// Stream 2 also takes the 8 bytes at block offset 1008.
109 | ldr x_tmp, [x_buf, 1008] | |
110 | crc32x w_crc2, w_crc2, x_tmp | |
111 | ||
// Run the low 64 bits of each product through crc32x with a zero seed.
112 | fmov x_tmp, d_crc0 | |
113 | crc32x w_crc0, wzr, x_tmp | |
114 | ||
115 | fmov x_tmp, d_crc1 | |
116 | crc32x w_crc1, wzr, x_tmp | |
117 | ||
// Merge the three partial values into the running CRC.
118 | eor w_crc0, w_crc0, w_crc1 | |
119 | eor w_crc0, w_crc0, w_crc2 | |
120 | ||
// The last 8 bytes of the block (offset 1016) go into the merged CRC.
121 | ldr x_tmp, [x_buf, 1016] | |
122 | crc32x w_crc0, w_crc0, x_tmp | |
123 | ||
124 | add x_buf, x_buf, 1024 | |
125 | cmp x_buf_loop_end, x_buf | |
126 | bne .loop_fold | |
127 | ||
// Remaining byte count. x_buf_iter (x10, same register as x_buf_loop_end)
// already points at the first unprocessed byte.
128 | and x_len, x_len, 1023 | |
129 | ||
// x3 is reused here; x_in64 is no longer needed after the block loop.
130 | x_buf_loop_size8_end .req x3 | |
131 | .loop_fold_end: | |
132 | cmp x_len, 7 | |
133 | bls .size_4 | |
134 | ||
// x_buf_loop_size8_end = x_buf_iter + 8 * floor(len / 8)
135 | sub x_buf_loop_size8_end, x_len, #8 | |
136 | and x_buf_loop_size8_end, x_buf_loop_size8_end, -8 | |
137 | add x_buf_loop_size8_end, x_buf_loop_size8_end, 8 | |
138 | add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end | |
139 | ||
140 | .align 3 | |
// 8 bytes per iteration; the load post-increments x_buf_iter.
141 | .loop_size_8: | |
142 | ldr x_tmp, [x_buf_iter], 8 | |
143 | crc32x w_crc, w_crc, x_tmp | |
144 | ||
145 | cmp x_buf_iter, x_buf_loop_size8_end | |
146 | bne .loop_size_8 | |
147 | ||
148 | and x_len, x_len, 7 | |
// At most 7 bytes remain: one optional 4-byte step, ...
149 | .size_4: | |
150 | cmp x_len, 3 | |
151 | bls .size_2 | |
152 | ||
153 | ldr w_tmp, [x_buf_iter], 4 | |
154 | crc32w w_crc, w_crc, w_tmp | |
155 | ||
156 | sub x_len, x_len, #4 | |
// ... one optional 2-byte step, ...
157 | .size_2: | |
158 | cmp x_len, 1 | |
159 | bls .size_1 | |
160 | ||
161 | ldrh w_tmp, [x_buf_iter], 2 | |
162 | crc32h w_crc, w_crc, w_tmp | |
163 | ||
164 | sub x_len, x_len, #2 | |
// ... and one optional final byte.
165 | .size_1: | |
166 | cbz x_len, .done | |
167 | ||
168 | ldrb w_tmp, [x_buf_iter] | |
169 | crc32b w_crc, w_crc, w_tmp | |
170 | ||
// Undo the entry inversion; the result is returned in w0.
171 | .done: | |
172 | mvn w_crc, w_crc | |
173 | ret | |
174 | ||
175 | .size crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold |
0 | ######################################################################## | |
1 | # Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | # | |
3 | # Redistribution and use in source and binary forms, with or without | |
4 | # modification, are permitted provided that the following conditions | |
5 | # are met: | |
6 | # * Redistributions of source code must retain the above copyright | |
7 | # notice, this list of conditions and the following disclaimer. | |
8 | # * Redistributions in binary form must reproduce the above copyright | |
9 | # notice, this list of conditions and the following disclaimer in | |
10 | # the documentation and/or other materials provided with the | |
11 | # distribution. | |
12 | # * Neither the name of Arm Corporation nor the names of its | |
13 | # contributors may be used to endorse or promote products derived | |
14 | # from this software without specific prior written permission. | |
15 | # | |
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | ######################################################################### | |
28 | ||
29 | ||
// Setup for the CRC32C (iSCSI) variant of the shared 3-stream fold code.
// The crc32_u64/u32/u16/u8 wrappers below map the generic operation names
// onto the crc32c* instructions.
// NOTE(review): presumably these names are what the macros in
// crc32_aarch64_common.h expand to - confirm against that header.
30 | .text | |
31 | .align 6 | |
32 | .arch armv8-a+crc+crypto | |
33 | #include "crc32_aarch64_common.h" | |
// 64-bit data step.
34 | .macro crc32_u64 dst,src,data | |
35 | crc32cx \dst,\src,\data | |
36 | .endm | |
// 32-bit data step.
37 | .macro crc32_u32 dst,src,data | |
38 | crc32cw \dst,\src,\data | |
39 | .endm | |
// 16-bit data step.
40 | .macro crc32_u16 dst,src,data | |
41 | crc32ch \dst,\src,\data | |
42 | .endm | |
// 8-bit data step.
43 | .macro crc32_u8 dst,src,data | |
44 | crc32cb \dst,\src,\data | |
45 | .endm | |
// declare_var_vector_reg name, reg: defines q<name>, v<name>, s<name> and
// d<name> as aliases for the four views of SIMD register <reg>.
46 | .macro declare_var_vector_reg name:req,reg:req | |
47 | q\name .req q\reg | |
48 | v\name .req v\reg | |
49 | s\name .req s\reg | |
50 | d\name .req d\reg | |
51 | .endm | |
52 | ||
// Register aliases for crc32_iscsi_3crc_fold. Several names deliberately
// share one register:
//   register 0: BUF, ptr_crc0
//   register 2: wCRC, crc0, xcrc0
//   register 3: crc1, xcrc1, const_adr, wdata, data0
//   register 4: crc2, data1
//   register 6: ptr_crc1, data3
// How they are used is decided by the macros in the included header, which
// is not visible from this file.
53 | BUF .req x0 | |
54 | LEN .req x1 | |
55 | wCRC .req w2 | |
56 | crc0 .req w2 | |
57 | crc1 .req w3 | |
58 | crc2 .req w4 | |
59 | xcrc0 .req x2 | |
60 | xcrc1 .req x3 | |
61 | const_adr .req x3 | |
62 | ptr_crc0 .req x0 | |
63 | ptr_crc1 .req x6 | |
64 | ptr_crc2 .req x7 | |
65 | crc0_data0 .req x9 | |
66 | crc0_data1 .req x10 | |
67 | crc1_data0 .req x11 | |
68 | crc1_data1 .req x12 | |
69 | crc2_data0 .req x13 | |
70 | crc2_data1 .req x14 | |
71 | ||
72 | wdata .req w3 | |
73 | data0 .req x3 | |
74 | data1 .req x4 | |
75 | data2 .req x5 | |
76 | data3 .req x6 | |
77 | ||
// SIMD aliases: tmp0 = register 0, tmp1 = 1, const0 = 2, const1 = 3, each
// available with q/v/s/d prefixes.
78 | declare_var_vector_reg tmp0,0 | |
79 | declare_var_vector_reg tmp1,1 | |
80 | declare_var_vector_reg const0,2 | |
81 | declare_var_vector_reg const1,3 | |
82 | ||
// Entry point crc32_iscsi_3crc_fold. Per the aliases in this file the
// arguments are BUF = x0, LEN = x1, wCRC = w2, matching the prototype below.
// The whole body comes from the crc32_3crc_fold macro, instantiated with the
// crc32c argument.
// NOTE(review): the macro is presumably defined in crc32_aarch64_common.h;
// its behaviour cannot be checked from this file.
83 | /** | |
84 | unsigned int crc32_iscsi( | |
85 | unsigned char *BUF, | |
86 | int LEN, | |
87 | unsigned int wCRC | |
88 | ); | |
89 | ||
90 | */ | |
91 | ||
92 | .global crc32_iscsi_3crc_fold | |
93 | .type crc32_iscsi_3crc_fold, %function | |
94 | crc32_iscsi_3crc_fold: | |
95 | crc32_3crc_fold crc32c | |
96 | .size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold |
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
// CRC32C (iSCSI) variant built only on the CRC extension: .arch enables
// +crc, not +crypto, so no pmull is available in this file.
29 | .text | |
30 | .align 6 | |
31 | .arch armv8-a+crc | |
32 | ||
33 | ||
34 | #include "crc32_aarch64_common.h" | |
// Register aliases: arguments BUF = x0, LEN = x1, wCRC = w2, plus data
// registers x4-x7 and the 32-bit register w3.
35 | BUF .req x0 | |
36 | LEN .req x1 | |
37 | wCRC .req w2 | |
38 | data0 .req x4 | |
39 | data1 .req x5 | |
40 | data2 .req x6 | |
41 | data3 .req x7 | |
42 | wdata .req w3 | |
// Map the generic 64/32/16/8-bit step names onto the crc32c* instructions.
// NOTE(review): presumably consumed by the macros in the included header -
// confirm.
43 | .macro crc32_u64 dst,src,data | |
44 | crc32cx \dst,\src,\data | |
45 | .endm | |
46 | .macro crc32_u32 dst,src,data | |
47 | crc32cw \dst,\src,\data | |
48 | .endm | |
49 | .macro crc32_u16 dst,src,data | |
50 | crc32ch \dst,\src,\data | |
51 | .endm | |
52 | .macro crc32_u8 dst,src,data | |
53 | crc32cb \dst,\src,\data | |
54 | .endm | |
55 | ||
56 | /** | |
57 | * uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF, | |
58 | * uint64_t LEN,uint32_t wCRC); | |
59 | */ | |
// The body is the crc32_hw_common macro instantiated with the crc32c
// argument; the macro is not defined in this file.
60 | .global crc32_iscsi_crc_ext | |
61 | .type crc32_iscsi_crc_ext, %function | |
62 | crc32_iscsi_crc_ext: | |
63 | crc32_hw_common crc32c | |
64 | .size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext |
0 | ######################################################################## | |
1 | # Copyright(c) 2019 Arm Corporation All rights reserved. | |
2 | # | |
3 | # Redistribution and use in source and binary forms, with or without | |
4 | # modification, are permitted provided that the following conditions | |
5 | # are met: | |
6 | # * Redistributions of source code must retain the above copyright | |
7 | # notice, this list of conditions and the following disclaimer. | |
8 | # * Redistributions in binary form must reproduce the above copyright | |
9 | # notice, this list of conditions and the following disclaimer in | |
10 | # the documentation and/or other materials provided with the | |
11 | # distribution. | |
12 | # * Neither the name of Arm Corporation nor the names of its | |
13 | # contributors may be used to endorse or promote products derived | |
14 | # from this software without specific prior written permission. | |
15 | # | |
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | ######################################################################### | |
28 | ||
// Assembler setup and register aliases for crc32_iscsi_refl_hw_fold.
// Per the prototype comment below, arguments arrive as x0 = buffer,
// w1 = len (int), w2 = crc_init.
29 | .arch armv8-a+crc+crypto | |
30 | .text | |
31 | .align 3 | |
32 | .global crc32_iscsi_refl_hw_fold | |
33 | .type crc32_iscsi_refl_hw_fold, %function | |
34 | ||
35 | /* unsigned int crc32_iscsi_refl_hw_fold(unsigned char *buffer, int len, unsigned int crc_init) */ | |
36 | ||
37 | x_buffer .req x0 | |
38 | w_len .req w1 | |
// w_crc_init and w_crc both name w2: the running CRC lives in the init
// register.
39 | w_crc_init .req w2 | |
40 | w_crc .req w2 | |
41 | ||
// All three names below refer to register 9. The function relies on this:
// when the 1024-byte loop exits, x9 (the loop end) is also the address at
// which the tail handling continues.
42 | w_len_loop_end .req w9 | |
43 | x_buf_loop_end .req x9 | |
44 | x_buf_iter .req x9 | |
45 | ||
// Scratch register, 64-bit and 32-bit views of register 15.
46 | x_tmp .req x15 | |
47 | w_tmp .req w15 | |
48 | ||
// Return value register; it shares register 0 with x_buffer.
49 | w_crc_ret .req w0 | |
// crc32_iscsi_refl_hw_fold: computes a CRC32C over buffer[0..len) with the
// crc32cx/crc32cw/crc32ch/crc32cb instructions, starting from crc_init (w2)
// and returning the result in w0. Unlike the gzip variant in this code base
// no bit inversion is applied here. len is compared as a signed 32-bit value.
// Input of 1024 bytes or more is consumed in 1024-byte blocks, each split
// into three streams merged with pmull; the remainder is handled 8, 4, 2
// and 1 bytes at a time.
50 | crc32_iscsi_refl_hw_fold: | |
51 | cmp w_len, 1023 | |
// Default tail pointer = buffer; overwritten below when the block loop runs.
52 | mov x_buf_iter, x_buffer | |
// Signed compare: len <= 1023 (including negative) skips the block loop.
53 | ble .loop_fold_end | |
54 | ||
// w10 = len - 1024 and w12 = w10 / 1024 are kept for the whole block loop,
// because x1 (w_len) is reused as x_in64 inside it.
55 | sub w10, w_len, #1024 | |
56 | lsr w12, w10, 10 | |
// Writing w9 zero-extends into x9, so x9 = 1024 * w12 here.
57 | lsl w_len_loop_end, w12, 10 | |
58 | ||
// x_buf_loop_end = buffer + 1024 * floor(len / 1024)
59 | add x_buf_loop_end, x_buf_loop_end, 1024 | |
60 | add x_buf_loop_end, x_buffer, x_buf_loop_end | |
61 | ||
// d3 = 0xe417f38a, the multiplier applied to crc0 when merging.
// NOTE(review): presumably derived from the stream length - confirm.
62 | mov x_tmp, 0xf38a | |
63 | movk x_tmp, 0xe417, lsl 16 | |
64 | fmov d3, x_tmp | |
65 | ||
// d1 = 0x8f158014, the multiplier applied to crc1 when merging.
66 | mov x_tmp, 0x8014 | |
67 | movk x_tmp, 0x8f15, lsl 16 | |
68 | fmov d1, x_tmp | |
69 | ||
// Aliases for the block loop. x_in64 reuses x1 (the len register) and
// w_crc0 is w2, i.e. the running CRC itself.
70 | x_in64 .req x1 | |
71 | w_crc0 .req w2 | |
72 | w_crc1 .req w3 | |
73 | w_crc2 .req w4 | |
74 | .align 3 | |
// One iteration per 1024-byte block starting at x_buffer.
75 | .loop_fold: | |
// x8 = end of stream 0 (buffer + 336); streams 1 and 2 start from zero.
76 | add x8, x_buffer, 336 | |
77 | mov x_in64, x_buffer | |
78 | mov w_crc1, 0 | |
79 | mov w_crc2, 0 | |
80 | ||
81 | .align 3 | |
// 42 iterations (336 / 8): each consumes 8 bytes from the three streams at
// block offsets +0, +336 and +672.
82 | .loop_for: | |
83 | ldr x7, [x_in64] | |
84 | ldr x6, [x_in64, 336] | |
85 | ldr x5, [x_in64, 672] | |
86 | ||
87 | add x_in64, x_in64, 8 | |
88 | cmp x_in64, x8 | |
89 | ||
90 | crc32cx w_crc0, w_crc0, x7 | |
91 | crc32cx w_crc1, w_crc1, x6 | |
92 | crc32cx w_crc2, w_crc2, x5 | |
93 | bne .loop_for | |
94 | ||
// Carry-less multiply crc0 (zero-extended) by the constant in v3 -> v2.
95 | uxtw x_tmp, w_crc0 | |
96 | fmov d4, x_tmp | |
97 | pmull v2.1q, v4.1d, v3.1d | |
98 | ||
// Carry-less multiply crc1 (zero-extended) by the constant in v1 -> v5.
99 | uxtw x_tmp, w_crc1 | |
100 | fmov d5, x_tmp | |
101 | pmull v5.1q, v5.1d, v1.1d | |
102 | ||
// Run the low 64 bits of each product through crc32cx with a zero seed.
103 | fmov x_tmp, d2 | |
104 | crc32cx w_crc0, wzr, x_tmp | |
105 | ||
106 | fmov x_tmp, d5 | |
107 | crc32cx w_crc1, wzr, x_tmp | |
108 | ||
// Stream 2 also takes the 8 bytes at block offset 1008.
109 | ldr x_tmp, [x_buffer, 1008] | |
110 | crc32cx w_crc2, w_crc2, x_tmp | |
111 | ||
// Merge the three partial values (accumulated in w_crc1).
112 | eor w_crc1, w_crc1, w_crc0 | |
113 | eor w_crc1, w_crc1, w_crc2 | |
114 | ||
// The last 8 bytes of the block (offset 1016) go into the merged value; the
// result becomes the running CRC in w_crc0.
115 | ldr x_tmp, [x_buffer, 1016] | |
116 | crc32cx w_crc0, w_crc1, x_tmp | |
117 | ||
118 | add x_buffer, x_buffer, 1024 | |
119 | cmp x_buf_loop_end, x_buffer | |
120 | bne .loop_fold | |
121 | ||
// Rebuild the remaining length: (len - 1024) - 1024 * w12 = len mod 1024.
// x_buf_iter (x9, same register as x_buf_loop_end) already points at the
// first unprocessed byte.
122 | sub w_len, w10, w12, lsl 10 | |
123 | ||
// x3 and w4 are reused as scratch below; w_crc1/w_crc2 are dead by now.
124 | x_buf_loop_size8_end .req x3 | |
125 | .loop_fold_end: | |
126 | cmp w_len, 7 | |
127 | ble .size_4 | |
128 | ||
// w4 = (len - 8) / 8; x3 = x_buf_iter + 8 * floor(len / 8).
// Writing w3 zero-extends into x3.
129 | sub w_len, w_len, #8 | |
130 | lsr w4, w_len, 3 | |
131 | lsl w3, w4, 3 | |
132 | add x_buf_loop_size8_end, x_buf_loop_size8_end, 8 | |
133 | add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end | |
134 | ||
135 | .align 3 | |
// 8 bytes per iteration; the load post-increments x_buf_iter.
136 | .loop_size_8: | |
137 | ldr x_tmp, [x_buf_iter], 8 | |
138 | crc32cx w_crc, w_crc, x_tmp | |
139 | ||
140 | cmp x_buf_iter, x_buf_loop_size8_end | |
141 | bne .loop_size_8 | |
142 | ||
// (len - 8) - 8 * w4 = len mod 8.
143 | sub w_len, w_len, w4, lsl 3 | |
// At most 7 bytes remain: one optional 4-byte step, ...
144 | .size_4: | |
145 | cmp w_len, 3 | |
146 | ble .size_2 | |
147 | ||
148 | ldr w_tmp, [x_buf_iter], 4 | |
149 | crc32cw w_crc, w_crc, w_tmp | |
150 | sub w_len, w_len, #4 | |
151 | ||
// ... one optional 2-byte step, ...
152 | .size_2: | |
153 | cmp w_len, 1 | |
154 | ble .size_1 | |
155 | ||
156 | ldrh w_tmp, [x_buf_iter], 2 | |
157 | crc32ch w_crc, w_crc, w_tmp | |
158 | sub w_len, w_len, #2 | |
159 | ||
// ... and one optional final byte. The result is moved to w0 first so that
// the no-byte-left path can return immediately.
160 | .size_1: | |
161 | mov w_crc_ret, w_crc | |
162 | cmp w_len, 1 | |
163 | bne .done | |
164 | ||
165 | ldrb w_tmp, [x_buf_iter] | |
166 | crc32cb w_crc_ret, w_crc, w_tmp | |
167 | ||
168 | .done: | |
169 | ret | |
170 | ||
171 | .size crc32_iscsi_refl_hw_fold, .-crc32_iscsi_refl_hw_fold |
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
// crc32_mix_default: instantiates the shared code from
// crc32_mix_default_common.S with the crc32x/crc32w/crc32h/crc32b
// instructions.
29 | .arch armv8-a+crypto+crc | |
30 | .text | |
31 | .align 6 | |
32 | ||
// CRC32 is tested with #ifdef in the included common code to enable the
// bit inversion of the incoming and outgoing CRC value.
33 | #define CRC32 | |
34 | ||
// The four wrappers below give the common code instruction-neutral names
// for the 64/32/16/8-bit CRC steps. They must be defined before the include.
35 | .macro crc32_u64 dst,src,data | |
36 | crc32x \dst,\src,\data | |
37 | .endm | |
38 | ||
39 | .macro crc32_u32 dst,src,data | |
40 | crc32w \dst,\src,\data | |
41 | .endm | |
42 | ||
43 | .macro crc32_u16 dst,src,data | |
44 | crc32h \dst,\src,\data | |
45 | .endm | |
46 | ||
47 | .macro crc32_u8 dst,src,data | |
48 | crc32b \dst,\src,\data | |
49 | .endm | |
50 | ||
51 | #include "crc32_mix_default_common.S" | |
52 | ||
// The function body is entirely the crc32_mix_main_default macro.
53 | .global crc32_mix_default | |
54 | .type crc32_mix_default, %function | |
55 | crc32_mix_default: | |
56 | crc32_mix_main_default | |
57 | .size crc32_mix_default, .-crc32_mix_default | |
58 | ||
// Constant tables for crc32_mix_default. lanchor_crc32 marks the start of
// the table. The common code reads k1k2 at +0 and finds the other entries
// through its offset_k3k4 / offset_k5k0 / offset_poly / offset_crc32_const
// values (16 / 32 / 48 / 64), so the order and sizes below must not change.
59 | .section .rodata | |
60 | .align 4 | |
61 | .set lanchor_crc32,. + 0 | |
62 | ||
// +0: pmull multipliers for the 64-byte folding loop.
63 | .type k1k2, %object | |
64 | .size k1k2, 16 | |
65 | k1k2: | |
66 | .xword 0x0154442bd4 | |
67 | .xword 0x01c6e41596 | |
68 | ||
// +16: pmull multipliers for reducing the four accumulators to 128 bits
// and then to 64 bits.
69 | .type k3k4, %object | |
70 | .size k3k4, 16 | |
71 | k3k4: | |
72 | .xword 0x01751997d0 | |
73 | .xword 0x00ccaa009e | |
74 | ||
// +32: k5 in the low half, zero in the high half.
75 | .type k5k0, %object | |
76 | .size k5k0, 16 | |
77 | k5k0: | |
78 | .xword 0x0163cd6124 | |
79 | .xword 0 | |
80 | ||
// +48: the two multipliers used by the final reduction to 32 bits.
81 | .type poly, %object | |
82 | .size poly, 16 | |
83 | poly: | |
84 | .xword 0x01db710641 | |
85 | .xword 0x01f7011641 | |
86 | ||
// +64: three 16-byte entries, loaded as q registers at +64, +80 and +96.
// Their low halves multiply crc_pmull, crc0 and crc1 respectively when the
// partial CRCs are merged.
87 | .type crc32_const, %object | |
88 | .size crc32_const, 48 | |
89 | crc32_const: | |
90 | .xword 0x1753ab84 | |
91 | .xword 0 | |
92 | .xword 0xbbf2f6d6 | |
93 | .xword 0 | |
94 | .xword 0x0c30f51d | |
95 | .xword 0 | |
96 | ||
// The mask has its own anchor, .lanchor_mask, and is addressed with adrp
// plus :lo12:. It keeps the low 32 bits of each 64-bit lane.
97 | .align 4 | |
98 | .set .lanchor_mask,. + 0 | |
99 | ||
100 | .type mask, %object | |
101 | .size mask, 16 | |
102 | mask: | |
103 | .word -1 | |
104 | .word 0 | |
105 | .word -1 | |
106 | .word 0 |
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
// declare_generic_reg name, reg, default
// Defines three aliases for general-purpose register number <reg>:
//   <name>   -> <default><reg>  (default is the width prefix, w or x)
//   w_<name> -> w<reg>
//   x_<name> -> x<reg>
29 | .macro declare_generic_reg name:req, reg:req, default:req | |
30 | \name .req \default\reg | |
31 | w_\name .req w\reg | |
32 | x_\name .req x\reg | |
33 | .endm | |
34 | ||
// declare_neon_reg name, reg, default
// Defines five aliases for SIMD register number <reg>:
//   <name> -> <default><reg>, plus v_<name>, q_<name>, d_<name> and
//   s_<name> for the vector, 128-bit, 64-bit and 32-bit views.
35 | .macro declare_neon_reg name:req, reg:req, default:req | |
36 | \name .req \default\reg | |
37 | v_\name .req v\reg | |
38 | q_\name .req q\reg | |
39 | d_\name .req d\reg | |
40 | s_\name .req s\reg | |
41 | .endm | |
42 | ||
43 | /********************************************************************** | |
44 | variables | |
45 | **********************************************************************/ | |
// General-purpose register assignments. crc (w0), buf (x1) and len (x2)
// are the first three argument registers.
46 | declare_generic_reg crc, 0,w | |
47 | declare_generic_reg buf, 1,x | |
48 | declare_generic_reg len, 2,x | |
49 | declare_generic_reg buf_saved, 3,x | |
50 | declare_generic_reg buf_iter, 4,x | |
51 | declare_generic_reg len_saved, 5,x | |
52 | declare_generic_reg buf_tmp, 6,x | |
53 | ||
// Registers for the three parallel CRC streams, their data words and the
// constant table pointer.
54 | declare_generic_reg crc0, 7,x | |
55 | declare_generic_reg crc1, 8,x | |
56 | declare_generic_reg crc2, 9,x | |
57 | declare_generic_reg pconst, 10,x | |
58 | declare_generic_reg data_crc0, 11,x | |
59 | declare_generic_reg data_crc1, 12,x | |
60 | declare_generic_reg data_crc2, 13,x | |
61 | ||
// The names below reuse registers 9-13 that are already named above
// (size = crc2, crc_tmp = pconst, size_tmp = data_tmp1 = data_crc0,
// data_tmp2 = data_crc1, data_tmp3 = data_crc2).
// NOTE(review): presumably the two groups are never live at the same time -
// confirm against the code that uses them.
62 | declare_generic_reg size, 9,x | |
63 | declare_generic_reg crc_tmp, 10,w | |
64 | declare_generic_reg size_tmp, 11,x | |
65 | declare_generic_reg data_tmp1, 11,x | |
66 | declare_generic_reg data_tmp2, 12,x | |
67 | declare_generic_reg data_tmp3, 13,x | |
68 | ||
69 | declare_generic_reg tmp, 14,x | |
70 | declare_generic_reg tmp1, 15,x | |
71 | ||
// ret_crc is the same register as crc (w0).
72 | // return | |
73 | declare_generic_reg ret_crc, 0,w | |
74 | ||
75 | /********************************************************************** | |
76 | simd variables | |
77 | **********************************************************************/ | |
// SIMD register assignments. a0 holds the current pmull multiplier pair;
// a1-a4 are the four 128-bit folding accumulators.
78 | declare_neon_reg a0, 0,v | |
79 | declare_neon_reg a1, 1,v | |
80 | declare_neon_reg a2, 2,v | |
81 | declare_neon_reg a3, 3,v | |
82 | declare_neon_reg a4, 4,v | |
83 | ||
// a5-a8 receive the low-half pmull products of a1-a4 in pmull_fold.
84 | declare_neon_reg a5, 16,v | |
85 | declare_neon_reg a6, 17,v | |
86 | declare_neon_reg a7, 18,v | |
87 | declare_neon_reg a8, 19,v | |
88 | ||
// y5-y8 receive the next 64 bytes of input in pmull_fold.
89 | declare_neon_reg y5, 20,v | |
90 | declare_neon_reg y6, 21,v | |
91 | declare_neon_reg y7, 22,v | |
92 | declare_neon_reg y8, 23,v | |
93 | ||
// neon_zero and neon_tmp are the same register (v24).
94 | declare_neon_reg neon_zero, 24,v | |
95 | declare_neon_reg neon_tmp, 24,v | |
96 | ||
97 | declare_neon_reg k5k0, 25,v | |
98 | declare_neon_reg neon_tmp1, 26,v | |
99 | declare_neon_reg neon_tmp2, 27,v | |
100 | declare_neon_reg neon_tmp3, 28,v | |
101 | ||
// Registers for merging the pmull result with the crc32-instruction streams.
102 | declare_neon_reg crc_pmull, 29,v | |
103 | declare_neon_reg neon_crc0, 30,v | |
104 | declare_neon_reg neon_crc1, 31,v | |
105 | ||
// Receive the three crc32_const table entries.
106 | declare_neon_reg neon_const0, 5,v | |
107 | declare_neon_reg neon_const1, 6,v | |
108 | declare_neon_reg neon_const2, 7,v | |
109 | ||
// Byte offsets of the constant table entries, relative to lanchor_crc32.
// k1k2 is at offset 0 and is loaded without a named offset. The file that
// includes this one must lay out its table in exactly this order.
110 | // constants | |
111 | .equ offset_k3k4, 16 | |
112 | .equ offset_k5k0, 32 | |
113 | .equ offset_poly, 48 | |
114 | .equ offset_crc32_const, 64 | |
115 | ||
// pmull_fold: one 64-byte step of the mixed PMULL / crc32-instruction loop.
//  - PMULL side: each accumulator a1..a4 is multiplied by the constant pair
//    in a0 (low halves -> a5..a8 via pmull, high halves in place via
//    pmull2), and both products are XORed with the 64 bytes loaded from
//    [x_buf_tmp] into y5..y8.
//  - crc32 side: w_crc0, w_crc1 and w_crc2 each consume eight 8-byte words,
//    at offsets 464..520, 976..1032 and 1488..1544 from x_buf_tmp.
// The two kinds of instruction are interleaved by hand. x_buf_tmp is not
// modified here; the caller advances it by 64 between invocations.
116 | // pmull fold | |
117 | .macro pmull_fold | |
118 | ldr x_data_crc0, [x_buf_tmp, 464] | |
119 | ldr x_data_crc1, [x_buf_tmp, 976] | |
120 | ldr x_data_crc2, [x_buf_tmp, 1488] | |
121 | ||
// Low 64 bits of a1 times low 64 bits of a0.
122 | pmull v_a5.1q, v_a1.1d, v_a0.1d | |
123 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
124 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
125 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
126 | ||
127 | ldr x_data_crc0, [x_buf_tmp, 472] | |
128 | ldr x_data_crc1, [x_buf_tmp, 984] | |
129 | ldr x_data_crc2, [x_buf_tmp, 1496] | |
130 | ||
131 | pmull v_a6.1q, v_a2.1d, v_a0.1d | |
132 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
133 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
134 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
135 | ||
136 | ldr x_data_crc0, [x_buf_tmp, 480] | |
137 | ldr x_data_crc1, [x_buf_tmp, 992] | |
138 | ldr x_data_crc2, [x_buf_tmp, 1504] | |
139 | ||
140 | pmull v_a7.1q, v_a3.1d, v_a0.1d | |
141 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
142 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
143 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
144 | ||
145 | ldr x_data_crc0, [x_buf_tmp, 488] | |
146 | ldr x_data_crc1, [x_buf_tmp, 1000] | |
147 | ldr x_data_crc2, [x_buf_tmp, 1512] | |
148 | ||
149 | pmull v_a8.1q, v_a4.1d, v_a0.1d | |
150 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
151 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
152 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
153 | ||
154 | ldr x_data_crc0, [x_buf_tmp, 496] | |
155 | ldr x_data_crc1, [x_buf_tmp, 1008] | |
156 | ldr x_data_crc2, [x_buf_tmp, 1520] | |
157 | ||
// High 64 bits of a1 times high 64 bits of a0, result replaces a1.
158 | pmull2 v_a1.1q, v_a1.2d, v_a0.2d | |
159 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
160 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
161 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
162 | ||
// Next 64 input bytes for the PMULL side.
163 | ld1 {v_y5.4s, v_y6.4s, v_y7.4s, v_y8.4s}, [x_buf_tmp] | |
164 | ||
165 | ldr x_data_crc0, [x_buf_tmp, 504] | |
166 | ldr x_data_crc1, [x_buf_tmp, 1016] | |
167 | ldr x_data_crc2, [x_buf_tmp, 1528] | |
168 | ||
169 | pmull2 v_a2.1q, v_a2.2d, v_a0.2d | |
170 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
171 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
172 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
173 | ||
174 | pmull2 v_a3.1q, v_a3.2d, v_a0.2d | |
175 | pmull2 v_a4.1q, v_a4.2d, v_a0.2d | |
176 | ||
// new data XOR low product ...
177 | eor v_y5.16b, v_y5.16b, v_a5.16b | |
178 | eor v_y6.16b, v_y6.16b, v_a6.16b | |
179 | eor v_y7.16b, v_y7.16b, v_a7.16b | |
180 | eor v_y8.16b, v_y8.16b, v_a8.16b | |
181 | ||
182 | ldr x_data_crc0, [x_buf_tmp, 512] | |
183 | ldr x_data_crc1, [x_buf_tmp, 1024] | |
184 | ldr x_data_crc2, [x_buf_tmp, 1536] | |
185 | ||
// ... XOR high product gives the updated accumulators a1..a4.
186 | eor v_a1.16b, v_y5.16b, v_a1.16b | |
187 | eor v_a2.16b, v_y6.16b, v_a2.16b | |
188 | eor v_a3.16b, v_y7.16b, v_a3.16b | |
189 | eor v_a4.16b, v_y8.16b, v_a4.16b | |
190 | ||
191 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
192 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
193 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
194 | ||
195 | ldr x_data_crc0, [x_buf_tmp, 520] | |
196 | ldr x_data_crc1, [x_buf_tmp, 1032] | |
197 | ldr x_data_crc2, [x_buf_tmp, 1544] | |
198 | ||
199 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
200 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
201 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
202 | .endm | |
203 | ||
// crc32_mix2048: processes exactly 2048 bytes at x_buf.
// Input:  w_crc (w0) = incoming CRC, x_buf (x1) = data pointer.
// Output: w_ret_crc (w0) = CRC after the 2048 bytes.
// Layout of the work, as established by the load offsets below:
//   bytes    0.. 511  folded with PMULL in a1..a4, seeded with w_crc
//   bytes  512..1023  crc32 instructions into w_crc0 (zero seed)
//   bytes 1024..1535  crc32 instructions into w_crc1 (zero seed)
//   bytes 1536..2047  crc32 instructions into w_crc2 (zero seed)
// The four partial results are merged at the end with pmull and crc32
// instructions. When CRC32 is defined the incoming CRC is bit-inverted
// before use and the result is bit-inverted again (eon); otherwise no
// inversion is applied. x_buf itself is not modified.
204 | // crc32 mix for 2048 byte input data | |
205 | .macro crc32_mix2048 | |
206 | fmov s_a1, w_crc | |
207 | movi v_neon_tmp.4s, 0 | |
208 | ||
209 | adrp x_pconst, lanchor_crc32 | |
// x_buf_tmp addresses the second 64-byte group; the first is loaded
// directly from x_buf below.
210 | add x_buf_tmp, x_buf, 64 | |
211 | ||
212 | ldr x_data_crc0, [x_buf, 512] | |
213 | ldr x_data_crc1, [x_buf, 1024] | |
214 | ldr x_data_crc2, [x_buf, 1536] | |
215 | ||
// The three crc32-instruction streams start from a zero CRC.
216 | crc32_u64 w_crc0, wzr, x_data_crc0 | |
217 | crc32_u64 w_crc1, wzr, x_data_crc1 | |
218 | crc32_u64 w_crc2, wzr, x_data_crc2 | |
219 | ||
220 | #ifdef CRC32 | |
221 | mvn v_a1.8b, v_a1.8b | |
222 | #endif | |
223 | ||
// neon_tmp = { crc, 0, 0, 0 }: the incoming CRC is XORed into the first
// four data bytes.
224 | ins v_neon_tmp.s[0], v_a1.s[0] | |
225 | ||
226 | ld1 {v_a1.4s, v_a2.4s, v_a3.4s, v_a4.4s}, [x_buf] | |
227 | ||
228 | ldr x_data_crc0, [x_buf, 520] | |
229 | ldr x_data_crc1, [x_buf, 1032] | |
230 | ldr x_data_crc2, [x_buf, 1544] | |
231 | ||
232 | eor v_a1.16b, v_a1.16b, v_neon_tmp.16b | |
233 | ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2 | |
234 | ||
235 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
236 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
237 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
238 | ||
// Seven unrolled pmull_fold steps. Together with the initial ld1 they cover
// 8 * 64 = 512 bytes on the PMULL side, and 7 * 64 bytes per crc32 stream.
239 | // loop start, unroll the loop | |
240 | .align 4 | |
241 | pmull_fold | |
242 | ||
243 | add x_buf_tmp, x_buf_tmp, 64 | |
244 | pmull_fold | |
245 | ||
246 | add x_buf_tmp, x_buf_tmp, 64 | |
247 | pmull_fold | |
248 | ||
249 | add x_buf_tmp, x_buf_tmp, 64 | |
250 | pmull_fold | |
251 | ||
252 | add x_buf_tmp, x_buf_tmp, 64 | |
253 | pmull_fold | |
254 | ||
255 | add x_buf_tmp, x_buf_tmp, 64 | |
256 | pmull_fold | |
257 | ||
258 | add x_buf_tmp, x_buf_tmp, 64 | |
259 | pmull_fold | |
260 | // loop end | |
261 | ||
// From here on x_pconst is the full address of the constant table. The
// remaining six 8-byte words of each crc32 stream (offsets 976..1016,
// 1488..1528, 2000..2040) are interleaved with the PMULL reduction.
262 | // PMULL: fold into 128-bits | |
263 | add x_pconst, x_pconst, :lo12:lanchor_crc32 | |
264 | ||
265 | ldr x_data_crc0, [x_buf, 976] | |
266 | ldr x_data_crc1, [x_buf, 1488] | |
267 | ldr x_data_crc2, [x_buf, 2000] | |
268 | ||
269 | ldr q_a0, [x_pconst, offset_k3k4] // k3k4 | |
270 | ||
271 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
272 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
273 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
274 | ||
// a1 = fold(a1) XOR a2
275 | pmull v_a5.1q, v_a1.1d, v_a0.1d | |
276 | pmull2 v_a1.1q, v_a1.2d, v_a0.2d | |
277 | ||
278 | eor v_a1.16b, v_a5.16b, v_a1.16b | |
279 | eor v_a1.16b, v_a1.16b, v_a2.16b | |
280 | ||
281 | ldr x_data_crc0, [x_buf, 984] | |
282 | ldr x_data_crc1, [x_buf, 1496] | |
283 | ldr x_data_crc2, [x_buf, 2008] | |
284 | ||
285 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
286 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
287 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
288 | ||
// a1 = fold(a1) XOR a3
289 | pmull v_a5.1q, v_a1.1d, v_a0.1d | |
290 | pmull2 v_a1.1q, v_a1.2d, v_a0.2d | |
291 | ||
292 | ldr x_data_crc0, [x_buf, 992] | |
293 | ldr x_data_crc1, [x_buf, 1504] | |
294 | ldr x_data_crc2, [x_buf, 2016] | |
295 | ||
296 | eor v_a1.16b, v_a5.16b, v_a1.16b | |
297 | eor v_a1.16b, v_a1.16b, v_a3.16b | |
298 | ||
299 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
300 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
301 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
302 | ||
// a1 = fold(a1) XOR a4
303 | pmull v_a5.1q, v_a1.1d, v_a0.1d | |
304 | pmull2 v_a1.1q, v_a1.2d, v_a0.2d | |
305 | ||
306 | ldr x_data_crc0, [x_buf, 1000] | |
307 | ldr x_data_crc1, [x_buf, 1512] | |
308 | ldr x_data_crc2, [x_buf, 2024] | |
309 | ||
310 | eor v_a1.16b, v_a5.16b, v_a1.16b | |
311 | eor v_a1.16b, v_a1.16b, v_a4.16b | |
312 | ||
313 | // PMULL: fold 128-bits to 64-bits | |
314 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
315 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
316 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
317 | ||
// a0 = high half of k3k4, multiplied with the low half of a1.
318 | dup d_a0, v_a0.d[1] | |
319 | pmull v_a2.1q, v_a1.1d, v_a0.1d | |
320 | ||
// v24 must be zeroed again: it was used as neon_tmp (holding the CRC) above.
321 | movi v_neon_zero.4s, 0 | |
322 | ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0 | |
323 | adrp x_tmp, .lanchor_mask | |
324 | ||
325 | ldr x_data_crc0, [x_buf, 1008] | |
326 | ldr x_data_crc1, [x_buf, 1520] | |
327 | ldr x_data_crc2, [x_buf, 2032] | |
328 | ||
// Move the high half of a1 to the low half, zero the rest, then add the
// product.
329 | ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8 | |
330 | eor v_a1.16b, v_a2.16b, v_a1.16b | |
// neon_tmp3 = mask that keeps the low 32 bits of each 64-bit lane.
331 | ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask] | |
332 | ||
333 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
334 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
335 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
336 | ||
// NOTE(review): v_a3 and v_a0 written by the next two instructions do not
// appear to be read again within this macro - confirm whether they are
// still needed.
337 | dup d_a0, v_k5k0.d[1] | |
338 | pmull v_a3.1q, v_a2.1d, v_a0.1d | |
339 | ||
340 | ext v_a2.16b, v_a1.16b, v_neon_zero.16b, #4 | |
341 | and v_a1.16b, v_a1.16b, v_neon_tmp3.16b | |
342 | pmull v_a1.1q, v_a1.1d, v_k5k0.1d | |
343 | eor v_a1.16b, v_a2.16b, v_a1.16b | |
344 | ||
345 | // PMULL: Barrett reduction to 32 bits | |
346 | ldr q_neon_tmp1, [x_pconst, offset_poly] // poly | |
347 | ||
// Last 8-byte word of each crc32 stream.
348 | ldr x_data_crc0, [x_buf, 1016] | |
349 | ldr x_data_crc1, [x_buf, 1528] | |
350 | ldr x_data_crc2, [x_buf, 2040] | |
351 | ||
// neon_tmp2 = second (high) entry of the poly table.
352 | dup d_neon_tmp2, v_neon_tmp1.d[1] | |
353 | ||
354 | crc32_u64 w_crc0, w_crc0, x_data_crc0 | |
355 | crc32_u64 w_crc1, w_crc1, x_data_crc1 | |
356 | crc32_u64 w_crc2, w_crc2, x_data_crc2 | |
357 | ||
// Two masked multiplies: first by the high poly entry, then by the low one.
358 | and v_a2.16b, v_a1.16b, v_neon_tmp3.16b | |
359 | pmull v_a2.1q, v_a2.1d, v_neon_tmp2.1d | |
360 | and v_a2.16b, v_neon_tmp3.16b, v_a2.16b | |
361 | pmull v_a2.1q, v_a2.1d, v_neon_tmp1.1d | |
362 | ||
// The 32-bit PMULL-side result is lane 1 of the XOR.
363 | // crc_pmull result | |
364 | eor v_a1.16b, v_a1.16b, v_a2.16b | |
365 | dup s_crc_pmull, v_a1.s[1] | |
366 | ||
// crc_pmull, crc0 and crc1 are each multiplied by their own crc32_const
// entry and passed through a crc32 step with a zero seed; crc2 is used
// as is. The final value is the XOR of all four.
367 | // merge crc_pmull, crc0, crc1, crc2 using pmull instruction | |
368 | fmov s_neon_crc0, w_crc0 | |
369 | fmov s_neon_crc1, w_crc1 | |
370 | ||
371 | ldr q_neon_const0, [x_pconst, offset_crc32_const] | |
372 | ldr q_neon_const1, [x_pconst, offset_crc32_const+16] | |
373 | ldr q_neon_const2, [x_pconst, offset_crc32_const+32] | |
374 | ||
375 | pmull v_crc_pmull.1q, v_crc_pmull.1d, v_neon_const0.1d | |
376 | pmull v_neon_crc0.1q, v_neon_crc0.1d, v_neon_const1.1d | |
377 | pmull v_neon_crc1.1q, v_neon_crc1.1d, v_neon_const2.1d | |
378 | ||
379 | fmov x_tmp1, d_neon_crc0 | |
380 | crc32_u64 w_crc0, wzr, x_tmp1 | |
381 | ||
382 | fmov x_tmp1, d_neon_crc1 | |
383 | crc32_u64 w_crc1, wzr, x_tmp1 | |
384 | ||
385 | eor w_ret_crc, w_crc1, w_crc0 | |
386 | ||
387 | fmov x_tmp1, d_crc_pmull | |
388 | crc32_u64 w_tmp, wzr, x_tmp1 | |
389 | ||
390 | eor w_crc2, w_tmp, w_crc2 | |
391 | ||
// eon XORs with the inverted operand, so the CRC32 build returns the
// bit-inverted merge result.
392 | // handle crc32/crc32c | |
393 | #ifdef CRC32 | |
394 | eon w_ret_crc, w_crc2, w_ret_crc | |
395 | #else | |
396 | eor w_ret_crc, w_crc2, w_ret_crc | |
397 | #endif | |
398 | .endm | |
399 | ||
400 | // crc32_mix_main_default: CRC driver macro. Whole 2048-byte blocks go through crc32_mix2048; any remaining tail is folded with the crc32_u64/u32/u16/u8 macros. Every path ends in ret. | |
401 | .macro crc32_mix_main_default | |
402 | cmp x_len, 2047 // flags are consumed by the bls three lines below | |
403 | mov x_len_saved, x_len | |
404 | mov x_buf_saved, x_buf | |
405 | bls .less_than_2048 // x_len <= 2047 (unsigned): no whole 2048-byte block to process | |
406 | ||
407 | sub x_buf_iter, x_len, #2048 | |
408 | stp x29, x30, [sp, -16]! // push x29/x30; popped again on both exits that follow the block loop | |
409 | ||
410 | mov x29, sp | |
411 | and x_buf_iter, x_buf_iter, -2048 | |
412 | add x_buf_iter, x_buf_iter, 2048 // with the sub/and above: x_len rounded down to a multiple of 2048 | |
413 | add x_buf_iter, x_buf, x_buf_iter // address just past the last whole 2048-byte block | |
414 | ||
415 | .align 4 | |
416 | .loop_mix: // one 2048-byte block per iteration | |
417 | mov x_buf, x_buf_saved // x_buf is reloaded from the saved block pointer before every block | |
418 | crc32_mix2048 | |
419 | ||
420 | add x_buf_saved, x_buf_saved, 2048 | |
421 | cmp x_buf_saved, x_buf_iter | |
422 | bne .loop_mix // repeat until the saved pointer reaches x_buf_iter | |
423 | ||
424 | and x_len_saved, x_len_saved, 2047 // bytes left after the whole blocks | |
425 | cbnz x_len_saved, .remain_ldp | |
426 | ||
427 | ldp x29, x30, [sp], 16 // no tail: pop x29/x30 and return | |
428 | ret | |
429 | ||
430 | .align 4 | |
431 | .remain_ldp: // tail after at least one whole block | |
432 | mov w_crc_tmp, crc | |
433 | ldp x29, x30, [sp], 16 // pop the pair pushed before the block loop | |
434 | mov size, x_len_saved | |
435 | mov buf, x_buf_iter // the tail starts where the block loop stopped | |
436 | b .crc32_hw_handle | |
437 | ||
438 | .remain: // input shorter than 2048 bytes: nothing was pushed, so nothing is popped | |
439 | mov w_crc_tmp, crc | |
440 | mov size, x_len_saved | |
441 | mov buf, x_buf_saved | |
442 | b .crc32_hw_handle | |
443 | ||
444 | .align 4 | |
445 | .less_than_2048: | |
446 | cbnz x_len, .remain | |
447 | ret // zero length: return without reading any data | |
448 | ||
449 | .crc32_hw_handle: // tail path, reached from .remain and .remain_ldp; NOTE(review): w_crc_tmp set there is presumably the 32-bit view of crc_tmp used here, confirm in the .req list | |
450 | cmp size, 63 // flags are consumed by the bls below; the mvn in between does not set flags | |
451 | ||
452 | #ifdef CRC32 | |
453 | mvn crc_tmp, crc_tmp // only when CRC32 is defined: invert the running value on entry (inverted again at .crc32_hw_done) | |
454 | #endif | |
455 | ||
456 | bls .less_than_64 | |
457 | sub buf_saved, size, #64 | |
458 | and buf_saved, buf_saved, -64 | |
459 | add buf_saved, buf_saved, 64 // with the sub/and above: size rounded down to a multiple of 64 | |
460 | add buf_saved, buf, buf_saved // end address for .loop_64 | |
461 | ||
462 | .align 4 | |
463 | .loop_64: // 64 bytes per iteration as eight crc32_u64 steps, in ascending address order | |
464 | ldp data_tmp1, data_tmp2, [buf] | |
465 | ldr data_tmp3, [buf, 16] | |
466 | crc32_u64 crc_tmp, crc_tmp, data_tmp1 | |
467 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
468 | ||
469 | ldp data_tmp1, data_tmp2, [buf, 24] | |
470 | add buf, buf, 64 // buf is advanced early; the remaining loads use negative offsets | |
471 | ||
472 | crc32_u64 crc_tmp, crc_tmp, data_tmp3 | |
473 | ldr data_tmp3, [buf, -24] // byte offset 40 of the current 64-byte chunk | |
474 | ||
475 | crc32_u64 crc_tmp, crc_tmp, data_tmp1 | |
476 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
477 | ||
478 | ldp data_tmp1, data_tmp2, [buf, -16] // byte offsets 48 and 56 of the current 64-byte chunk | |
479 | cmp buf_saved, buf // flags are consumed by the bne at the bottom of the loop | |
480 | crc32_u64 crc_tmp, crc_tmp, data_tmp3 | |
481 | ||
482 | crc32_u64 crc_tmp, crc_tmp, data_tmp1 | |
483 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
484 | bne .loop_64 | |
485 | ||
486 | and size, size, 63 // bytes left after the 64-byte chunks | |
487 | .less_than_64: // size is at most 63 here: up to seven 8-byte steps, then the 4/2/1-byte steps | |
488 | cmp size, 7 | |
489 | bls .crc32_hw_w // fewer than 8 bytes: go straight to the 4/2/1-byte steps | |
490 | ||
491 | ldr data_tmp2, [buf] | |
492 | sub size_tmp, size, #8 // size_tmp = size - 8; it is read again at .crc32_hw_w_pre | |
493 | cmp size_tmp, 7 | |
494 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
495 | bls .crc32_hw_w_pre // fewer than 8 bytes would remain: stop the 8-byte steps | |
496 | ||
497 | ldr data_tmp2, [buf, 8] | |
498 | sub data_tmp3, size, #16 // from here on the remaining count is computed into data_tmp3, used only for the compare | |
499 | cmp data_tmp3, 7 | |
500 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
501 | bls .crc32_hw_w_pre | |
502 | ||
503 | ldr data_tmp2, [buf, 16] | |
504 | sub data_tmp3, size, #24 | |
505 | cmp data_tmp3, 7 | |
506 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
507 | bls .crc32_hw_w_pre | |
508 | ||
509 | ldr data_tmp2, [buf, 24] | |
510 | sub data_tmp3, size, #32 | |
511 | cmp data_tmp3, 7 | |
512 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
513 | bls .crc32_hw_w_pre | |
514 | ||
515 | ldr data_tmp2, [buf, 32] | |
516 | sub data_tmp3, size, #40 | |
517 | cmp data_tmp3, 7 | |
518 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
519 | bls .crc32_hw_w_pre | |
520 | ||
521 | ldr data_tmp2, [buf, 40] | |
522 | sub data_tmp3, size, #48 | |
523 | cmp data_tmp3, 7 | |
524 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
525 | bls .crc32_hw_w_pre | |
526 | ||
527 | ldr data_tmp2, [buf, 48] // seventh and last possible 8-byte word, no further check needed | |
528 | crc32_u64 crc_tmp, crc_tmp, data_tmp2 | |
529 | ||
530 | .crc32_hw_w_pre: // advance buf past the 8-byte words consumed above | |
531 | and size_tmp, size_tmp, -8 | |
532 | and size, size, 7 // bytes left for the 4/2/1-byte steps | |
533 | add size_tmp, size_tmp, 8 // ((size - 8) & -8) + 8; NOTE(review): relies on size_tmp and data_tmp3 being distinct registers, confirm in the .req list | |
534 | add buf, buf, size_tmp | |
535 | ||
536 | .crc32_hw_w: | |
537 | cmp size, 3 | |
538 | bls .crc32_hw_h // fewer than 4 bytes left | |
539 | ldr w_data_tmp2, [buf], 4 // post-index load: buf += 4 | |
540 | sub size, size, #4 | |
541 | crc32_u32 crc_tmp, crc_tmp, w_data_tmp2 | |
542 | ||
543 | .crc32_hw_h: | |
544 | cmp size, 1 | |
545 | bls .crc32_hw_b // fewer than 2 bytes left | |
546 | ldrh w_data_tmp2, [buf], 2 // post-index load: buf += 2 | |
547 | sub size, size, #2 | |
548 | crc32_u16 crc_tmp, crc_tmp, w_data_tmp2 | |
549 | ||
550 | .crc32_hw_b: | |
551 | cbz size, .crc32_hw_done // nothing left | |
552 | ldrb w_data_tmp2, [buf] // last single byte | |
553 | crc32_u8 crc_tmp, crc_tmp, w_data_tmp2 | |
554 | ||
555 | .crc32_hw_done: | |
556 | #ifdef CRC32 | |
557 | mvn ret_crc, crc_tmp // CRC32 defined: return the inverted running value | |
558 | #else | |
559 | mov ret_crc, crc_tmp // otherwise: return the running value unchanged | |
560 | #endif | |
561 | ret | |
562 | .endm |
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | .text | |
30 | .align 6 | |
31 | .arch armv8-a+crypto+crc | |
32 | ||
33 | #include "crc32_common_mix_neoverse_n1.S" | |
34 | .Lconstants: // 72-byte table: three 16-byte .octa entries followed by three 8-byte .quad entries; NOTE(review): presumably read by crc32_common_mix from the include above, confirm the offsets there | |
35 | .octa 0x00000001c6e415960000000154442bd4 | |
36 | .octa 0x00000000ccaa009e00000001751997d0 | |
37 | .octa 0x00000001F701164100000001DB710641 | |
38 | .quad 0x0000000163cd6124 | |
39 | .quad 0x00000000FFFFFFFF | |
40 | .quad 0x000000001753ab84 | |
41 | .macro crc32_u64 dst,src,data | |
42 | crc32x \dst,\src,\data // CRC32X: fold a 64-bit \data into the CRC in \src, result in \dst | |
43 | .endm | |
44 | .macro crc32_u32 dst,src,data | |
45 | crc32w \dst,\src,\data // CRC32W: same with a 32-bit \data | |
46 | .endm | |
47 | .macro crc32_u16 dst,src,data | |
48 | crc32h \dst,\src,\data // CRC32H: same with a 16-bit \data | |
49 | .endm | |
50 | .macro crc32_u8 dst,src,data | |
51 | crc32b \dst,\src,\data // CRC32B: same with an 8-bit \data | |
52 | .endm | |
53 | ||
54 | ||
55 | /** | |
56 | * uint32_t crc32_mix_neoverse_n1(uint CRC, uint8_t * BUF, | |
57 | * size_t LEN) | |
58 | */ | |
59 | BUF .req x1 // second argument register | |
60 | LEN .req x2 // third argument register | |
61 | CRC .req x0 // first argument register, also the return register | |
62 | wCRC .req w0 // 32-bit view of CRC | |
63 | .align 6 | |
64 | .global crc32_mix_neoverse_n1 | |
65 | .type crc32_mix_neoverse_n1, %function | |
66 | crc32_mix_neoverse_n1: // the whole body is the crc32_common_mix macro, invoked with the argument crc32 | |
67 | crc32_common_mix crc32 | |
68 | .size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1 | |
69 |
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | .text | |
30 | .arch armv8-a+crypto+crc | |
31 | .align 6 | |
32 | ||
33 | .macro crc32_u64 dst,src,data | |
34 | crc32cx \dst,\src,\data // CRC32CX: fold a 64-bit \data into the CRC32C value in \src, result in \dst | |
35 | .endm | |
36 | ||
37 | .macro crc32_u32 dst,src,data | |
38 | crc32cw \dst,\src,\data // CRC32CW: same with a 32-bit \data | |
39 | .endm | |
40 | ||
41 | .macro crc32_u16 dst,src,data | |
42 | crc32ch \dst,\src,\data // CRC32CH: same with a 16-bit \data | |
43 | .endm | |
44 | ||
45 | .macro crc32_u8 dst,src,data | |
46 | crc32cb \dst,\src,\data // CRC32CB: same with an 8-bit \data | |
47 | .endm | |
48 | ||
49 | #include "crc32_mix_default_common.S" | |
50 | ||
51 | .global crc32c_mix_default | |
52 | .type crc32c_mix_default, %function | |
53 | crc32c_mix_default: // rotates the incoming (x0, w1, w2) into (w0, x1, x2), then expands crc32_mix_main_default; NOTE(review): presumably (buf, len, crc) becomes (crc, buf, len), confirm against the C prototype | |
54 | mov w3, w2 // park the incoming third argument in w3 | |
55 | sxtw x2, w1 // second argument, sign-extended from 32 to 64 bits, moves to x2 | |
56 | mov x1, x0 // first argument moves to x1 | |
57 | mov w0, w3 // the former third argument lands in w0 | |
58 | crc32_mix_main_default | |
59 | .size crc32c_mix_default, .-crc32c_mix_default | |
60 | ||
61 | .section .rodata | |
62 | .align 4 | |
63 | .set lanchor_crc32,. + 0 // anchor = current location, i.e. the address of k1k2 below | |
64 | ||
65 | .type k1k2, %object | |
66 | .size k1k2, 16 | |
67 | k1k2: // 16 bytes at lanchor_crc32 + 0 | |
68 | .xword 0x00740eef02 | |
69 | .xword 0x009e4addf8 | |
70 | ||
71 | .type k3k4, %object | |
72 | .size k3k4, 16 | |
73 | k3k4: // 16 bytes at lanchor_crc32 + 16 | |
74 | .xword 0x00f20c0dfe | |
75 | .xword 0x014cd00bd6 | |
76 | ||
77 | .type k5k0, %object | |
78 | .size k5k0, 16 | |
79 | k5k0: // 16 bytes at lanchor_crc32 + 32; the upper 64 bits are zero | |
80 | .xword 0x00dd45aab8 | |
81 | .xword 0 | |
82 | ||
83 | .type poly, %object | |
84 | .size poly, 16 | |
85 | poly: // 16 bytes at lanchor_crc32 + 48 | |
86 | .xword 0x0105ec76f0 | |
87 | .xword 0x00dea713f1 | |
88 | ||
89 | .type crc32_const, %object | |
90 | .size crc32_const, 48 | |
91 | crc32_const: // three 16-byte entries at lanchor_crc32 + 64, each a constant in the low 64 bits with a zero upper half | |
92 | .xword 0x9ef68d35 | |
93 | .xword 0 | |
94 | .xword 0x170076fa | |
95 | .xword 0 | |
96 | .xword 0xdd7e3b0c | |
97 | .xword 0 | |
98 | ||
99 | .align 4 | |
100 | .set .lanchor_mask,. + 0 // anchor for the mask below; the PMULL fold code addresses it with adrp plus :lo12:.lanchor_mask | |
101 | ||
102 | .type mask, %object | |
103 | .size mask, 16 | |
104 | mask: // four 32-bit words -1, 0, -1, 0: on a little-endian target this keeps the low 32 bits of each 64-bit half | |
105 | .word -1 | |
106 | .word 0 | |
107 | .word -1 | |
108 | .word 0 |
0 | /********************************************************************** | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Arm Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | .text | |
30 | .align 6 | |
31 | .arch armv8-a+crypto+crc | |
32 | ||
33 | #include "crc32_common_mix_neoverse_n1.S" | |
34 | .Lconstants: // 72-byte table: three 16-byte .octa entries followed by three 8-byte .quad entries | |
35 | .octa 0x000000009e4addf800000000740eef02 // low/high 64 bits equal the two k1k2 words in crc32c_mix_default.S | |
36 | .octa 0x000000014cd00bd600000000f20c0dfe // low/high 64 bits equal the two k3k4 words in crc32c_mix_default.S | |
37 | .octa 0x00000000dea713f10000000105ec76f0 // low/high 64 bits equal the two poly words in crc32c_mix_default.S | |
38 | .quad 0x00000000dd45aab8 // equals the first k5k0 word in crc32c_mix_default.S | |
39 | .quad 0x00000000FFFFFFFF // low 32 bits set | |
40 | .quad 0x000000009ef68d35 // equals the first crc32_const word in crc32c_mix_default.S | |
41 | ||
42 | .macro crc32_u64 dst,src,data | |
43 | crc32cx \dst,\src,\data // CRC32CX: fold a 64-bit \data into the CRC32C value in \src, result in \dst | |
44 | .endm | |
45 | .macro crc32_u32 dst,src,data | |
46 | crc32cw \dst,\src,\data // CRC32CW: same with a 32-bit \data | |
47 | .endm | |
48 | .macro crc32_u16 dst,src,data | |
49 | crc32ch \dst,\src,\data // CRC32CH: same with a 16-bit \data | |
50 | .endm | |
51 | .macro crc32_u8 dst,src,data | |
52 | crc32cb \dst,\src,\data // CRC32CB: same with an 8-bit \data | |
53 | .endm | |
54 | /** | |
55 | * uint32_t crc32c_mix_neoverse_n1(uint8_t * BUF, | |
56 | * size_t LEN, uint CRC) | |
57 | */ | |
58 | BUF .req x0 // first argument register | |
59 | LEN .req x1 // second argument register | |
60 | CRC .req x2 // third argument register | |
61 | wCRC .req w2 // 32-bit view of CRC | |
62 | .align 6 | |
63 | .global crc32c_mix_neoverse_n1 | |
64 | .type crc32c_mix_neoverse_n1, %function | |
65 | crc32c_mix_neoverse_n1: // the whole body is the crc32_common_mix macro, invoked with the argument crc32c | |
66 | crc32_common_mix crc32c | |
67 | .size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1 |
0 | 0 | /********************************************************************** |
1 | Copyright(c) 2019 Arm Corporation All rights reserved. | |
1 | Copyright(c) 2019-2020 Arm Corporation All rights reserved. | |
2 | 2 | |
3 | 3 | Redistribution and use in source and binary forms, with or without |
4 | 4 | modification, are permitted provided that the following conditions |
61 | 61 | DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) |
62 | 62 | { |
63 | 63 | unsigned long auxval = getauxval(AT_HWCAP); |
64 | if (auxval & HWCAP_CRC32) | |
65 | return PROVIDER_INFO(crc32_iscsi_refl_hw_fold); | |
64 | if (auxval & HWCAP_CRC32) { | |
65 | switch (get_micro_arch_id()) { | |
66 | case MICRO_ARCH_ID(ARM, NEOVERSE_N1): | |
67 | case MICRO_ARCH_ID(ARM, CORTEX_A57): | |
68 | case MICRO_ARCH_ID(ARM, CORTEX_A72): | |
69 | return PROVIDER_INFO(crc32_iscsi_crc_ext); | |
70 | } | |
71 | } | |
72 | if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { | |
73 | return PROVIDER_INFO(crc32_iscsi_3crc_fold); | |
74 | } | |
75 | ||
66 | 76 | if (auxval & HWCAP_PMULL) { |
67 | 77 | return PROVIDER_INFO(crc32_iscsi_refl_pmull); |
68 | 78 | } |
73 | 83 | DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) |
74 | 84 | { |
75 | 85 | unsigned long auxval = getauxval(AT_HWCAP); |
76 | if (auxval & HWCAP_CRC32) | |
77 | return PROVIDER_INFO(crc32_gzip_refl_hw_fold); | |
86 | ||
87 | if (auxval & HWCAP_CRC32) { | |
88 | switch (get_micro_arch_id()) { | |
89 | case MICRO_ARCH_ID(ARM, NEOVERSE_N1): | |
90 | case MICRO_ARCH_ID(ARM, CORTEX_A57): | |
91 | case MICRO_ARCH_ID(ARM, CORTEX_A72): | |
92 | return PROVIDER_INFO(crc32_gzip_refl_crc_ext); | |
93 | } | |
94 | } | |
95 | if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { | |
96 | return PROVIDER_INFO(crc32_gzip_refl_3crc_fold); | |
97 | } | |
98 | ||
78 | 99 | if (auxval & HWCAP_PMULL) |
79 | 100 | return PROVIDER_INFO(crc32_gzip_refl_pmull); |
80 | 101 |
72 | 72 | %endif |
73 | 73 | |
74 | 74 | align 16 |
75 | global crc16_t10dif_01:ISAL_SYM_TYPE_FUNCTION | |
75 | mk_global crc16_t10dif_01, function | |
76 | 76 | crc16_t10dif_01: |
77 | endbranch | |
77 | 78 | |
78 | 79 | ; adjust the 16-bit initial_crc value, scale it to 32 bits |
79 | 80 | shl arg1_low32, 16 |
72 | 72 | %endif |
73 | 73 | |
74 | 74 | align 16 |
75 | global crc16_t10dif_02:ISAL_SYM_TYPE_FUNCTION | |
75 | mk_global crc16_t10dif_02, function | |
76 | 76 | crc16_t10dif_02: |
77 | endbranch | |
77 | 78 | |
78 | 79 | ; adjust the 16-bit initial_crc value, scale it to 32 bits |
79 | 80 | shl arg1_low32, 16 |
81 | 81 | %endif |
82 | 82 | |
83 | 83 | align 16 |
84 | global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION | |
84 | mk_global FUNCTION_NAME, function | |
85 | 85 | FUNCTION_NAME: |
86 | endbranch | |
86 | 87 | |
87 | 88 | ; adjust the 16-bit initial_crc value, scale it to 32 bits |
88 | 89 | shl arg1_low32, 16 |
65 | 65 | %endif |
66 | 66 | |
67 | 67 | align 16 |
68 | global crc16_t10dif_by4:ISAL_SYM_TYPE_FUNCTION | |
68 | mk_global crc16_t10dif_by4, function | |
69 | 69 | crc16_t10dif_by4: |
70 | endbranch | |
70 | 71 | |
71 | 72 | ; adjust the 16-bit initial_crc value, scale it to 32 bits |
72 | 73 | shl arg1_low32, 16 |
68 | 68 | %endif |
69 | 69 | |
70 | 70 | align 16 |
71 | global crc16_t10dif_copy_by4:ISAL_SYM_TYPE_FUNCTION | |
71 | mk_global crc16_t10dif_copy_by4, function | |
72 | 72 | crc16_t10dif_copy_by4: |
73 | endbranch | |
73 | 74 | |
74 | 75 | ; adjust the 16-bit initial_crc value, scale it to 32 bits |
75 | 76 | shl arg1_low32, 16 |
68 | 68 | %endif |
69 | 69 | |
70 | 70 | align 16 |
71 | global crc16_t10dif_copy_by4_02:ISAL_SYM_TYPE_FUNCTION | |
71 | mk_global crc16_t10dif_copy_by4_02, function | |
72 | 72 | crc16_t10dif_copy_by4_02: |
73 | endbranch | |
73 | 74 | |
74 | 75 | ; adjust the 16-bit initial_crc value, scale it to 32 bits |
75 | 76 | shl arg1_low32, 16 |
91 | 91 | %endif |
92 | 92 | |
93 | 93 | align 16 |
94 | global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION | |
94 | mk_global FUNCTION_NAME, function | |
95 | 95 | FUNCTION_NAME: |
96 | endbranch | |
96 | 97 | |
97 | 98 | not arg1_low32 |
98 | 99 | sub rsp, VARIABLE_OFFSET |
85 | 85 | %endif |
86 | 86 | |
87 | 87 | align 16 |
88 | global crc32_gzip_refl_by8:ISAL_SYM_TYPE_FUNCTION | |
88 | mk_global crc32_gzip_refl_by8, function | |
89 | 89 | crc32_gzip_refl_by8: |
90 | endbranch | |
90 | 91 | |
91 | 92 | ; unsigned long c = crc ^ 0xffffffffL; |
92 | 93 | not arg1_low32 ; |
85 | 85 | %endif |
86 | 86 | |
87 | 87 | align 16 |
88 | global crc32_gzip_refl_by8_02:ISAL_SYM_TYPE_FUNCTION | |
88 | mk_global crc32_gzip_refl_by8_02, function | |
89 | 89 | crc32_gzip_refl_by8_02: |
90 | endbranch | |
90 | 91 | not arg1_low32 |
91 | 92 | sub rsp, VARIABLE_OFFSET |
92 | 93 |
71 | 71 | %define VARIABLE_OFFSET 16*2+8 |
72 | 72 | %endif |
73 | 73 | align 16 |
74 | global crc32_ieee_01:ISAL_SYM_TYPE_FUNCTION | |
74 | mk_global crc32_ieee_01, function | |
75 | 75 | crc32_ieee_01: |
76 | endbranch | |
76 | 77 | |
77 | 78 | not arg1_low32 ;~init_crc |
78 | 79 |
71 | 71 | %define VARIABLE_OFFSET 16*2+8 |
72 | 72 | %endif |
73 | 73 | align 16 |
74 | global crc32_ieee_02:ISAL_SYM_TYPE_FUNCTION | |
74 | mk_global crc32_ieee_02, function | |
75 | 75 | crc32_ieee_02: |
76 | endbranch | |
76 | 77 | |
77 | 78 | not arg1_low32 ;~init_crc |
78 | 79 |
81 | 81 | %endif |
82 | 82 | |
83 | 83 | align 16 |
84 | global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION | |
84 | mk_global FUNCTION_NAME, function | |
85 | 85 | FUNCTION_NAME: |
86 | endbranch | |
86 | 87 | |
87 | 88 | not arg1_low32 |
88 | 89 | sub rsp, VARIABLE_OFFSET |
73 | 73 | %endif |
74 | 74 | |
75 | 75 | align 16 |
76 | global crc32_ieee_by4:ISAL_SYM_TYPE_FUNCTION | |
76 | mk_global crc32_ieee_by4, function | |
77 | 77 | crc32_ieee_by4: |
78 | endbranch | |
78 | 79 | |
79 | 80 | not arg1_low32 |
80 | 81 |
152 | 152 | ;;; crc_init = r8 |
153 | 153 | ;;; |
154 | 154 | |
155 | global crc32_iscsi_00:ISAL_SYM_TYPE_FUNCTION | |
155 | mk_global crc32_iscsi_00, function | |
156 | 156 | crc32_iscsi_00: |
157 | endbranch | |
157 | 158 | |
158 | 159 | %ifidn __OUTPUT_FORMAT__, elf64 |
159 | 160 | %define bufp rdi |
49 | 49 | ;;; len = rdx |
50 | 50 | ;;; crc_init = r8 |
51 | 51 | |
52 | global crc32_iscsi_01:ISAL_SYM_TYPE_FUNCTION | |
52 | mk_global crc32_iscsi_01, function | |
53 | 53 | crc32_iscsi_01: |
54 | endbranch | |
54 | 55 | |
55 | 56 | %ifidn __OUTPUT_FORMAT__, elf64 |
56 | 57 | %define bufp rdi |
213 | 214 | %rep 128-1 |
214 | 215 | |
215 | 216 | CONCAT(crc_,i,:) |
217 | endbranch | |
216 | 218 | crc32 crc_init, qword [block_0 - i*8] |
217 | 219 | crc32 crc1, qword [block_1 - i*8] |
218 | 220 | crc32 crc2, qword [block_2 - i*8] |
0 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
1 | ; Copyright(c) 2011-2020 Intel Corporation All rights reserved. | |
2 | ; | |
3 | ; Redistribution and use in source and binary forms, with or without | |
4 | ; modification, are permitted provided that the following conditions | |
5 | ; are met: | |
6 | ; * Redistributions of source code must retain the above copyright | |
7 | ; notice, this list of conditions and the following disclaimer. | |
8 | ; * Redistributions in binary form must reproduce the above copyright | |
9 | ; notice, this list of conditions and the following disclaimer in | |
10 | ; the documentation and/or other materials provided with the | |
11 | ; distribution. | |
12 | ; * Neither the name of Intel Corporation nor the names of its | |
13 | ; contributors may be used to endorse or promote products derived | |
14 | ; from this software without specific prior written permission. | |
15 | ; | |
16 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
28 | ||
29 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
30 | ; Function API: | |
31 | ; UINT32 crc32_iscsi_by16_10( | |
32 | ; UINT32 init_crc, //initial CRC value, 32 bits | |
33 | ; const unsigned char *buf, //buffer pointer to calculate CRC on | |
34 | ; UINT64 len //buffer length in bytes (64-bit data) | |
35 | ; ); | |
36 | ; | |
37 | ; Authors: | |
38 | ; Erdinc Ozturk | |
39 | ; Vinodh Gopal | |
40 | ; James Guilford | |
41 | ; | |
42 | ; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" | |
43 | ; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | |
44 | ; | |
45 | ; | |
46 | ||
47 | %include "reg_sizes.asm" | |
48 | ||
49 | %ifndef FUNCTION_NAME | |
50 | %define FUNCTION_NAME crc32_iscsi_by16_10 | |
51 | %endif | |
52 | ||
53 | %if (AS_FEATURE_LEVEL) >= 10 | |
54 | ||
55 | [bits 64] | |
56 | default rel | |
57 | ||
58 | section .text | |
59 | ||
60 | ||
61 | %ifidn __OUTPUT_FORMAT__, win64 | |
62 | %xdefine arg1 r8 ; initial CRC (the function body reads it through arg1_low32) | |
63 | %xdefine arg2 rcx ; buffer pointer (the function body loads data from [arg2 + ...]) | |
64 | %xdefine arg3 rdx ; byte count (the function body compares it against 256) | |
65 | ||
66 | %xdefine arg1_low32 r8d ; low 32 bits of arg1 | |
67 | %else | |
68 | %xdefine arg1 rdx ; initial CRC | |
69 | %xdefine arg2 rdi ; buffer pointer | |
70 | %xdefine arg3 rsi ; byte count | |
71 | ||
72 | %xdefine arg1_low32 edx ; low 32 bits of arg1 | |
73 | %endif | |
74 | ||
75 | %define TMP 16*0 | |
76 | %ifidn __OUTPUT_FORMAT__, win64 | |
77 | %define XMM_SAVE 16*2 ; stack offset where the prologue stores xmm6-xmm15 | |
78 | %define VARIABLE_OFFSET 16*12+8 ; bytes subtracted from rsp at function entry | |
79 | %else | |
80 | %define VARIABLE_OFFSET 16*2+8 ; bytes subtracted from rsp at function entry | |
81 | %endif | |
83 | align 16 | |
84 | mk_global FUNCTION_NAME, function | |
85 | FUNCTION_NAME: | |
86 | endbranch | |
87 | sub rsp, VARIABLE_OFFSET | |
88 | ||
89 | %ifidn __OUTPUT_FORMAT__, win64 | |
90 | ; push the xmm registers into the stack to maintain | |
91 | vmovdqa [rsp + XMM_SAVE + 16*0], xmm6 | |
92 | vmovdqa [rsp + XMM_SAVE + 16*1], xmm7 | |
93 | vmovdqa [rsp + XMM_SAVE + 16*2], xmm8 | |
94 | vmovdqa [rsp + XMM_SAVE + 16*3], xmm9 | |
95 | vmovdqa [rsp + XMM_SAVE + 16*4], xmm10 | |
96 | vmovdqa [rsp + XMM_SAVE + 16*5], xmm11 | |
97 | vmovdqa [rsp + XMM_SAVE + 16*6], xmm12 | |
98 | vmovdqa [rsp + XMM_SAVE + 16*7], xmm13 | |
99 | vmovdqa [rsp + XMM_SAVE + 16*8], xmm14 | |
100 | vmovdqa [rsp + XMM_SAVE + 16*9], xmm15 | |
101 | %endif | |
102 | ||
103 | ; check if smaller than 256B | |
104 | cmp arg3, 256 | |
105 | jl .less_than_256 | |
106 | ||
107 | ; load the initial crc value | |
108 | vmovd xmm10, arg1_low32 ; initial crc | |
109 | ||
110 | ; receive the initial 64B data, xor the initial crc value | |
111 | vmovdqu8 zmm0, [arg2+16*0] | |
112 | vmovdqu8 zmm4, [arg2+16*4] | |
113 | vpxorq zmm0, zmm10 | |
114 | vbroadcasti32x4 zmm10, [rk3] ;xmm10 has rk3 and rk4 | |
115 | ;imm value of pclmulqdq instruction will determine which constant to use | |
116 | ||
117 | sub arg3, 256 | |
118 | cmp arg3, 256 | |
119 | jl .fold_128_B_loop | |
120 | ||
121 | vmovdqu8 zmm7, [arg2+16*8] | |
122 | vmovdqu8 zmm8, [arg2+16*12] | |
123 | vbroadcasti32x4 zmm16, [rk_1] ;zmm16 has rk-1 and rk-2 | |
124 | sub arg3, 256 | |
125 | ||
126 | .fold_256_B_loop: | |
127 | add arg2, 256 | |
128 | vmovdqu8 zmm3, [arg2+16*0] | |
129 | vpclmulqdq zmm1, zmm0, zmm16, 0x10 | |
130 | vpclmulqdq zmm2, zmm0, zmm16, 0x01 | |
131 | vpxorq zmm0, zmm1, zmm2 | |
132 | vpxorq zmm0, zmm0, zmm3 | |
133 | ||
134 | vmovdqu8 zmm9, [arg2+16*4] | |
135 | vpclmulqdq zmm5, zmm4, zmm16, 0x10 | |
136 | vpclmulqdq zmm6, zmm4, zmm16, 0x01 | |
137 | vpxorq zmm4, zmm5, zmm6 | |
138 | vpxorq zmm4, zmm4, zmm9 | |
139 | ||
140 | vmovdqu8 zmm11, [arg2+16*8] | |
141 | vpclmulqdq zmm12, zmm7, zmm16, 0x10 | |
142 | vpclmulqdq zmm13, zmm7, zmm16, 0x01 | |
143 | vpxorq zmm7, zmm12, zmm13 | |
144 | vpxorq zmm7, zmm7, zmm11 | |
145 | ||
146 | vmovdqu8 zmm17, [arg2+16*12] | |
147 | vpclmulqdq zmm14, zmm8, zmm16, 0x10 | |
148 | vpclmulqdq zmm15, zmm8, zmm16, 0x01 | |
149 | vpxorq zmm8, zmm14, zmm15 | |
150 | vpxorq zmm8, zmm8, zmm17 | |
151 | ||
152 | sub arg3, 256 | |
153 | jge .fold_256_B_loop | |
154 | ||
155 | ;; Fold 256 into 128 | |
156 | add arg2, 256 | |
157 | vpclmulqdq zmm1, zmm0, zmm10, 0x01 | |
158 | vpclmulqdq zmm2, zmm0, zmm10, 0x10 | |
159 | vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC | |
160 | ||
161 | vpclmulqdq zmm5, zmm4, zmm10, 0x01 | |
162 | vpclmulqdq zmm6, zmm4, zmm10, 0x10 | |
163 | vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC | |
164 | ||
165 | vmovdqa32 zmm0, zmm7 | |
166 | vmovdqa32 zmm4, zmm8 | |
167 | ||
168 | add arg3, 128 | |
169 | jmp .fold_128_B_register | |
170 | ||
171 | ||
172 | ||
173 | ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The fold_128_B_loop | |
174 | ; loop will fold 128B at a time until we have 128+y Bytes of buffer | |
175 | ||
176 | ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel | |
177 | .fold_128_B_loop: | |
178 | add arg2, 128 | |
179 | vmovdqu8 zmm8, [arg2+16*0] | |
180 | vpclmulqdq zmm2, zmm0, zmm10, 0x10 | |
181 | vpclmulqdq zmm1, zmm0, zmm10, 0x01 | |
182 | vpxorq zmm0, zmm2, zmm1 | |
183 | vpxorq zmm0, zmm0, zmm8 | |
184 | ||
185 | vmovdqu8 zmm9, [arg2+16*4] | |
186 | vpclmulqdq zmm5, zmm4, zmm10, 0x10 | |
187 | vpclmulqdq zmm6, zmm4, zmm10, 0x01 | |
188 | vpxorq zmm4, zmm5, zmm6 | |
189 | vpxorq zmm4, zmm4, zmm9 | |
190 | ||
191 | sub arg3, 128 | |
192 | jge .fold_128_B_loop | |
193 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
194 | ||
195 | add arg2, 128 | |
196 | ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 | |
197 | ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 | |
198 | ||
199 | .fold_128_B_register: | |
200 | ; fold the 8 128b parts into 1 xmm register with different constants | |
201 | vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16 | |
202 | vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0 | |
203 | vpclmulqdq zmm1, zmm0, zmm16, 0x01 | |
204 | vpclmulqdq zmm2, zmm0, zmm16, 0x10 | |
205 | vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand | |
206 | ||
207 | vpclmulqdq zmm5, zmm4, zmm11, 0x01 | |
208 | vpclmulqdq zmm6, zmm4, zmm11, 0x10 | |
209 | vmovdqa xmm10, [rk1] ; Needed later in reduction loop | |
210 | vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC | |
211 | vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC | |
212 | ||
213 | vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10 | |
214 | vpxorq ymm8, ymm8, ymm1 | |
215 | vextracti64x2 xmm5, ymm8, 1 | |
216 | vpxorq xmm7, xmm5, xmm8 | |
217 | ||
218 | ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop | |
219 | ; instead of a cmp instruction, we use the negative flag with the jl instruction | |
220 | add arg3, 128-16 | |
221 | jl .final_reduction_for_128 | |
222 | ||
223 | ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory | |
224 | ; we can fold 16 bytes at a time if y>=16 | |
225 | ; continue folding 16B at a time | |
226 | ||
227 | .16B_reduction_loop: | |
228 | vpclmulqdq xmm8, xmm7, xmm10, 0x1 | |
229 | vpclmulqdq xmm7, xmm7, xmm10, 0x10 | |
230 | vpxor xmm7, xmm8 | |
231 | vmovdqu xmm0, [arg2] | |
232 | vpxor xmm7, xmm0 | |
233 | add arg2, 16 | |
234 | sub arg3, 16 | |
235 | ; instead of a cmp instruction, we utilize the flags with the jge instruction | |
236 | ; equivalent of: cmp arg3, 16-16 | |
237 | ; check if there is any more 16B in the buffer to be able to fold | |
238 | jge .16B_reduction_loop | |
239 | ||
240 | ;now we have 16+z bytes left to reduce, where 0<= z < 16. | |
241 | ;first, we reduce the data in the xmm7 register | |
242 | ||
243 | ||
244 | .final_reduction_for_128: | |
245 | add arg3, 16 | |
246 | je .128_done | |
247 | ||
248 | ; here we are getting data that is less than 16 bytes. | |
249 | ; since we know that there was data before the pointer, we can offset | |
250 | ; the input pointer before the actual point, to receive exactly 16 bytes. | |
251 | ; after that the registers need to be adjusted. | |
252 | .get_last_two_xmms: | |
253 | ||
254 | vmovdqa xmm2, xmm7 | |
255 | vmovdqu xmm1, [arg2 - 16 + arg3] | |
256 | ||
257 | ; get rid of the extra data that was loaded before | |
258 | ; load the shift constant | |
259 | lea rax, [pshufb_shf_table] | |
260 | add rax, arg3 | |
261 | vmovdqu xmm0, [rax] | |
262 | ||
263 | vpshufb xmm7, xmm0 | |
264 | vpxor xmm0, [mask3] | |
265 | vpshufb xmm2, xmm0 | |
266 | ||
267 | vpblendvb xmm2, xmm2, xmm1, xmm0 | |
268 | ;;;;;;;;;; | |
269 | vpclmulqdq xmm8, xmm7, xmm10, 0x1 | |
270 | vpclmulqdq xmm7, xmm7, xmm10, 0x10 | |
271 | vpxor xmm7, xmm8 | |
272 | vpxor xmm7, xmm2 | |
273 | ||
274 | .128_done: | |
275 | ; compute crc of a 128-bit value | |
276 | vmovdqa xmm10, [rk5] | |
277 | vmovdqa xmm0, xmm7 | |
278 | ||
279 | ;64b fold | |
280 | vpclmulqdq xmm7, xmm10, 0 | |
281 | vpsrldq xmm0, 8 | |
282 | vpxor xmm7, xmm0 | |
283 | ||
284 | ;32b fold | |
285 | vmovdqa xmm0, xmm7 | |
286 | vpslldq xmm7, 4 | |
287 | vpclmulqdq xmm7, xmm10, 0x10 | |
288 | vpxor xmm7, xmm0 | |
289 | ||
290 | ||
291 | ;barrett reduction | |
292 | .barrett: | |
293 | vpand xmm7, [mask2] | |
294 | vmovdqa xmm1, xmm7 | |
295 | vmovdqa xmm2, xmm7 | |
296 | vmovdqa xmm10, [rk7] | |
297 | ||
298 | vpclmulqdq xmm7, xmm10, 0 | |
299 | vpxor xmm7, xmm2 | |
300 | vpand xmm7, [mask] | |
301 | vmovdqa xmm2, xmm7 | |
302 | vpclmulqdq xmm7, xmm10, 0x10 | |
303 | vpxor xmm7, xmm2 | |
304 | vpxor xmm7, xmm1 | |
305 | vpextrd eax, xmm7, 2 | |
306 | ||
307 | .cleanup: | |
308 | ||
309 | %ifidn __OUTPUT_FORMAT__, win64 | |
310 | vmovdqa xmm6, [rsp + XMM_SAVE + 16*0] | |
311 | vmovdqa xmm7, [rsp + XMM_SAVE + 16*1] | |
312 | vmovdqa xmm8, [rsp + XMM_SAVE + 16*2] | |
313 | vmovdqa xmm9, [rsp + XMM_SAVE + 16*3] | |
314 | vmovdqa xmm10, [rsp + XMM_SAVE + 16*4] | |
315 | vmovdqa xmm11, [rsp + XMM_SAVE + 16*5] | |
316 | vmovdqa xmm12, [rsp + XMM_SAVE + 16*6] | |
317 | vmovdqa xmm13, [rsp + XMM_SAVE + 16*7] | |
318 | vmovdqa xmm14, [rsp + XMM_SAVE + 16*8] | |
319 | vmovdqa xmm15, [rsp + XMM_SAVE + 16*9] | |
320 | %endif | |
321 | add rsp, VARIABLE_OFFSET | |
322 | ret | |
323 | ||
324 | ||
325 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
326 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
327 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
328 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
329 | ||
330 | align 16 | |
331 | .less_than_256: | |
332 | ||
333 | ; check if there is enough buffer to be able to fold 16B at a time | |
334 | cmp arg3, 32 | |
335 | jl .less_than_32 | |
336 | ||
337 | ; if there is, load the constants | |
338 | vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 | |
339 | ||
340 | vmovd xmm0, arg1_low32 ; get the initial crc value | |
341 | vmovdqu xmm7, [arg2] ; load the plaintext | |
342 | vpxor xmm7, xmm0 | |
343 | ||
344 | ; update the buffer pointer | |
345 | add arg2, 16 | |
346 | ||
347 | ; update the counter. subtract 32 instead of 16 to save one instruction from the loop | |
348 | sub arg3, 32 | |
349 | ||
350 | jmp .16B_reduction_loop | |
351 | ||
352 | ||
353 | align 16 | |
354 | .less_than_32: | |
355 | ; mov initial crc to the return value. this is necessary for zero-length buffers. | |
356 | mov eax, arg1_low32 | |
357 | test arg3, arg3 | |
358 | je .cleanup | |
359 | ||
360 | vmovd xmm0, arg1_low32 ; get the initial crc value | |
361 | ||
362 | cmp arg3, 16 | |
363 | je .exact_16_left | |
364 | jl .less_than_16_left | |
365 | ||
366 | vmovdqu xmm7, [arg2] ; load the plaintext | |
367 | vpxor xmm7, xmm0 ; xor the initial crc value | |
368 | add arg2, 16 | |
369 | sub arg3, 16 | |
370 | vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 | |
371 | jmp .get_last_two_xmms | |
372 | ||
373 | align 16 | |
374 | .less_than_16_left: | |
375 | ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. | |
376 | ||
377 | vpxor xmm1, xmm1 | |
378 | mov r11, rsp | |
379 | vmovdqa [r11], xmm1 | |
380 | ||
381 | cmp arg3, 4 | |
382 | jl .only_less_than_4 | |
383 | ||
384 | ; backup the counter value | |
385 | mov r9, arg3 | |
386 | cmp arg3, 8 | |
387 | jl .less_than_8_left | |
388 | ||
389 | ; load 8 Bytes | |
390 | mov rax, [arg2] | |
391 | mov [r11], rax | |
392 | add r11, 8 | |
393 | sub arg3, 8 | |
394 | add arg2, 8 | |
395 | .less_than_8_left: | |
396 | ||
397 | cmp arg3, 4 | |
398 | jl .less_than_4_left | |
399 | ||
400 | ; load 4 Bytes | |
401 | mov eax, [arg2] | |
402 | mov [r11], eax | |
403 | add r11, 4 | |
404 | sub arg3, 4 | |
405 | add arg2, 4 | |
406 | .less_than_4_left: | |
407 | ||
408 | cmp arg3, 2 | |
409 | jl .less_than_2_left | |
410 | ||
411 | ; load 2 Bytes | |
412 | mov ax, [arg2] | |
413 | mov [r11], ax | |
414 | add r11, 2 | |
415 | sub arg3, 2 | |
416 | add arg2, 2 | |
417 | .less_than_2_left: | |
418 | cmp arg3, 1 | |
419 | jl .zero_left | |
420 | ||
421 | ; load 1 Byte | |
422 | mov al, [arg2] | |
423 | mov [r11], al | |
424 | ||
425 | .zero_left: | |
426 | vmovdqa xmm7, [rsp] | |
427 | vpxor xmm7, xmm0 ; xor the initial crc value | |
428 | ||
429 | lea rax,[pshufb_shf_table] | |
430 | vmovdqu xmm0, [rax + r9] | |
431 | vpshufb xmm7,xmm0 | |
432 | jmp .128_done | |
433 | ||
434 | align 16 | |
435 | .exact_16_left: | |
436 | vmovdqu xmm7, [arg2] | |
437 | vpxor xmm7, xmm0 ; xor the initial crc value | |
438 | jmp .128_done | |
439 | ||
440 | .only_less_than_4: | |
441 | cmp arg3, 3 | |
442 | jl .only_less_than_3 | |
443 | ||
444 | ; load 3 Bytes | |
445 | mov al, [arg2] | |
446 | mov [r11], al | |
447 | ||
448 | mov al, [arg2+1] | |
449 | mov [r11+1], al | |
450 | ||
451 | mov al, [arg2+2] | |
452 | mov [r11+2], al | |
453 | ||
454 | vmovdqa xmm7, [rsp] | |
455 | vpxor xmm7, xmm0 ; xor the initial crc value | |
456 | ||
457 | vpslldq xmm7, 5 | |
458 | jmp .barrett | |
459 | ||
460 | .only_less_than_3: | |
461 | cmp arg3, 2 | |
462 | jl .only_less_than_2 | |
463 | ||
464 | ; load 2 Bytes | |
465 | mov al, [arg2] | |
466 | mov [r11], al | |
467 | ||
468 | mov al, [arg2+1] | |
469 | mov [r11+1], al | |
470 | ||
471 | vmovdqa xmm7, [rsp] | |
472 | vpxor xmm7, xmm0 ; xor the initial crc value | |
473 | ||
474 | vpslldq xmm7, 6 | |
475 | jmp .barrett | |
476 | ||
477 | .only_less_than_2: | |
478 | ; load 1 Byte | |
479 | mov al, [arg2] | |
480 | mov [r11], al | |
481 | ||
482 | vmovdqa xmm7, [rsp] | |
483 | vpxor xmm7, xmm0 ; xor the initial crc value | |
484 | ||
485 | vpslldq xmm7, 7 | |
486 | jmp .barrett | |
487 | ||
488 | section .data | |
489 | align 32 | |
490 | ||
491 | %ifndef USE_CONSTS | |
492 | ; precomputed constants | |
493 | rk_1: dq 0x00000000b9e02b86 | |
494 | rk_2: dq 0x00000000dcb17aa4 | |
495 | rk1: dq 0x00000000493c7d27 | |
496 | rk2: dq 0x0000000ec1068c50 | |
497 | rk3: dq 0x0000000206e38d70 | |
498 | rk4: dq 0x000000006992cea2 | |
499 | rk5: dq 0x00000000493c7d27 | |
500 | rk6: dq 0x00000000dd45aab8 | |
501 | rk7: dq 0x00000000dea713f0 | |
502 | rk8: dq 0x0000000105ec76f0 | |
503 | rk9: dq 0x0000000047db8317 | |
504 | rk10: dq 0x000000002ad91c30 | |
505 | rk11: dq 0x000000000715ce53 | |
506 | rk12: dq 0x00000000c49f4f67 | |
507 | rk13: dq 0x0000000039d3b296 | |
508 | rk14: dq 0x00000000083a6eec | |
509 | rk15: dq 0x000000009e4addf8 | |
510 | rk16: dq 0x00000000740eef02 | |
511 | rk17: dq 0x00000000ddc0152b | |
512 | rk18: dq 0x000000001c291d04 | |
513 | rk19: dq 0x00000000ba4fc28e | |
514 | rk20: dq 0x000000003da6d0cb | |
515 | ||
516 | rk_1b: dq 0x00000000493c7d27 | |
517 | rk_2b: dq 0x0000000ec1068c50 | |
518 | dq 0x0000000000000000 | |
519 | dq 0x0000000000000000 | |
520 | ||
521 | %else | |
522 | INCLUDE_CONSTS | |
523 | %endif | |
524 | ||
525 | pshufb_shf_table: | |
526 | ; use these values for shift constants for the pshufb instruction | |
527 | ; different alignments result in values as shown: | |
528 | ; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 | |
529 | ; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 | |
530 | ; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 | |
531 | ; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 | |
532 | ; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 | |
533 | ; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 | |
534 | ; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 | |
535 | ; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 | |
536 | ; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 | |
537 | ; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 | |
538 | ; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 | |
539 | ; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 | |
540 | ; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 | |
541 | ; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 | |
542 | ; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 | |
543 | dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 | |
544 | dq 0x0706050403020100, 0x000e0d0c0b0a0908 | |
545 | ||
546 | mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 | |
547 | mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF | |
548 | mask3: dq 0x8080808080808080, 0x8080808080808080 | |
549 | ||
550 | %else ; Assembler doesn't understand these opcodes. Add empty symbol for windows. | |
551 | %ifidn __OUTPUT_FORMAT__, win64 | |
552 | global no_ %+ FUNCTION_NAME | |
553 | no_ %+ FUNCTION_NAME %+ : | |
554 | %endif | |
555 | %endif ; (AS_FEATURE_LEVEL) >= 10 |
61 | 61 | %define VARIABLE_OFFSET 16*2+8 |
62 | 62 | %endif |
63 | 63 | align 16 |
64 | global crc64_ecma_norm_by8:ISAL_SYM_TYPE_FUNCTION | |
64 | mk_global crc64_ecma_norm_by8, function | |
65 | 65 | crc64_ecma_norm_by8: |
66 | endbranch | |
66 | 67 | |
67 | 68 | not arg1 ;~init_crc |
68 | 69 |
67 | 67 | |
68 | 68 | |
69 | 69 | align 16 |
70 | global crc64_ecma_refl_by8:ISAL_SYM_TYPE_FUNCTION | |
70 | mk_global crc64_ecma_refl_by8, function | |
71 | 71 | crc64_ecma_refl_by8: |
72 | endbranch | |
72 | 73 | ; uint64_t c = crc ^ 0xffffffff,ffffffffL; |
73 | 74 | not arg1 |
74 | 75 | sub rsp, VARIABLE_OFFSET |
68 | 68 | %endif |
69 | 69 | |
70 | 70 | align 16 |
71 | global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION | |
71 | mk_global FUNCTION_NAME, function | |
72 | 72 | FUNCTION_NAME: |
73 | endbranch | |
73 | 74 | not arg1 |
74 | 75 | sub rsp, VARIABLE_OFFSET |
75 | 76 |
60 | 60 | %define VARIABLE_OFFSET 16*2+8 |
61 | 61 | %endif |
62 | 62 | align 16 |
63 | global crc64_iso_norm_by8:ISAL_SYM_TYPE_FUNCTION | |
63 | mk_global crc64_iso_norm_by8, function | |
64 | 64 | crc64_iso_norm_by8: |
65 | endbranch | |
65 | 66 | |
66 | 67 | not arg1 ;~init_crc |
67 | 68 |
69 | 69 | %endif |
70 | 70 | |
71 | 71 | align 16 |
72 | global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION | |
72 | mk_global FUNCTION_NAME, function | |
73 | 73 | FUNCTION_NAME: |
74 | endbranch | |
74 | 75 | not arg1 |
75 | 76 | sub rsp, VARIABLE_OFFSET |
76 | 77 |
64 | 64 | |
65 | 65 | |
66 | 66 | align 16 |
67 | global crc64_iso_refl_by8:ISAL_SYM_TYPE_FUNCTION | |
67 | mk_global crc64_iso_refl_by8, function | |
68 | 68 | crc64_iso_refl_by8: |
69 | endbranch | |
69 | 70 | ; uint64_t c = crc ^ 0xffffffff,ffffffffL; |
70 | 71 | not arg1 |
71 | 72 | sub rsp, VARIABLE_OFFSET |
60 | 60 | %define VARIABLE_OFFSET 16*2+8 |
61 | 61 | %endif |
62 | 62 | align 16 |
63 | global crc64_jones_norm_by8:ISAL_SYM_TYPE_FUNCTION | |
63 | mk_global crc64_jones_norm_by8, function | |
64 | 64 | crc64_jones_norm_by8: |
65 | endbranch | |
65 | 66 | |
66 | 67 | not arg1 ;~init_crc |
67 | 68 |
64 | 64 | |
65 | 65 | |
66 | 66 | align 16 |
67 | global crc64_jones_refl_by8:ISAL_SYM_TYPE_FUNCTION | |
67 | mk_global crc64_jones_refl_by8, function | |
68 | 68 | crc64_jones_refl_by8: |
69 | endbranch | |
69 | 70 | ; uint64_t c = crc ^ 0xffffffff,ffffffffL; |
70 | 71 | not arg1 |
71 | 72 | sub rsp, VARIABLE_OFFSET |
56 | 56 | %if (AS_FEATURE_LEVEL) >= 10 |
57 | 57 | extern crc32_gzip_refl_by16_10 |
58 | 58 | extern crc32_ieee_by16_10 |
59 | extern crc32_iscsi_by16_10 | |
59 | 60 | extern crc16_t10dif_by16_10 |
60 | 61 | %endif |
61 | 62 | |
78 | 79 | ;;;; |
79 | 80 | ; crc32_iscsi multibinary function |
80 | 81 | ;;;; |
81 | global crc32_iscsi:ISAL_SYM_TYPE_FUNCTION | |
82 | mk_global crc32_iscsi, function | |
82 | 83 | crc32_iscsi_mbinit: |
84 | endbranch | |
83 | 85 | call crc32_iscsi_dispatch_init |
84 | 86 | crc32_iscsi: |
87 | endbranch | |
85 | 88 | jmp qword [crc32_iscsi_dispatched] |
86 | 89 | |
87 | 90 | crc32_iscsi_dispatch_init: |
90 | 93 | push rcx |
91 | 94 | push rdx |
92 | 95 | push rsi |
96 | push rdi | |
93 | 97 | lea rsi, [crc32_iscsi_base WRT_OPT] ; Default |
94 | 98 | |
95 | 99 | mov eax, 1 |
96 | 100 | cpuid |
97 | lea rbx, [crc32_iscsi_00 WRT_OPT] | |
98 | lea rax, [crc32_iscsi_01 WRT_OPT] | |
99 | ||
100 | test ecx, FLAG_CPUID1_ECX_SSE4_2 | |
101 | cmovne rsi, rbx | |
102 | test ecx, FLAG_CPUID1_ECX_CLMUL | |
103 | cmovne rsi, rax | |
101 | mov ebx, ecx ; save cpuid1.ecx | |
102 | test ecx, FLAG_CPUID1_ECX_SSE4_2 | |
103 | jz .crc_iscsi_init_done ; use iscsi_base | |
104 | lea rsi, [crc32_iscsi_00 WRT_OPT] | |
105 | test ecx, FLAG_CPUID1_ECX_CLMUL | |
106 | jz .crc_iscsi_init_done ; use iscsi_00 | |
107 | lea rsi, [crc32_iscsi_01 WRT_OPT] | |
108 | ||
109 | ;; Test for XMM_YMM support/AVX | |
110 | test ecx, FLAG_CPUID1_ECX_OSXSAVE | |
111 | je .crc_iscsi_init_done | |
112 | xor ecx, ecx | |
113 | xgetbv ; xcr -> edx:eax | |
114 | mov edi, eax ; save xgetvb.eax | |
115 | ||
116 | and eax, FLAG_XGETBV_EAX_XMM_YMM | |
117 | cmp eax, FLAG_XGETBV_EAX_XMM_YMM | |
118 | jne .crc_iscsi_init_done | |
119 | test ebx, FLAG_CPUID1_ECX_AVX | |
120 | je .crc_iscsi_init_done | |
121 | ;; AVX/02 opt if available | |
122 | ||
123 | %if AS_FEATURE_LEVEL >= 10 | |
124 | ;; Test for AVX2 | |
125 | xor ecx, ecx | |
126 | mov eax, 7 | |
127 | cpuid | |
128 | test ebx, FLAG_CPUID7_EBX_AVX2 | |
129 | je .crc_iscsi_init_done ; No AVX2 possible | |
130 | ||
131 | ;; Test for AVX512 | |
132 | and edi, FLAG_XGETBV_EAX_ZMM_OPM | |
133 | cmp edi, FLAG_XGETBV_EAX_ZMM_OPM | |
134 | jne .crc_iscsi_init_done ; No AVX512 possible | |
135 | and ebx, FLAGS_CPUID7_EBX_AVX512_G1 | |
136 | cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 | |
137 | jne .crc_iscsi_init_done | |
138 | ||
139 | and ecx, FLAGS_CPUID7_ECX_AVX512_G2 | |
140 | cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2 | |
141 | lea rbx, [crc32_iscsi_by16_10 WRT_OPT] ; AVX512/10 opt | |
142 | cmove rsi, rbx | |
143 | %endif | |
144 | ||
145 | .crc_iscsi_init_done: | |
104 | 146 | mov [crc32_iscsi_dispatched], rsi |
147 | pop rdi | |
105 | 148 | pop rsi |
106 | 149 | pop rdx |
107 | 150 | pop rcx |
112 | 155 | ;;;; |
113 | 156 | ; crc32_ieee multibinary function |
114 | 157 | ;;;; |
115 | global crc32_ieee:ISAL_SYM_TYPE_FUNCTION | |
158 | mk_global crc32_ieee, function | |
116 | 159 | crc32_ieee_mbinit: |
160 | endbranch | |
117 | 161 | call crc32_ieee_dispatch_init |
118 | 162 | crc32_ieee: |
163 | endbranch | |
119 | 164 | jmp qword [crc32_ieee_dispatched] |
120 | 165 | |
121 | 166 | crc32_ieee_dispatch_init: |
191 | 236 | ;;;; |
192 | 237 | ; crc16_t10dif multibinary function |
193 | 238 | ;;;; |
194 | global crc16_t10dif:ISAL_SYM_TYPE_FUNCTION | |
239 | mk_global crc16_t10dif, function | |
195 | 240 | crc16_t10dif_mbinit: |
241 | endbranch | |
196 | 242 | call crc16_t10dif_dispatch_init |
197 | 243 | crc16_t10dif: |
244 | endbranch | |
198 | 245 | jmp qword [crc16_t10dif_dispatched] |
199 | 246 | |
200 | 247 | crc16_t10dif_dispatch_init: |
0 | # ISA-L Build Details | |
1 | ||
2 | For x86-64 builds it is highly recommended to get an up-to-date version of | |
3 | [nasm] that can understand the latest instruction sets. Building with an older | |
4 | version is usually possible but the library may lack some function versions for | |
5 | the best performance. | |
6 | ||
7 | ## Windows Build Environment Details | |
8 | ||
9 | The windows dynamic and static libraries can be built with the nmake tool on the | |
10 | windows command line when appropriate paths and tools are set up as follows. | |
11 | ||
12 | ### Download nasm and put into path | |
13 | ||
14 | Download and install [nasm] and add location to path. | |
15 | ||
16 | set PATH=%PATH%;C:\Program Files\NASM | |
17 | ||
18 | ### Setup compiler environment | |
19 | ||
20 | Install compiler and run environment setup script. | |
21 | ||
22 | Compilers for windows usually have a batch file to set up environment variables | |
23 | for the command line called `vcvarsall.bat` or `compilervars.bat` or a link to | |
24 | run these. For Visual Studio this may be as follows for Community edition. | |
25 | ||
26 | C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat x64 | |
27 | ||
28 | For the Intel compiler the path is typically as follows where yyyy, x, zzz | |
29 | represent the version. | |
30 | ||
31 | C:\Program Files (x86)\IntelSWTools\system_studio_for_windows_yyyy.x.zzz\compilers_and_libraries_yyyy\bin\compilervars.bat intel64 | |
32 | ||
33 | ### Build ISA-L libs and copy to appropriate place | |
34 | ||
35 | Run `nmake /f Makefile.nmake` | |
36 | ||
37 | This should build isa-l.dll, isa-l.lib and isa-l_static.lib. You may want to | |
38 | copy the libs to a system directory in the dynamic linking path such as | |
39 | `C:\windows\system32` or to a project directory. | |
40 | ||
41 | To build a simple program with a static library. | |
42 | ||
43 | cl /Fe: test.exe test.c isa-l_static.lib | |
44 | ||
45 | [nasm]: https://www.nasm.us |
0 | # ISA-L Testing | |
1 | ||
2 | Tests are divided into check tests, unit tests and fuzz tests. Check tests, | |
3 | built with `make check`, should have no additional dependencies. Other unit | |
4 | tests built with `make test` may have additional dependencies in order to make | |
5 | comparisons of the output of ISA-L to other standard libraries and ensure | |
6 | compatibility. Fuzz tests are meant to be run with a fuzzing tool such as [AFL] | |
7 | or [llvm libFuzzer], which directs the input data based on coverage. There | |
8 | are a number of scripts in the /tools directory to help with automating the | |
9 | running of tests. | |
10 | ||
11 | ## Test check | |
12 | ||
13 | `./tools/test_autorun.sh` is a helper script for kicking off check tests, that | |
14 | typically run for a few minutes, or extended tests that could run much | |
15 | longer. The command `test_autorun.sh check` builds and runs all check tests with | |
16 | autotools and runs other short tests to ensure check tests, unit tests, | |
17 | examples, install, exe stack, format are correct. Each run of `test_autorun.sh` | |
18 | builds tests with a new random test seed that ensures that each run is unique to | |
19 | the seed but deterministic for debugging. Tests are also built with sanitizers | |
20 | and Electric Fence if available. | |
21 | ||
22 | ## Extended tests | |
23 | ||
24 | Extended tests are initiated with the command `./tools/test_autorun.sh | |
25 | ext`. These build and run check tests, unit tests, and other utilities that can | |
26 | take much longer than check tests alone. This includes special compression tools | |
27 | and some cross targets such as the no-arch build of base functions only and | |
28 | mingw build if tools are available. | |
29 | ||
30 | ## Fuzz testing | |
31 | ||
32 | `./tools/test_fuzz.sh` is a helper script for fuzzing to set up, build and run | |
33 | the ISA-L inflate fuzz tests on multiple fuzz tools. Fuzzing with | |
34 | [llvm libFuzzer] requires clang compiler tools with `-fsanitize=fuzzer` or | |
35 | `libFuzzer` installed. You can invoke the default fuzz tests under llvm with | |
36 | ||
37 | ./tools/test_fuzz.sh -e checked | |
38 | ||
39 | To use [AFL], install tools and system setup for `afl-fuzz` and run | |
40 | ||
41 | ./tools/test_fuzz.sh -e checked --afl 1 --llvm -1 -d 1 | |
42 | ||
43 | This uses internal vectors as a seed. You can also specify a sample file to use | |
44 | as a seed instead with `-f <file>`. One of three fuzz tests can be invoked: | |
45 | checked, simple, and round_trip. | |
46 | ||
47 | [llvm libFuzzer]: https://llvm.org/docs/LibFuzzer.html | |
48 | [AFL]: https://github.com/google/AFL |
148 | 148 | |
149 | 149 | other_tests += erasure_code/gen_rs_matrix_limits |
150 | 150 | |
151 | other_tests_x86_64 += \ | |
152 | erasure_code/gf_2vect_dot_prod_sse_test \ | |
153 | erasure_code/gf_3vect_dot_prod_sse_test \ | |
154 | erasure_code/gf_4vect_dot_prod_sse_test \ | |
155 | erasure_code/gf_5vect_dot_prod_sse_test \ | |
156 | erasure_code/gf_6vect_dot_prod_sse_test | |
157 | ||
158 | other_tests_x86_32 += \ | |
159 | erasure_code/gf_2vect_dot_prod_sse_test \ | |
160 | erasure_code/gf_3vect_dot_prod_sse_test \ | |
161 | erasure_code/gf_4vect_dot_prod_sse_test \ | |
162 | erasure_code/gf_5vect_dot_prod_sse_test \ | |
163 | erasure_code/gf_6vect_dot_prod_sse_test | |
164 | ||
165 | 151 | other_src += include/test.h \ |
166 | 152 | include/types.h |
51 | 51 | %define PS 8 |
52 | 52 | %define LOG_PS 3 |
53 | 53 | |
54 | %define func(x) x: | |
54 | %define func(x) x: endbranch | |
55 | 55 | %macro FUNC_SAVE 0 |
56 | 56 | push r12 |
57 | 57 | %endmacro |
83 | 83 | %define func(x) proc_frame x |
84 | 84 | %macro FUNC_SAVE 0 |
85 | 85 | alloc_stack stack_size |
86 | save_xmm128 xmm6, 0*16 | |
87 | save_xmm128 xmm7, 1*16 | |
88 | save_xmm128 xmm8, 2*16 | |
86 | vmovdqa [rsp + 0*16], xmm6 | |
87 | vmovdqa [rsp + 1*16], xmm7 | |
88 | vmovdqa [rsp + 2*16], xmm8 | |
89 | 89 | save_reg r12, 3*16 + 0*8 |
90 | 90 | save_reg r13, 3*16 + 1*8 |
91 | 91 | save_reg r14, 3*16 + 2*8 |
126 | 126 | |
127 | 127 | %define PS 4 |
128 | 128 | %define LOG_PS 2 |
129 | %define func(x) x: | |
129 | %define func(x) x: endbranch | |
130 | 130 | %define arg(x) [ebp + PS*2 + PS*x] |
131 | 131 | %define var(x) [ebp - PS - PS*x] |
132 | 132 | |
237 | 237 | %endif |
238 | 238 | |
239 | 239 | align 16 |
240 | global gf_2vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION | |
240 | mk_global gf_2vect_dot_prod_avx, function | |
241 | 241 | |
242 | 242 | func(gf_2vect_dot_prod_avx) |
243 | 243 | FUNC_SAVE |
53 | 53 | %define PS 8 |
54 | 54 | %define LOG_PS 3 |
55 | 55 | |
56 | %define func(x) x: | |
56 | %define func(x) x: endbranch | |
57 | 57 | %macro FUNC_SAVE 0 |
58 | 58 | push r12 |
59 | 59 | %endmacro |
130 | 130 | |
131 | 131 | %define PS 4 |
132 | 132 | %define LOG_PS 2 |
133 | %define func(x) x: | |
133 | %define func(x) x: endbranch | |
134 | 134 | %define arg(x) [ebp + PS*2 + PS*x] |
135 | 135 | %define var(x) [ebp - PS - PS*x] |
136 | 136 | |
247 | 247 | %endif |
248 | 248 | |
249 | 249 | align 16 |
250 | global gf_2vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION | |
250 | mk_global gf_2vect_dot_prod_avx2, function | |
251 | 251 | |
252 | 252 | func(gf_2vect_dot_prod_avx2) |
253 | 253 | FUNC_SAVE |
49 | 49 | %define PS 8 |
50 | 50 | %define LOG_PS 3 |
51 | 51 | |
52 | %define func(x) x: | |
52 | %define func(x) x: endbranch | |
53 | 53 | %macro FUNC_SAVE 0 |
54 | 54 | push r12 |
55 | 55 | %endmacro |
159 | 159 | section .text |
160 | 160 | |
161 | 161 | align 16 |
162 | global gf_2vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION | |
162 | mk_global gf_2vect_dot_prod_avx512, function | |
163 | 163 | func(gf_2vect_dot_prod_avx512) |
164 | 164 | FUNC_SAVE |
165 | 165 | sub len, 64 |
51 | 51 | %define PS 8 |
52 | 52 | %define LOG_PS 3 |
53 | 53 | |
54 | %define func(x) x: | |
54 | %define func(x) x: endbranch | |
55 | 55 | %macro FUNC_SAVE 0 |
56 | 56 | push r12 |
57 | 57 | %endmacro |
126 | 126 | |
127 | 127 | %define PS 4 |
128 | 128 | %define LOG_PS 2 |
129 | %define func(x) x: | |
129 | %define func(x) x: endbranch | |
130 | 130 | %define arg(x) [ebp + PS*2 + PS*x] |
131 | 131 | %define var(x) [ebp - PS - PS*x] |
132 | 132 | |
237 | 237 | %endif |
238 | 238 | |
239 | 239 | align 16 |
240 | global gf_2vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION | |
240 | mk_global gf_2vect_dot_prod_sse, function | |
241 | 241 | |
242 | 242 | func(gf_2vect_dot_prod_sse) |
243 | 243 | FUNC_SAVE |
0 | /********************************************************************** | |
1 | Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Intel Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | #include <stdio.h> | |
30 | #include <stdlib.h> | |
31 | #include <string.h> // for memset, memcmp | |
32 | #include "erasure_code.h" | |
33 | #include "types.h" | |
34 | ||
35 | #ifndef FUNCTION_UNDER_TEST | |
36 | # define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse | |
37 | #endif | |
38 | #ifndef TEST_MIN_SIZE | |
39 | # define TEST_MIN_SIZE 16 | |
40 | #endif | |
41 | ||
42 | #define str(s) #s | |
43 | #define xstr(s) str(s) | |
44 | ||
45 | #define TEST_LEN 8192 | |
46 | #define TEST_SIZE (TEST_LEN/2) | |
47 | #define TEST_MEM TEST_SIZE | |
48 | #define TEST_LOOPS 10000 | |
49 | #define TEST_TYPE_STR "" | |
50 | ||
51 | #ifndef TEST_SOURCES | |
52 | # define TEST_SOURCES 16 | |
53 | #endif | |
54 | #ifndef RANDOMS | |
55 | # define RANDOMS 20 | |
56 | #endif | |
57 | ||
58 | #ifdef EC_ALIGNED_ADDR | |
59 | // Define power of 2 range to check ptr, len alignment | |
60 | # define PTR_ALIGN_CHK_B 0 | |
61 | # define LEN_ALIGN_CHK_B 0 // 0 for aligned only | |
62 | #else | |
63 | // Define power of 2 range to check ptr, len alignment | |
64 | # define PTR_ALIGN_CHK_B 32 | |
65 | # define LEN_ALIGN_CHK_B 32 // 0 for aligned only | |
66 | #endif | |
67 | ||
68 | typedef unsigned char u8; | |
69 | ||
70 | extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, | |
71 | unsigned char **src, unsigned char **dest); | |
72 | ||
73 | void dump(unsigned char *buf, int len) | |
74 | { | |
75 | int i; | |
76 | for (i = 0; i < len;) { | |
77 | printf(" %2x", 0xff & buf[i++]); | |
78 | if (i % 32 == 0) | |
79 | printf("\n"); | |
80 | } | |
81 | printf("\n"); | |
82 | } | |
83 | ||
84 | void dump_matrix(unsigned char **s, int k, int m) | |
85 | { | |
86 | int i, j; | |
87 | for (i = 0; i < k; i++) { | |
88 | for (j = 0; j < m; j++) { | |
89 | printf(" %2x", s[i][j]); | |
90 | } | |
91 | printf("\n"); | |
92 | } | |
93 | printf("\n"); | |
94 | } | |
95 | ||
96 | void dump_u8xu8(unsigned char *s, int k, int m) | |
97 | { | |
98 | int i, j; | |
99 | for (i = 0; i < k; i++) { | |
100 | for (j = 0; j < m; j++) { | |
101 | printf(" %2x", 0xff & s[j + (i * m)]); | |
102 | } | |
103 | printf("\n"); | |
104 | } | |
105 | printf("\n"); | |
106 | } | |
107 | ||
108 | int main(int argc, char *argv[]) | |
109 | { | |
110 | int i, j, rtest, srcs; | |
111 | void *buf; | |
112 | u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32]; | |
113 | u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2]; | |
114 | u8 *buffs[TEST_SOURCES]; | |
115 | ||
116 | int align, size; | |
117 | unsigned char *efence_buffs[TEST_SOURCES]; | |
118 | unsigned int offset; | |
119 | u8 *ubuffs[TEST_SOURCES]; | |
120 | u8 *udest_ptrs[2]; | |
121 | ||
122 | printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); | |
123 | ||
124 | // Allocate the arrays | |
125 | for (i = 0; i < TEST_SOURCES; i++) { | |
126 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
127 | printf("alloc error: Fail"); | |
128 | return -1; | |
129 | } | |
130 | buffs[i] = buf; | |
131 | } | |
132 | ||
133 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
134 | printf("alloc error: Fail"); | |
135 | return -1; | |
136 | } | |
137 | dest1 = buf; | |
138 | ||
139 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
140 | printf("alloc error: Fail"); | |
141 | return -1; | |
142 | } | |
143 | dest2 = buf; | |
144 | ||
145 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
146 | printf("alloc error: Fail"); | |
147 | return -1; | |
148 | } | |
149 | dest_ref1 = buf; | |
150 | ||
151 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
152 | printf("alloc error: Fail"); | |
153 | return -1; | |
154 | } | |
155 | dest_ref2 = buf; | |
156 | ||
157 | dest_ptrs[0] = dest1; | |
158 | dest_ptrs[1] = dest2; | |
159 | ||
160 | // Test of all zeros | |
161 | for (i = 0; i < TEST_SOURCES; i++) | |
162 | memset(buffs[i], 0, TEST_LEN); | |
163 | ||
164 | memset(dest1, 0, TEST_LEN); | |
165 | memset(dest2, 0, TEST_LEN); | |
166 | memset(dest_ref1, 0, TEST_LEN); | |
167 | memset(dest_ref2, 0, TEST_LEN); | |
168 | memset(g1, 2, TEST_SOURCES); | |
169 | memset(g2, 1, TEST_SOURCES); | |
170 | ||
171 | for (i = 0; i < TEST_SOURCES; i++) { | |
172 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
173 | gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); | |
174 | } | |
175 | ||
176 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
177 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, | |
178 | dest_ref2); | |
179 | ||
180 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
181 | ||
182 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
183 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); | |
184 | dump_matrix(buffs, 5, TEST_SOURCES); | |
185 | printf("dprod_base:"); | |
186 | dump(dest_ref1, 25); | |
187 | printf("dprod_dut:"); | |
188 | dump(dest1, 25); | |
189 | return -1; | |
190 | } | |
191 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
192 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); | |
193 | dump_matrix(buffs, 5, TEST_SOURCES); | |
194 | printf("dprod_base:"); | |
195 | dump(dest_ref2, 25); | |
196 | printf("dprod_dut:"); | |
197 | dump(dest2, 25); | |
198 | return -1; | |
199 | } | |
200 | ||
201 | putchar('.'); | |
202 | ||
203 | // Rand data test | |
204 | ||
205 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
206 | for (i = 0; i < TEST_SOURCES; i++) | |
207 | for (j = 0; j < TEST_LEN; j++) | |
208 | buffs[i][j] = rand(); | |
209 | ||
210 | for (i = 0; i < TEST_SOURCES; i++) { | |
211 | g1[i] = rand(); | |
212 | g2[i] = rand(); | |
213 | } | |
214 | ||
215 | for (i = 0; i < TEST_SOURCES; i++) { | |
216 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
217 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
218 | } | |
219 | ||
220 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
221 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
222 | buffs, dest_ref2); | |
223 | ||
224 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
225 | ||
226 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
227 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
228 | dump_matrix(buffs, 5, TEST_SOURCES); | |
229 | printf("dprod_base:"); | |
230 | dump(dest_ref1, 25); | |
231 | printf("dprod_dut:"); | |
232 | dump(dest1, 25); | |
233 | return -1; | |
234 | } | |
235 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
236 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
237 | dump_matrix(buffs, 5, TEST_SOURCES); | |
238 | printf("dprod_base:"); | |
239 | dump(dest_ref2, 25); | |
240 | printf("dprod_dut:"); | |
241 | dump(dest2, 25); | |
242 | return -1; | |
243 | } | |
244 | ||
245 | putchar('.'); | |
246 | } | |
247 | ||
248 | // Rand data test with varied parameters | |
249 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
250 | for (srcs = TEST_SOURCES; srcs > 0; srcs--) { | |
251 | for (i = 0; i < srcs; i++) | |
252 | for (j = 0; j < TEST_LEN; j++) | |
253 | buffs[i][j] = rand(); | |
254 | ||
255 | for (i = 0; i < srcs; i++) { | |
256 | g1[i] = rand(); | |
257 | g2[i] = rand(); | |
258 | } | |
259 | ||
260 | for (i = 0; i < srcs; i++) { | |
261 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
262 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
263 | } | |
264 | ||
265 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); | |
266 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, | |
267 | dest_ref2); | |
268 | ||
269 | FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); | |
270 | ||
271 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
272 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
273 | " test1 srcs=%d\n", srcs); | |
274 | dump_matrix(buffs, 5, TEST_SOURCES); | |
275 | printf("dprod_base:"); | |
276 | dump(dest_ref1, 25); | |
277 | printf("dprod_dut:"); | |
278 | dump(dest1, 25); | |
279 | return -1; | |
280 | } | |
281 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
282 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
283 | " test2 srcs=%d\n", srcs); | |
284 | dump_matrix(buffs, 5, TEST_SOURCES); | |
285 | printf("dprod_base:"); | |
286 | dump(dest_ref2, 25); | |
287 | printf("dprod_dut:"); | |
288 | dump(dest2, 25); | |
289 | return -1; | |
290 | } | |
291 | ||
292 | putchar('.'); | |
293 | } | |
294 | } | |
295 | ||
296 | // Run tests at end of buffer for Electric Fence | |
297 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; | |
298 | for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { | |
299 | for (i = 0; i < TEST_SOURCES; i++) | |
300 | for (j = 0; j < TEST_LEN; j++) | |
301 | buffs[i][j] = rand(); | |
302 | ||
303 | for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end | |
304 | efence_buffs[i] = buffs[i] + TEST_LEN - size; | |
305 | ||
306 | for (i = 0; i < TEST_SOURCES; i++) { | |
307 | g1[i] = rand(); | |
308 | g2[i] = rand(); | |
309 | } | |
310 | ||
311 | for (i = 0; i < TEST_SOURCES; i++) { | |
312 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
313 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
314 | } | |
315 | ||
316 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); | |
317 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
318 | efence_buffs, dest_ref2); | |
319 | ||
320 | FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); | |
321 | ||
322 | if (0 != memcmp(dest_ref1, dest1, size)) { | |
323 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
324 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
325 | printf("dprod_base:"); | |
326 | dump(dest_ref1, align); | |
327 | printf("dprod_dut:"); | |
328 | dump(dest1, align); | |
329 | return -1; | |
330 | } | |
331 | ||
332 | if (0 != memcmp(dest_ref2, dest2, size)) { | |
333 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
334 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
335 | printf("dprod_base:"); | |
336 | dump(dest_ref2, align); | |
337 | printf("dprod_dut:"); | |
338 | dump(dest2, align); | |
339 | return -1; | |
340 | } | |
341 | ||
342 | putchar('.'); | |
343 | } | |
344 | ||
345 | // Test rand ptr alignment if available | |
346 | ||
347 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
348 | size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); | |
349 | srcs = rand() % TEST_SOURCES; | |
350 | if (srcs == 0) | |
351 | continue; | |
352 | ||
353 | offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; | |
354 | // Add random offsets | |
355 | for (i = 0; i < srcs; i++) | |
356 | ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
357 | ||
358 | udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
359 | udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
360 | ||
361 | memset(dest1, 0, TEST_LEN); // zero pad to check write-over | |
362 | memset(dest2, 0, TEST_LEN); | |
363 | ||
364 | for (i = 0; i < srcs; i++) | |
365 | for (j = 0; j < size; j++) | |
366 | ubuffs[i][j] = rand(); | |
367 | ||
368 | for (i = 0; i < srcs; i++) { | |
369 | g1[i] = rand(); | |
370 | g2[i] = rand(); | |
371 | } | |
372 | ||
373 | for (i = 0; i < srcs; i++) { | |
374 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
375 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
376 | } | |
377 | ||
378 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); | |
379 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); | |
380 | ||
381 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); | |
382 | ||
383 | if (memcmp(dest_ref1, udest_ptrs[0], size)) { | |
384 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
385 | srcs); | |
386 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
387 | printf("dprod_base:"); | |
388 | dump(dest_ref1, 25); | |
389 | printf("dprod_dut:"); | |
390 | dump(udest_ptrs[0], 25); | |
391 | return -1; | |
392 | } | |
393 | if (memcmp(dest_ref2, udest_ptrs[1], size)) { | |
394 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
395 | srcs); | |
396 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
397 | printf("dprod_base:"); | |
398 | dump(dest_ref2, 25); | |
399 | printf("dprod_dut:"); | |
400 | dump(udest_ptrs[1], 25); | |
401 | return -1; | |
402 | } | |
403 | // Confirm that padding around dests is unchanged | |
404 | memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff | |
405 | offset = udest_ptrs[0] - dest1; | |
406 | ||
407 | if (memcmp(dest1, dest_ref1, offset)) { | |
408 | printf("Fail rand ualign pad1 start\n"); | |
409 | return -1; | |
410 | } | |
411 | if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
412 | printf("Fail rand ualign pad1 end\n"); | |
413 | return -1; | |
414 | } | |
415 | ||
416 | offset = udest_ptrs[1] - dest2; | |
417 | if (memcmp(dest2, dest_ref1, offset)) { | |
418 | printf("Fail rand ualign pad2 start\n"); | |
419 | return -1; | |
420 | } | |
421 | if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
422 | printf("Fail rand ualign pad2 end\n"); | |
423 | return -1; | |
424 | } | |
425 | ||
426 | putchar('.'); | |
427 | } | |
428 | ||
429 | // Test all size alignment | |
430 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; | |
431 | ||
432 | for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { | |
433 | srcs = TEST_SOURCES; | |
434 | ||
435 | for (i = 0; i < srcs; i++) | |
436 | for (j = 0; j < size; j++) | |
437 | buffs[i][j] = rand(); | |
438 | ||
439 | for (i = 0; i < srcs; i++) { | |
440 | g1[i] = rand(); | |
441 | g2[i] = rand(); | |
442 | } | |
443 | ||
444 | for (i = 0; i < srcs; i++) { | |
445 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
446 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
447 | } | |
448 | ||
449 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); | |
450 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); | |
451 | ||
452 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); | |
453 | ||
454 | if (memcmp(dest_ref1, dest_ptrs[0], size)) { | |
455 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
456 | size); | |
457 | dump_matrix(buffs, 5, TEST_SOURCES); | |
458 | printf("dprod_base:"); | |
459 | dump(dest_ref1, 25); | |
460 | printf("dprod_dut:"); | |
461 | dump(dest_ptrs[0], 25); | |
462 | return -1; | |
463 | } | |
464 | if (memcmp(dest_ref2, dest_ptrs[1], size)) { | |
465 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
466 | size); | |
467 | dump_matrix(buffs, 5, TEST_SOURCES); | |
468 | printf("dprod_base:"); | |
469 | dump(dest_ref2, 25); | |
470 | printf("dprod_dut:"); | |
471 | dump(dest_ptrs[1], 25); | |
472 | return -1; | |
473 | } | |
474 | } | |
475 | ||
476 | printf("Pass\n"); | |
477 | return 0; | |
478 | ||
479 | } |
96 | 96 | %define return rax |
97 | 97 | %define return.w eax |
98 | 98 | |
99 | %define func(x) x: | |
99 | %define func(x) x: endbranch | |
100 | 100 | %define FUNC_SAVE |
101 | 101 | %define FUNC_RESTORE |
102 | 102 | %endif |
154 | 154 | |
155 | 155 | |
156 | 156 | align 16 |
157 | global gf_2vect_mad_avx:ISAL_SYM_TYPE_FUNCTION | |
157 | mk_global gf_2vect_mad_avx, function | |
158 | 158 | |
159 | 159 | func(gf_2vect_mad_avx) |
160 | 160 | FUNC_SAVE |
103 | 103 | %define return rax |
104 | 104 | %define return.w eax |
105 | 105 | |
106 | %define func(x) x: | |
106 | %define func(x) x: endbranch | |
107 | 107 | %define FUNC_SAVE |
108 | 108 | %define FUNC_RESTORE |
109 | 109 | %endif |
162 | 162 | %define xtmpd2 ymm9 |
163 | 163 | |
164 | 164 | align 16 |
165 | global gf_2vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION | |
165 | mk_global gf_2vect_mad_avx2, function | |
166 | 166 | |
167 | 167 | func(gf_2vect_mad_avx2) |
168 | 168 | FUNC_SAVE |
44 | 44 | %define tmp r11 |
45 | 45 | %define tmp2 r10 |
46 | 46 | %define return rax |
47 | %define func(x) x: | |
47 | %define func(x) x: endbranch | |
48 | 48 | %define FUNC_SAVE |
49 | 49 | %define FUNC_RESTORE |
50 | 50 | %endif |
148 | 148 | %define xmask0f zmm14 |
149 | 149 | |
150 | 150 | align 16 |
151 | global gf_2vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION | |
151 | mk_global gf_2vect_mad_avx512, function | |
152 | 152 | func(gf_2vect_mad_avx512) |
153 | 153 | FUNC_SAVE |
154 | 154 | sub len, 64 |
96 | 96 | %define return rax |
97 | 97 | %define return.w eax |
98 | 98 | |
99 | %define func(x) x: | |
99 | %define func(x) x: endbranch | |
100 | 100 | %define FUNC_SAVE |
101 | 101 | %define FUNC_RESTORE |
102 | 102 | %endif |
153 | 153 | |
154 | 154 | |
155 | 155 | align 16 |
156 | global gf_2vect_mad_sse:ISAL_SYM_TYPE_FUNCTION | |
156 | mk_global gf_2vect_mad_sse, function | |
157 | 157 | func(gf_2vect_mad_sse) |
158 | 158 | FUNC_SAVE |
159 | 159 | sub len, 16 |
51 | 51 | %define PS 8 |
52 | 52 | %define LOG_PS 3 |
53 | 53 | |
54 | %define func(x) x: | |
54 | %define func(x) x: endbranch | |
55 | 55 | %macro FUNC_SAVE 0 |
56 | 56 | push r12 |
57 | 57 | push r13 |
86 | 86 | %define func(x) proc_frame x |
87 | 87 | %macro FUNC_SAVE 0 |
88 | 88 | alloc_stack stack_size |
89 | save_xmm128 xmm6, 0*16 | |
90 | save_xmm128 xmm7, 1*16 | |
91 | save_xmm128 xmm8, 2*16 | |
92 | save_xmm128 xmm9, 3*16 | |
93 | save_xmm128 xmm10, 4*16 | |
94 | save_xmm128 xmm11, 5*16 | |
89 | vmovdqa [rsp + 0*16], xmm6 | |
90 | vmovdqa [rsp + 1*16], xmm7 | |
91 | vmovdqa [rsp + 2*16], xmm8 | |
92 | vmovdqa [rsp + 3*16], xmm9 | |
93 | vmovdqa [rsp + 4*16], xmm10 | |
94 | vmovdqa [rsp + 5*16], xmm11 | |
95 | 95 | save_reg r12, 6*16 + 0*8 |
96 | 96 | save_reg r13, 6*16 + 1*8 |
97 | 97 | save_reg r14, 6*16 + 2*8 |
138 | 138 | |
139 | 139 | %define PS 4 |
140 | 140 | %define LOG_PS 2 |
141 | %define func(x) x: | |
141 | %define func(x) x: endbranch | |
142 | 142 | %define arg(x) [ebp + PS*2 + PS*x] |
143 | 143 | %define var(x) [ebp - PS - PS*x] |
144 | 144 | |
260 | 260 | %endif |
261 | 261 | |
262 | 262 | align 16 |
263 | global gf_3vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION | |
263 | mk_global gf_3vect_dot_prod_avx, function | |
264 | 264 | func(gf_3vect_dot_prod_avx) |
265 | 265 | FUNC_SAVE |
266 | 266 | SLDR len, len_m |
53 | 53 | %define PS 8 |
54 | 54 | %define LOG_PS 3 |
55 | 55 | |
56 | %define func(x) x: | |
56 | %define func(x) x: endbranch | |
57 | 57 | %macro FUNC_SAVE 0 |
58 | 58 | push r12 |
59 | 59 | push r13 |
142 | 142 | |
143 | 143 | %define PS 4 |
144 | 144 | %define LOG_PS 2 |
145 | %define func(x) x: | |
145 | %define func(x) x: endbranch | |
146 | 146 | %define arg(x) [ebp + PS*2 + PS*x] |
147 | 147 | %define var(x) [ebp - PS - PS*x] |
148 | 148 | |
268 | 268 | %endif |
269 | 269 | |
270 | 270 | align 16 |
271 | global gf_3vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION | |
271 | mk_global gf_3vect_dot_prod_avx2, function | |
272 | 272 | func(gf_3vect_dot_prod_avx2) |
273 | 273 | FUNC_SAVE |
274 | 274 | SLDR len, len_m |
52 | 52 | %define PS 8 |
53 | 53 | %define LOG_PS 3 |
54 | 54 | |
55 | %define func(x) x: | |
55 | %define func(x) x: endbranch | |
56 | 56 | %macro FUNC_SAVE 0 |
57 | 57 | push r12 |
58 | 58 | push r13 |
172 | 172 | section .text |
173 | 173 | |
174 | 174 | align 16 |
175 | global gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION | |
175 | mk_global gf_3vect_dot_prod_avx512, function | |
176 | 176 | func(gf_3vect_dot_prod_avx512) |
177 | 177 | FUNC_SAVE |
178 | 178 | sub len, 64 |
51 | 51 | %define PS 8 |
52 | 52 | %define LOG_PS 3 |
53 | 53 | |
54 | %define func(x) x: | |
54 | %define func(x) x: endbranch | |
55 | 55 | %macro FUNC_SAVE 0 |
56 | 56 | push r12 |
57 | 57 | push r13 |
138 | 138 | |
139 | 139 | %define PS 4 |
140 | 140 | %define LOG_PS 2 |
141 | %define func(x) x: | |
141 | %define func(x) x: endbranch | |
142 | 142 | %define arg(x) [ebp + PS*2 + PS*x] |
143 | 143 | %define var(x) [ebp - PS - PS*x] |
144 | 144 | |
260 | 260 | %endif |
261 | 261 | |
262 | 262 | align 16 |
263 | global gf_3vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION | |
263 | mk_global gf_3vect_dot_prod_sse, function | |
264 | 264 | func(gf_3vect_dot_prod_sse) |
265 | 265 | FUNC_SAVE |
266 | 266 | SLDR len, len_m |
0 | /********************************************************************** | |
1 | Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Intel Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | #include <stdio.h> | |
30 | #include <stdlib.h> | |
31 | #include <string.h> // for memset, memcmp | |
32 | #include "erasure_code.h" | |
33 | #include "types.h" | |
34 | ||
// Routine exercised by this test. It can be overridden at compile time;
// main() compares its output against gf_vect_dot_prod_base().
35 | #ifndef FUNCTION_UNDER_TEST | |
36 | # define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse | |
37 | #endif | |
// Smallest length passed to the routine by the size-sweep loops in main();
// also used there as a power-of-two mask when rounding the unaligned-test size.
38 | #ifndef TEST_MIN_SIZE | |
39 | # define TEST_MIN_SIZE 16 | |
40 | #endif | |
41 | ||
// Two-level stringification: xstr(X) expands X first and then turns the
// result into a string literal, so xstr(FUNCTION_UNDER_TEST) prints the
// name of the selected routine in the test messages.
42 | #define str(s) #s | |
43 | #define xstr(s) str(s) | |
44 | ||
// TEST_LEN is the size in bytes of every buffer allocated by main();
// TEST_SIZE is the largest length used by the end-of-buffer sweep.
// NOTE(review): TEST_MEM, TEST_LOOPS and TEST_TYPE_STR are not referenced in
// the part of this file that was reviewed - confirm they are still needed.
45 | #define TEST_LEN 8192 | |
46 | #define TEST_SIZE (TEST_LEN/2) | |
47 | #define TEST_MEM TEST_SIZE | |
48 | #define TEST_LOOPS 10000 | |
49 | #define TEST_TYPE_STR "" | |
50 | ||
// TEST_SOURCES: number of source buffers (and coefficients per output).
// RANDOMS: iteration count of the randomized test loops.
51 | #ifndef TEST_SOURCES | |
52 | # define TEST_SOURCES 16 | |
53 | #endif | |
54 | #ifndef RANDOMS | |
55 | # define RANDOMS 20 | |
56 | #endif | |
57 | ||
// A non-zero PTR_ALIGN_CHK_B makes main() add random offsets in the range
// [0, PTR_ALIGN_CHK_B - 1] to the source and destination pointers.
// A non-zero LEN_ALIGN_CHK_B makes the length sweeps step by 1 byte
// instead of 16.
58 | #ifdef EC_ALIGNED_ADDR | |
59 | // Define power of 2 range to check ptr, len alignment | |
60 | # define PTR_ALIGN_CHK_B 0 | |
61 | # define LEN_ALIGN_CHK_B 0 // 0 for aligned only | |
62 | #else | |
63 | // Define power of 2 range to check ptr, len alignment | |
64 | # define PTR_ALIGN_CHK_B 32 | |
65 | # define LEN_ALIGN_CHK_B 32 // 0 for aligned only | |
66 | #endif | |
67 | ||
68 | typedef unsigned char u8; | |
69 | ||
// Prototype of the routine under test. main() calls it with the byte length
// of each buffer, the number of sources, the tables built by
// gf_vect_mul_init() (32 bytes per source per output), the array of source
// pointers and the array of destination pointers.
70 | extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, | |
71 | unsigned char **src, unsigned char **dest); | |
72 | ||
// Print the first len bytes of buf to stdout in hex, one " %2x" field per
// byte. A newline is emitted after every 32nd byte and once more at the end.
void dump(unsigned char *buf, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		printf(" %2x", 0xff & buf[pos]);
		// pos + 1 bytes have been written; wrap after each full row of 32.
		if ((pos + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
83 | ||
// Print a k x m window of a matrix stored as an array of row pointers:
// for each of the first k rows of s, the first m bytes are written in hex
// on one line. A blank line terminates the output.
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		printf("\n");
		row++;
	}
	printf("\n");
}
95 | ||
// Print a k x m byte matrix stored contiguously in row-major order
// (element (row, col) lives at s[row * m + col]), one row per line in hex.
// A blank line terminates the output.
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++)
			printf(" %2x", 0xff & s[(row * m) + col]);
		printf("\n");
	}
	printf("\n");
}
107 | ||
108 | int main(int argc, char *argv[]) | |
109 | { | |
110 | int i, j, rtest, srcs; | |
111 | void *buf; | |
112 | u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; | |
113 | u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES]; | |
114 | u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3; | |
115 | ||
116 | int align, size; | |
117 | unsigned char *efence_buffs[TEST_SOURCES]; | |
118 | unsigned int offset; | |
119 | u8 *ubuffs[TEST_SOURCES]; | |
120 | u8 *udest_ptrs[3]; | |
121 | printf(xstr(FUNCTION_UNDER_TEST) "_test: %dx%d ", TEST_SOURCES, TEST_LEN); | |
122 | ||
123 | // Allocate the arrays | |
124 | for (i = 0; i < TEST_SOURCES; i++) { | |
125 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
126 | printf("alloc error: Fail"); | |
127 | return -1; | |
128 | } | |
129 | buffs[i] = buf; | |
130 | } | |
131 | ||
132 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
133 | printf("alloc error: Fail"); | |
134 | return -1; | |
135 | } | |
136 | dest1 = buf; | |
137 | ||
138 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
139 | printf("alloc error: Fail"); | |
140 | return -1; | |
141 | } | |
142 | dest2 = buf; | |
143 | ||
144 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
145 | printf("alloc error: Fail"); | |
146 | return -1; | |
147 | } | |
148 | dest3 = buf; | |
149 | ||
150 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
151 | printf("alloc error: Fail"); | |
152 | return -1; | |
153 | } | |
154 | dest_ref1 = buf; | |
155 | ||
156 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
157 | printf("alloc error: Fail");; | |
158 | return -1; | |
159 | } | |
160 | dest_ref2 = buf; | |
161 | ||
162 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
163 | printf("alloc error: Fail"); | |
164 | return -1; | |
165 | } | |
166 | dest_ref3 = buf; | |
167 | ||
168 | dest_ptrs[0] = dest1; | |
169 | dest_ptrs[1] = dest2; | |
170 | dest_ptrs[2] = dest3; | |
171 | ||
172 | // Test of all zeros | |
173 | for (i = 0; i < TEST_SOURCES; i++) | |
174 | memset(buffs[i], 0, TEST_LEN); | |
175 | ||
176 | memset(dest1, 0, TEST_LEN); | |
177 | memset(dest2, 0, TEST_LEN); | |
178 | memset(dest3, 0, TEST_LEN); | |
179 | memset(dest_ref1, 0, TEST_LEN); | |
180 | memset(dest_ref2, 0, TEST_LEN); | |
181 | memset(dest_ref3, 0, TEST_LEN); | |
182 | memset(g1, 2, TEST_SOURCES); | |
183 | memset(g2, 1, TEST_SOURCES); | |
184 | memset(g3, 7, TEST_SOURCES); | |
185 | ||
186 | for (i = 0; i < TEST_SOURCES; i++) { | |
187 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
188 | gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); | |
189 | gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); | |
190 | } | |
191 | ||
192 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
193 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, | |
194 | dest_ref2); | |
195 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, | |
196 | dest_ref3); | |
197 | ||
198 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
199 | ||
200 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
201 | printf("Fail zero" xstr(FUNCTION_UNDER_TEST) " test1\n"); | |
202 | dump_matrix(buffs, 5, TEST_SOURCES); | |
203 | printf("dprod_base:"); | |
204 | dump(dest_ref1, 25); | |
205 | printf("dprod_dut:"); | |
206 | dump(dest1, 25); | |
207 | return -1; | |
208 | } | |
209 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
210 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); | |
211 | dump_matrix(buffs, 5, TEST_SOURCES); | |
212 | printf("dprod_base:"); | |
213 | dump(dest_ref2, 25); | |
214 | printf("dprod_dut:"); | |
215 | dump(dest2, 25); | |
216 | return -1; | |
217 | } | |
218 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
219 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); | |
220 | dump_matrix(buffs, 5, TEST_SOURCES); | |
221 | printf("dprod_base:"); | |
222 | dump(dest_ref3, 25); | |
223 | printf("dprod_dut:"); | |
224 | dump(dest3, 25); | |
225 | return -1; | |
226 | } | |
227 | ||
228 | putchar('.'); | |
229 | ||
230 | // Rand data test | |
231 | ||
232 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
233 | for (i = 0; i < TEST_SOURCES; i++) | |
234 | for (j = 0; j < TEST_LEN; j++) | |
235 | buffs[i][j] = rand(); | |
236 | ||
237 | for (i = 0; i < TEST_SOURCES; i++) { | |
238 | g1[i] = rand(); | |
239 | g2[i] = rand(); | |
240 | g3[i] = rand(); | |
241 | } | |
242 | ||
243 | for (i = 0; i < TEST_SOURCES; i++) { | |
244 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
245 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
246 | gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); | |
247 | } | |
248 | ||
249 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
250 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
251 | buffs, dest_ref2); | |
252 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], | |
253 | buffs, dest_ref3); | |
254 | ||
255 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
256 | ||
257 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
258 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
259 | dump_matrix(buffs, 5, TEST_SOURCES); | |
260 | printf("dprod_base:"); | |
261 | dump(dest_ref1, 25); | |
262 | printf("dprod_dut:"); | |
263 | dump(dest1, 25); | |
264 | return -1; | |
265 | } | |
266 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
267 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
268 | dump_matrix(buffs, 5, TEST_SOURCES); | |
269 | printf("dprod_base:"); | |
270 | dump(dest_ref2, 25); | |
271 | printf("dprod_dut:"); | |
272 | dump(dest2, 25); | |
273 | return -1; | |
274 | } | |
275 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
276 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); | |
277 | dump_matrix(buffs, 5, TEST_SOURCES); | |
278 | printf("dprod_base:"); | |
279 | dump(dest_ref3, 25); | |
280 | printf("dprod_dut:"); | |
281 | dump(dest3, 25); | |
282 | return -1; | |
283 | } | |
284 | ||
285 | putchar('.'); | |
286 | } | |
287 | ||
288 | // Rand data test with varied parameters | |
289 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
290 | for (srcs = TEST_SOURCES; srcs > 0; srcs--) { | |
291 | for (i = 0; i < srcs; i++) | |
292 | for (j = 0; j < TEST_LEN; j++) | |
293 | buffs[i][j] = rand(); | |
294 | ||
295 | for (i = 0; i < srcs; i++) { | |
296 | g1[i] = rand(); | |
297 | g2[i] = rand(); | |
298 | g3[i] = rand(); | |
299 | } | |
300 | ||
301 | for (i = 0; i < srcs; i++) { | |
302 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
303 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
304 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
305 | } | |
306 | ||
307 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); | |
308 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, | |
309 | dest_ref2); | |
310 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, | |
311 | dest_ref3); | |
312 | ||
313 | FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); | |
314 | ||
315 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
316 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
317 | " test1 srcs=%d\n", srcs); | |
318 | dump_matrix(buffs, 5, TEST_SOURCES); | |
319 | printf("dprod_base:"); | |
320 | dump(dest_ref1, 25); | |
321 | printf("dprod_dut:"); | |
322 | dump(dest1, 25); | |
323 | return -1; | |
324 | } | |
325 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
326 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
327 | " test2 srcs=%d\n", srcs); | |
328 | dump_matrix(buffs, 5, TEST_SOURCES); | |
329 | printf("dprod_base:"); | |
330 | dump(dest_ref2, 25); | |
331 | printf("dprod_dut:"); | |
332 | dump(dest2, 25); | |
333 | return -1; | |
334 | } | |
335 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
336 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
337 | " test3 srcs=%d\n", srcs); | |
338 | dump_matrix(buffs, 5, TEST_SOURCES); | |
339 | printf("dprod_base:"); | |
340 | dump(dest_ref3, 25); | |
341 | printf("dprod_dut:"); | |
342 | dump(dest3, 25); | |
343 | return -1; | |
344 | } | |
345 | ||
346 | putchar('.'); | |
347 | } | |
348 | } | |
349 | ||
350 | // Run tests at end of buffer for Electric Fence | |
351 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; | |
352 | for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { | |
353 | for (i = 0; i < TEST_SOURCES; i++) | |
354 | for (j = 0; j < TEST_LEN; j++) | |
355 | buffs[i][j] = rand(); | |
356 | ||
357 | for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end | |
358 | efence_buffs[i] = buffs[i] + TEST_LEN - size; | |
359 | ||
360 | for (i = 0; i < TEST_SOURCES; i++) { | |
361 | g1[i] = rand(); | |
362 | g2[i] = rand(); | |
363 | g3[i] = rand(); | |
364 | } | |
365 | ||
366 | for (i = 0; i < TEST_SOURCES; i++) { | |
367 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
368 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
369 | gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); | |
370 | } | |
371 | ||
372 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); | |
373 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
374 | efence_buffs, dest_ref2); | |
375 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], | |
376 | efence_buffs, dest_ref3); | |
377 | ||
378 | FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); | |
379 | ||
380 | if (0 != memcmp(dest_ref1, dest1, size)) { | |
381 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
382 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
383 | printf("dprod_base:"); | |
384 | dump(dest_ref1, align); | |
385 | printf("dprod_dut:"); | |
386 | dump(dest1, align); | |
387 | return -1; | |
388 | } | |
389 | ||
390 | if (0 != memcmp(dest_ref2, dest2, size)) { | |
391 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
392 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
393 | printf("dprod_base:"); | |
394 | dump(dest_ref2, align); | |
395 | printf("dprod_dut:"); | |
396 | dump(dest2, align); | |
397 | return -1; | |
398 | } | |
399 | ||
400 | if (0 != memcmp(dest_ref3, dest3, size)) { | |
401 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); | |
402 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
403 | printf("dprod_base:"); | |
404 | dump(dest_ref3, align); | |
405 | printf("dprod_dut:"); | |
406 | dump(dest3, align); | |
407 | return -1; | |
408 | } | |
409 | ||
410 | putchar('.'); | |
411 | } | |
412 | ||
413 | // Test rand ptr alignment if available | |
414 | ||
415 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
416 | size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); | |
417 | srcs = rand() % TEST_SOURCES; | |
418 | if (srcs == 0) | |
419 | continue; | |
420 | ||
421 | offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; | |
422 | // Add random offsets | |
423 | for (i = 0; i < srcs; i++) | |
424 | ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
425 | ||
426 | udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
427 | udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
428 | udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
429 | ||
430 | memset(dest1, 0, TEST_LEN); // zero pad to check write-over | |
431 | memset(dest2, 0, TEST_LEN); | |
432 | memset(dest3, 0, TEST_LEN); | |
433 | ||
434 | for (i = 0; i < srcs; i++) | |
435 | for (j = 0; j < size; j++) | |
436 | ubuffs[i][j] = rand(); | |
437 | ||
438 | for (i = 0; i < srcs; i++) { | |
439 | g1[i] = rand(); | |
440 | g2[i] = rand(); | |
441 | g3[i] = rand(); | |
442 | } | |
443 | ||
444 | for (i = 0; i < srcs; i++) { | |
445 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
446 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
447 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
448 | } | |
449 | ||
450 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); | |
451 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); | |
452 | gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); | |
453 | ||
454 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); | |
455 | ||
456 | if (memcmp(dest_ref1, udest_ptrs[0], size)) { | |
457 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
458 | srcs); | |
459 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
460 | printf("dprod_base:"); | |
461 | dump(dest_ref1, 25); | |
462 | printf("dprod_dut:"); | |
463 | dump(udest_ptrs[0], 25); | |
464 | return -1; | |
465 | } | |
466 | if (memcmp(dest_ref2, udest_ptrs[1], size)) { | |
467 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
468 | srcs); | |
469 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
470 | printf("dprod_base:"); | |
471 | dump(dest_ref2, 25); | |
472 | printf("dprod_dut:"); | |
473 | dump(udest_ptrs[1], 25); | |
474 | return -1; | |
475 | } | |
476 | if (memcmp(dest_ref3, udest_ptrs[2], size)) { | |
477 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
478 | srcs); | |
479 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
480 | printf("dprod_base:"); | |
481 | dump(dest_ref3, 25); | |
482 | printf("dprod_dut:"); | |
483 | dump(udest_ptrs[2], 25); | |
484 | return -1; | |
485 | } | |
486 | // Confirm that padding around dests is unchanged | |
487 | memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff | |
488 | offset = udest_ptrs[0] - dest1; | |
489 | ||
490 | if (memcmp(dest1, dest_ref1, offset)) { | |
491 | printf("Fail rand ualign pad1 start\n"); | |
492 | return -1; | |
493 | } | |
494 | if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
495 | printf("Fail rand ualign pad1 end\n"); | |
496 | return -1; | |
497 | } | |
498 | ||
499 | offset = udest_ptrs[1] - dest2; | |
500 | if (memcmp(dest2, dest_ref1, offset)) { | |
501 | printf("Fail rand ualign pad2 start\n"); | |
502 | return -1; | |
503 | } | |
504 | if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
505 | printf("Fail rand ualign pad2 end\n"); | |
506 | return -1; | |
507 | } | |
508 | ||
509 | offset = udest_ptrs[2] - dest3; | |
510 | if (memcmp(dest3, dest_ref1, offset)) { | |
511 | printf("Fail rand ualign pad3 start\n"); | |
512 | return -1; | |
513 | } | |
514 | if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
515 | printf("Fail rand ualign pad3 end\n");; | |
516 | return -1; | |
517 | } | |
518 | ||
519 | putchar('.'); | |
520 | } | |
521 | ||
522 | // Test all size alignment | |
523 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; | |
524 | ||
525 | for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { | |
526 | srcs = TEST_SOURCES; | |
527 | ||
528 | for (i = 0; i < srcs; i++) | |
529 | for (j = 0; j < size; j++) | |
530 | buffs[i][j] = rand(); | |
531 | ||
532 | for (i = 0; i < srcs; i++) { | |
533 | g1[i] = rand(); | |
534 | g2[i] = rand(); | |
535 | g3[i] = rand(); | |
536 | } | |
537 | ||
538 | for (i = 0; i < srcs; i++) { | |
539 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
540 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
541 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
542 | } | |
543 | ||
544 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); | |
545 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); | |
546 | gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); | |
547 | ||
548 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); | |
549 | ||
550 | if (memcmp(dest_ref1, dest_ptrs[0], size)) { | |
551 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
552 | size); | |
553 | dump_matrix(buffs, 5, TEST_SOURCES); | |
554 | printf("dprod_base:"); | |
555 | dump(dest_ref1, 25); | |
556 | printf("dprod_dut:"); | |
557 | dump(dest_ptrs[0], 25); | |
558 | return -1; | |
559 | } | |
560 | if (memcmp(dest_ref2, dest_ptrs[1], size)) { | |
561 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
562 | size); | |
563 | dump_matrix(buffs, 5, TEST_SOURCES); | |
564 | printf("dprod_base:"); | |
565 | dump(dest_ref2, 25); | |
566 | printf("dprod_dut:"); | |
567 | dump(dest_ptrs[1], 25); | |
568 | return -1; | |
569 | } | |
570 | if (memcmp(dest_ref3, dest_ptrs[2], size)) { | |
571 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
572 | size); | |
573 | dump_matrix(buffs, 5, TEST_SOURCES); | |
574 | printf("dprod_base:"); | |
575 | dump(dest_ref3, 25); | |
576 | printf("dprod_dut:"); | |
577 | dump(dest_ptrs[2], 25); | |
578 | return -1; | |
579 | } | |
580 | } | |
581 | ||
582 | printf("Pass\n"); | |
583 | return 0; | |
584 | ||
585 | } |
96 | 96 | %define return rax |
97 | 97 | %define return.w eax |
98 | 98 | |
99 | %define func(x) x: | |
99 | %define func(x) x: endbranch | |
100 | 100 | %define FUNC_SAVE |
101 | 101 | %define FUNC_RESTORE |
102 | 102 | %endif |
157 | 157 | %define xd3 xtmph1 |
158 | 158 | |
159 | 159 | align 16 |
160 | global gf_3vect_mad_avx:ISAL_SYM_TYPE_FUNCTION | |
160 | mk_global gf_3vect_mad_avx, function | |
161 | 161 | func(gf_3vect_mad_avx) |
162 | 162 | FUNC_SAVE |
163 | 163 | sub len, 16 |
102 | 102 | %define return rax |
103 | 103 | %define return.w eax |
104 | 104 | |
105 | %define func(x) x: | |
105 | %define func(x) x: endbranch | |
106 | 106 | %define FUNC_SAVE |
107 | 107 | %define FUNC_RESTORE |
108 | 108 | %endif |
164 | 164 | %define xd3 ymm10 |
165 | 165 | |
166 | 166 | align 16 |
167 | global gf_3vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION | |
167 | mk_global gf_3vect_mad_avx2, function | |
168 | 168 | func(gf_3vect_mad_avx2) |
169 | 169 | FUNC_SAVE |
170 | 170 | sub len, 32 |
43 | 43 | %define arg5 r9 |
44 | 44 | %define tmp r11 |
45 | 45 | %define return rax |
46 | %define func(x) x: | |
46 | %define func(x) x: endbranch | |
47 | 47 | %define FUNC_SAVE |
48 | 48 | %define FUNC_RESTORE |
49 | 49 | %endif |
151 | 151 | %define xmask0f zmm17 |
152 | 152 | |
153 | 153 | align 16 |
154 | global gf_3vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION | |
154 | mk_global gf_3vect_mad_avx512, function | |
155 | 155 | func(gf_3vect_mad_avx512) |
156 | 156 | FUNC_SAVE |
157 | 157 | sub len, 64 |
95 | 95 | %define return rax |
96 | 96 | %define return.w eax |
97 | 97 | |
98 | %define func(x) x: | |
98 | %define func(x) x: endbranch | |
99 | 99 | %define FUNC_SAVE |
100 | 100 | %define FUNC_RESTORE |
101 | 101 | %endif |
155 | 155 | %define xd3 xtmph1 |
156 | 156 | |
157 | 157 | align 16 |
158 | global gf_3vect_mad_sse:ISAL_SYM_TYPE_FUNCTION | |
158 | mk_global gf_3vect_mad_sse, function | |
159 | 159 | func(gf_3vect_mad_sse) |
160 | 160 | FUNC_SAVE |
161 | 161 | sub len, 16 |
53 | 53 | %define PS 8 |
54 | 54 | %define LOG_PS 3 |
55 | 55 | |
56 | %define func(x) x: | |
56 | %define func(x) x: endbranch | |
57 | 57 | %macro FUNC_SAVE 0 |
58 | 58 | push r12 |
59 | 59 | push r13 |
94 | 94 | %define func(x) proc_frame x |
95 | 95 | %macro FUNC_SAVE 0 |
96 | 96 | alloc_stack stack_size |
97 | save_xmm128 xmm6, 0*16 | |
98 | save_xmm128 xmm7, 1*16 | |
99 | save_xmm128 xmm8, 2*16 | |
100 | save_xmm128 xmm9, 3*16 | |
101 | save_xmm128 xmm10, 4*16 | |
102 | save_xmm128 xmm11, 5*16 | |
103 | save_xmm128 xmm12, 6*16 | |
104 | save_xmm128 xmm13, 7*16 | |
105 | save_xmm128 xmm14, 8*16 | |
97 | vmovdqa [rsp + 0*16], xmm6 | |
98 | vmovdqa [rsp + 1*16], xmm7 | |
99 | vmovdqa [rsp + 2*16], xmm8 | |
100 | vmovdqa [rsp + 3*16], xmm9 | |
101 | vmovdqa [rsp + 4*16], xmm10 | |
102 | vmovdqa [rsp + 5*16], xmm11 | |
103 | vmovdqa [rsp + 6*16], xmm12 | |
104 | vmovdqa [rsp + 7*16], xmm13 | |
105 | vmovdqa [rsp + 8*16], xmm14 | |
106 | 106 | save_reg r12, 9*16 + 0*8 |
107 | 107 | save_reg r13, 9*16 + 1*8 |
108 | 108 | save_reg r14, 9*16 + 2*8 |
158 | 158 | |
159 | 159 | %define PS 4 |
160 | 160 | %define LOG_PS 2 |
161 | %define func(x) x: | |
161 | %define func(x) x: endbranch | |
162 | 162 | %define arg(x) [ebp + PS*2 + PS*x] |
163 | 163 | %define var(x) [ebp - PS - PS*x] |
164 | 164 | |
293 | 293 | %define xp4 xmm5 |
294 | 294 | %endif |
295 | 295 | align 16 |
296 | global gf_4vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION | |
296 | mk_global gf_4vect_dot_prod_avx, function | |
297 | 297 | func(gf_4vect_dot_prod_avx) |
298 | 298 | FUNC_SAVE |
299 | 299 | SLDR len, len_m |
55 | 55 | %define PS 8 |
56 | 56 | %define LOG_PS 3 |
57 | 57 | |
58 | %define func(x) x: | |
58 | %define func(x) x: endbranch | |
59 | 59 | %macro FUNC_SAVE 0 |
60 | 60 | push r12 |
61 | 61 | push r13 |
162 | 162 | |
163 | 163 | %define PS 4 |
164 | 164 | %define LOG_PS 2 |
165 | %define func(x) x: | |
165 | %define func(x) x: endbranch | |
166 | 166 | %define arg(x) [ebp + PS*2 + PS*x] |
167 | 167 | %define var(x) [ebp - PS - PS*x] |
168 | 168 | |
301 | 301 | %define xp4 ymm5 |
302 | 302 | %endif |
303 | 303 | align 16 |
304 | global gf_4vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION | |
304 | mk_global gf_4vect_dot_prod_avx2, function | |
305 | 305 | func(gf_4vect_dot_prod_avx2) |
306 | 306 | FUNC_SAVE |
307 | 307 | SLDR len, len_m |
54 | 54 | %define PS 8 |
55 | 55 | %define LOG_PS 3 |
56 | 56 | |
57 | %define func(x) x: | |
57 | %define func(x) x: endbranch | |
58 | 58 | %macro FUNC_SAVE 0 |
59 | 59 | push r12 |
60 | 60 | push r13 |
190 | 190 | section .text |
191 | 191 | |
192 | 192 | align 16 |
193 | global gf_4vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION | |
193 | mk_global gf_4vect_dot_prod_avx512, function | |
194 | 194 | func(gf_4vect_dot_prod_avx512) |
195 | 195 | FUNC_SAVE |
196 | 196 | sub len, 64 |
53 | 53 | %define PS 8 |
54 | 54 | %define LOG_PS 3 |
55 | 55 | |
56 | %define func(x) x: | |
56 | %define func(x) x: endbranch | |
57 | 57 | %macro FUNC_SAVE 0 |
58 | 58 | push r12 |
59 | 59 | push r13 |
158 | 158 | |
159 | 159 | %define PS 4 |
160 | 160 | %define LOG_PS 2 |
161 | %define func(x) x: | |
161 | %define func(x) x: endbranch | |
162 | 162 | %define arg(x) [ebp + PS*2 + PS*x] |
163 | 163 | %define var(x) [ebp - PS - PS*x] |
164 | 164 | |
293 | 293 | %define xp4 xmm5 |
294 | 294 | %endif |
295 | 295 | align 16 |
296 | global gf_4vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION | |
296 | mk_global gf_4vect_dot_prod_sse, function | |
297 | 297 | func(gf_4vect_dot_prod_sse) |
298 | 298 | FUNC_SAVE |
299 | 299 | SLDR len, len_m |
0 | /********************************************************************** | |
1 | Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Intel Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | #include <stdio.h> | |
30 | #include <stdlib.h> | |
31 | #include <string.h> // for memset, memcmp | |
32 | #include "erasure_code.h" | |
33 | #include "types.h" | |
34 | ||
35 | #ifndef FUNCTION_UNDER_TEST | |
36 | # define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse | |
37 | #endif | |
38 | #ifndef TEST_MIN_SIZE | |
39 | # define TEST_MIN_SIZE 16 | |
40 | #endif | |
41 | ||
42 | #define str(s) #s | |
43 | #define xstr(s) str(s) | |
44 | ||
45 | #define TEST_LEN 8192 | |
46 | #define TEST_SIZE (TEST_LEN/2) | |
47 | #define TEST_MEM TEST_SIZE | |
48 | #define TEST_LOOPS 10000 | |
49 | #define TEST_TYPE_STR "" | |
50 | ||
51 | #ifndef TEST_SOURCES | |
52 | # define TEST_SOURCES 16 | |
53 | #endif | |
54 | #ifndef RANDOMS | |
55 | # define RANDOMS 20 | |
56 | #endif | |
57 | ||
58 | #ifdef EC_ALIGNED_ADDR | |
59 | // Define power of 2 range to check ptr, len alignment | |
60 | # define PTR_ALIGN_CHK_B 0 | |
61 | # define LEN_ALIGN_CHK_B 0 // 0 for aligned only | |
62 | #else | |
63 | // Define power of 2 range to check ptr, len alignment | |
64 | # define PTR_ALIGN_CHK_B 32 | |
65 | # define LEN_ALIGN_CHK_B 32 // 0 for aligned only | |
66 | #endif | |
67 | ||
68 | typedef unsigned char u8; | |
69 | ||
70 | extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, | |
71 | unsigned char **src, unsigned char **dest); | |
72 | ||
/*
 * Print the first len bytes of buf to stdout in hex.
 *
 * Each byte is written with the " %2x" format. A line break is emitted
 * after every 32nd byte, and one more line break follows the last byte.
 */
void dump(unsigned char *buf, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		printf(" %2x", 0xff & buf[pos]);
		// Wrap the output once 32 bytes have been written on this line
		if ((pos + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
83 | ||
/*
 * Print a k x m block of bytes to stdout in hex.
 *
 * s is an array of row pointers: for each of the first k rows, the first
 * m bytes of s[row] are written with the " %2x" format, followed by a
 * line break. A final blank line terminates the block.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
95 | ||
/*
 * Print a k x m byte matrix to stdout in hex.
 *
 * s is a single flat buffer read in row-major order: element (row, col)
 * is taken from s[col + row * m]. Each row is written on its own line
 * with the " %2x" format, and a blank line follows the last row.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			// Flat index of (row, col) in the row-major buffer
			printf(" %2x", 0xff & s[col + (row * m)]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
107 | ||
108 | int main(int argc, char *argv[]) | |
109 | { | |
110 | int i, j, rtest, srcs; | |
111 | void *buf; | |
112 | u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; | |
113 | u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES]; | |
114 | u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3; | |
115 | u8 *dest_ref4, *dest_ptrs[4]; | |
116 | ||
117 | int align, size; | |
118 | unsigned char *efence_buffs[TEST_SOURCES]; | |
119 | unsigned int offset; | |
120 | u8 *ubuffs[TEST_SOURCES]; | |
121 | u8 *udest_ptrs[4]; | |
122 | printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); | |
123 | ||
124 | // Allocate the arrays | |
125 | for (i = 0; i < TEST_SOURCES; i++) { | |
126 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
127 | printf("alloc error: Fail"); | |
128 | return -1; | |
129 | } | |
130 | buffs[i] = buf; | |
131 | } | |
132 | ||
133 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
134 | printf("alloc error: Fail"); | |
135 | return -1; | |
136 | } | |
137 | dest1 = buf; | |
138 | ||
139 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
140 | printf("alloc error: Fail"); | |
141 | return -1; | |
142 | } | |
143 | dest2 = buf; | |
144 | ||
145 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
146 | printf("alloc error: Fail"); | |
147 | return -1; | |
148 | } | |
149 | dest3 = buf; | |
150 | ||
151 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
152 | printf("alloc error: Fail"); | |
153 | return -1; | |
154 | } | |
155 | dest4 = buf; | |
156 | ||
157 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
158 | printf("alloc error: Fail"); | |
159 | return -1; | |
160 | } | |
161 | dest_ref1 = buf; | |
162 | ||
163 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
164 | printf("alloc error: Fail"); | |
165 | return -1; | |
166 | } | |
167 | dest_ref2 = buf; | |
168 | ||
169 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
170 | printf("alloc error: Fail"); | |
171 | return -1; | |
172 | } | |
173 | dest_ref3 = buf; | |
174 | ||
175 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
176 | printf("alloc error: Fail"); | |
177 | return -1; | |
178 | } | |
179 | dest_ref4 = buf; | |
180 | ||
181 | dest_ptrs[0] = dest1; | |
182 | dest_ptrs[1] = dest2; | |
183 | dest_ptrs[2] = dest3; | |
184 | dest_ptrs[3] = dest4; | |
185 | ||
186 | // Test of all zeros | |
187 | for (i = 0; i < TEST_SOURCES; i++) | |
188 | memset(buffs[i], 0, TEST_LEN); | |
189 | ||
190 | memset(dest1, 0, TEST_LEN); | |
191 | memset(dest2, 0, TEST_LEN); | |
192 | memset(dest3, 0, TEST_LEN); | |
193 | memset(dest4, 0, TEST_LEN); | |
194 | memset(dest_ref1, 0, TEST_LEN); | |
195 | memset(dest_ref2, 0, TEST_LEN); | |
196 | memset(dest_ref3, 0, TEST_LEN); | |
197 | memset(dest_ref4, 0, TEST_LEN); | |
198 | memset(g1, 2, TEST_SOURCES); | |
199 | memset(g2, 1, TEST_SOURCES); | |
200 | memset(g3, 7, TEST_SOURCES); | |
201 | memset(g4, 3, TEST_SOURCES); | |
202 | ||
203 | for (i = 0; i < TEST_SOURCES; i++) { | |
204 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
205 | gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); | |
206 | gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); | |
207 | gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); | |
208 | } | |
209 | ||
210 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
211 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, | |
212 | dest_ref2); | |
213 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, | |
214 | dest_ref3); | |
215 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, | |
216 | dest_ref4); | |
217 | ||
218 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
219 | ||
220 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
221 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); | |
222 | dump_matrix(buffs, 5, TEST_SOURCES); | |
223 | printf("dprod_base:"); | |
224 | dump(dest_ref1, 25); | |
225 | printf("dprod_dut:"); | |
226 | dump(dest1, 25); | |
227 | return -1; | |
228 | } | |
229 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
230 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); | |
231 | dump_matrix(buffs, 5, TEST_SOURCES); | |
232 | printf("dprod_base:"); | |
233 | dump(dest_ref2, 25); | |
234 | printf("dprod_dut:"); | |
235 | dump(dest2, 25); | |
236 | return -1; | |
237 | } | |
238 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
239 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); | |
240 | dump_matrix(buffs, 5, TEST_SOURCES); | |
241 | printf("dprod_base:"); | |
242 | dump(dest_ref3, 25); | |
243 | printf("dprod_dut:"); | |
244 | dump(dest3, 25); | |
245 | return -1; | |
246 | } | |
247 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
248 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); | |
249 | dump_matrix(buffs, 5, TEST_SOURCES); | |
250 | printf("dprod_base:"); | |
251 | dump(dest_ref4, 25); | |
252 | printf("dprod_dut:"); | |
253 | dump(dest4, 25); | |
254 | return -1; | |
255 | } | |
256 | ||
257 | putchar('.'); | |
258 | ||
259 | // Rand data test | |
260 | ||
261 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
262 | for (i = 0; i < TEST_SOURCES; i++) | |
263 | for (j = 0; j < TEST_LEN; j++) | |
264 | buffs[i][j] = rand(); | |
265 | ||
266 | for (i = 0; i < TEST_SOURCES; i++) { | |
267 | g1[i] = rand(); | |
268 | g2[i] = rand(); | |
269 | g3[i] = rand(); | |
270 | g4[i] = rand(); | |
271 | } | |
272 | ||
273 | for (i = 0; i < TEST_SOURCES; i++) { | |
274 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
275 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
276 | gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); | |
277 | gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); | |
278 | } | |
279 | ||
280 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
281 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
282 | buffs, dest_ref2); | |
283 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], | |
284 | buffs, dest_ref3); | |
285 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], | |
286 | buffs, dest_ref4); | |
287 | ||
288 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
289 | ||
290 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
291 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
292 | dump_matrix(buffs, 5, TEST_SOURCES); | |
293 | printf("dprod_base:"); | |
294 | dump(dest_ref1, 25); | |
295 | printf("dprod_dut:"); | |
296 | dump(dest1, 25); | |
297 | return -1; | |
298 | } | |
299 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
300 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
301 | dump_matrix(buffs, 5, TEST_SOURCES); | |
302 | printf("dprod_base:"); | |
303 | dump(dest_ref2, 25); | |
304 | printf("dprod_dut:"); | |
305 | dump(dest2, 25); | |
306 | return -1; | |
307 | } | |
308 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
309 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); | |
310 | dump_matrix(buffs, 5, TEST_SOURCES); | |
311 | printf("dprod_base:"); | |
312 | dump(dest_ref3, 25); | |
313 | printf("dprod_dut:"); | |
314 | dump(dest3, 25); | |
315 | return -1; | |
316 | } | |
317 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
318 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); | |
319 | dump_matrix(buffs, 5, TEST_SOURCES); | |
320 | printf("dprod_base:"); | |
321 | dump(dest_ref4, 25); | |
322 | printf("dprod_dut:"); | |
323 | dump(dest4, 25); | |
324 | return -1; | |
325 | } | |
326 | ||
327 | putchar('.'); | |
328 | } | |
329 | ||
330 | // Rand data test with varied parameters | |
331 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
332 | for (srcs = TEST_SOURCES; srcs > 0; srcs--) { | |
333 | for (i = 0; i < srcs; i++) | |
334 | for (j = 0; j < TEST_LEN; j++) | |
335 | buffs[i][j] = rand(); | |
336 | ||
337 | for (i = 0; i < srcs; i++) { | |
338 | g1[i] = rand(); | |
339 | g2[i] = rand(); | |
340 | g3[i] = rand(); | |
341 | g4[i] = rand(); | |
342 | } | |
343 | ||
344 | for (i = 0; i < srcs; i++) { | |
345 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
346 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
347 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
348 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
349 | } | |
350 | ||
351 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); | |
352 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, | |
353 | dest_ref2); | |
354 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, | |
355 | dest_ref3); | |
356 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, | |
357 | dest_ref4); | |
358 | ||
359 | FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); | |
360 | ||
361 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
362 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
363 | " test1 srcs=%d\n", srcs); | |
364 | dump_matrix(buffs, 5, TEST_SOURCES); | |
365 | printf("dprod_base:"); | |
366 | dump(dest_ref1, 25); | |
367 | printf("dprod_dut:"); | |
368 | dump(dest1, 25); | |
369 | return -1; | |
370 | } | |
371 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
372 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
373 | " test2 srcs=%d\n", srcs); | |
374 | dump_matrix(buffs, 5, TEST_SOURCES); | |
375 | printf("dprod_base:"); | |
376 | dump(dest_ref2, 25); | |
377 | printf("dprod_dut:"); | |
378 | dump(dest2, 25); | |
379 | return -1; | |
380 | } | |
381 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
382 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
383 | " test3 srcs=%d\n", srcs); | |
384 | dump_matrix(buffs, 5, TEST_SOURCES); | |
385 | printf("dprod_base:"); | |
386 | dump(dest_ref3, 25); | |
387 | printf("dprod_dut:"); | |
388 | dump(dest3, 25); | |
389 | return -1; | |
390 | } | |
391 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
392 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
393 | " test4 srcs=%d\n", srcs); | |
394 | dump_matrix(buffs, 5, TEST_SOURCES); | |
395 | printf("dprod_base:"); | |
396 | dump(dest_ref4, 25); | |
397 | printf("dprod_dut:"); | |
398 | dump(dest4, 25); | |
399 | return -1; | |
400 | } | |
401 | ||
402 | putchar('.'); | |
403 | } | |
404 | } | |
405 | ||
406 | // Run tests at end of buffer for Electric Fence | |
407 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32; | |
408 | for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { | |
409 | for (i = 0; i < TEST_SOURCES; i++) | |
410 | for (j = 0; j < TEST_LEN; j++) | |
411 | buffs[i][j] = rand(); | |
412 | ||
413 | for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end | |
414 | efence_buffs[i] = buffs[i] + TEST_LEN - size; | |
415 | ||
416 | for (i = 0; i < TEST_SOURCES; i++) { | |
417 | g1[i] = rand(); | |
418 | g2[i] = rand(); | |
419 | g3[i] = rand(); | |
420 | g4[i] = rand(); | |
421 | } | |
422 | ||
423 | for (i = 0; i < TEST_SOURCES; i++) { | |
424 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
425 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
426 | gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); | |
427 | gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); | |
428 | } | |
429 | ||
430 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); | |
431 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
432 | efence_buffs, dest_ref2); | |
433 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], | |
434 | efence_buffs, dest_ref3); | |
435 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], | |
436 | efence_buffs, dest_ref4); | |
437 | ||
438 | FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); | |
439 | ||
440 | if (0 != memcmp(dest_ref1, dest1, size)) { | |
441 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
442 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
443 | printf("dprod_base:"); | |
444 | dump(dest_ref1, align); | |
445 | printf("dprod_dut:"); | |
446 | dump(dest1, align); | |
447 | return -1; | |
448 | } | |
449 | ||
450 | if (0 != memcmp(dest_ref2, dest2, size)) { | |
451 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
452 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
453 | printf("dprod_base:"); | |
454 | dump(dest_ref2, align); | |
455 | printf("dprod_dut:"); | |
456 | dump(dest2, align); | |
457 | return -1; | |
458 | } | |
459 | ||
460 | if (0 != memcmp(dest_ref3, dest3, size)) { | |
461 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); | |
462 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
463 | printf("dprod_base:"); | |
464 | dump(dest_ref3, align); | |
465 | printf("dprod_dut:"); | |
466 | dump(dest3, align); | |
467 | return -1; | |
468 | } | |
469 | ||
470 | if (0 != memcmp(dest_ref4, dest4, size)) { | |
471 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); | |
472 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
473 | printf("dprod_base:"); | |
474 | dump(dest_ref4, align); | |
475 | printf("dprod_dut:"); | |
476 | dump(dest4, align); | |
477 | return -1; | |
478 | } | |
479 | ||
480 | putchar('.'); | |
481 | } | |
482 | ||
483 | // Test rand ptr alignment if available | |
484 | ||
485 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
486 | size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); | |
487 | srcs = rand() % TEST_SOURCES; | |
488 | if (srcs == 0) | |
489 | continue; | |
490 | ||
491 | offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; | |
492 | // Add random offsets | |
493 | for (i = 0; i < srcs; i++) | |
494 | ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
495 | ||
496 | udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
497 | udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
498 | udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
499 | udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
500 | ||
501 | memset(dest1, 0, TEST_LEN); // zero pad to check write-over | |
502 | memset(dest2, 0, TEST_LEN); | |
503 | memset(dest3, 0, TEST_LEN); | |
504 | memset(dest4, 0, TEST_LEN); | |
505 | ||
506 | for (i = 0; i < srcs; i++) | |
507 | for (j = 0; j < size; j++) | |
508 | ubuffs[i][j] = rand(); | |
509 | ||
510 | for (i = 0; i < srcs; i++) { | |
511 | g1[i] = rand(); | |
512 | g2[i] = rand(); | |
513 | g3[i] = rand(); | |
514 | g4[i] = rand(); | |
515 | } | |
516 | ||
517 | for (i = 0; i < srcs; i++) { | |
518 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
519 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
520 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
521 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
522 | } | |
523 | ||
524 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); | |
525 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); | |
526 | gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); | |
527 | gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); | |
528 | ||
529 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); | |
530 | ||
531 | if (memcmp(dest_ref1, udest_ptrs[0], size)) { | |
532 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
533 | srcs); | |
534 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
535 | printf("dprod_base:"); | |
536 | dump(dest_ref1, 25); | |
537 | printf("dprod_dut:"); | |
538 | dump(udest_ptrs[0], 25); | |
539 | return -1; | |
540 | } | |
541 | if (memcmp(dest_ref2, udest_ptrs[1], size)) { | |
542 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
543 | srcs); | |
544 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
545 | printf("dprod_base:"); | |
546 | dump(dest_ref2, 25); | |
547 | printf("dprod_dut:"); | |
548 | dump(udest_ptrs[1], 25); | |
549 | return -1; | |
550 | } | |
551 | if (memcmp(dest_ref3, udest_ptrs[2], size)) { | |
552 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
553 | srcs); | |
554 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
555 | printf("dprod_base:"); | |
556 | dump(dest_ref3, 25); | |
557 | printf("dprod_dut:"); | |
558 | dump(udest_ptrs[2], 25); | |
559 | return -1; | |
560 | } | |
561 | if (memcmp(dest_ref4, udest_ptrs[3], size)) { | |
562 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
563 | srcs); | |
564 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
565 | printf("dprod_base:"); | |
566 | dump(dest_ref4, 25); | |
567 | printf("dprod_dut:"); | |
568 | dump(udest_ptrs[3], 25); | |
569 | return -1; | |
570 | } | |
571 | // Confirm that padding around dests is unchanged | |
572 | memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff | |
573 | offset = udest_ptrs[0] - dest1; | |
574 | ||
575 | if (memcmp(dest1, dest_ref1, offset)) { | |
576 | printf("Fail rand ualign pad1 start\n"); | |
577 | return -1; | |
578 | } | |
579 | if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
580 | printf("Fail rand ualign pad1 end\n"); | |
581 | printf("size=%d offset=%d srcs=%d\n", size, offset, srcs); | |
582 | return -1; | |
583 | } | |
584 | ||
585 | offset = udest_ptrs[1] - dest2; | |
586 | if (memcmp(dest2, dest_ref1, offset)) { | |
587 | printf("Fail rand ualign pad2 start\n"); | |
588 | return -1; | |
589 | } | |
590 | if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
591 | printf("Fail rand ualign pad2 end\n"); | |
592 | return -1; | |
593 | } | |
594 | ||
595 | offset = udest_ptrs[2] - dest3; | |
596 | if (memcmp(dest3, dest_ref1, offset)) { | |
597 | printf("Fail rand ualign pad3 start\n"); | |
598 | return -1; | |
599 | } | |
600 | if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
601 | printf("Fail rand ualign pad3 end\n"); | |
602 | return -1; | |
603 | } | |
604 | ||
605 | offset = udest_ptrs[3] - dest4; | |
606 | if (memcmp(dest4, dest_ref1, offset)) { | |
607 | printf("Fail rand ualign pad4 start\n"); | |
608 | return -1; | |
609 | } | |
610 | if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
611 | printf("Fail rand ualign pad4 end\n"); | |
612 | return -1; | |
613 | } | |
614 | ||
615 | putchar('.'); | |
616 | } | |
617 | ||
618 | // Test all size alignment | |
619 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32; | |
620 | ||
621 | for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { | |
622 | srcs = TEST_SOURCES; | |
623 | ||
624 | for (i = 0; i < srcs; i++) | |
625 | for (j = 0; j < size; j++) | |
626 | buffs[i][j] = rand(); | |
627 | ||
628 | for (i = 0; i < srcs; i++) { | |
629 | g1[i] = rand(); | |
630 | g2[i] = rand(); | |
631 | g3[i] = rand(); | |
632 | g4[i] = rand(); | |
633 | } | |
634 | ||
635 | for (i = 0; i < srcs; i++) { | |
636 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
637 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
638 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
639 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
640 | } | |
641 | ||
642 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); | |
643 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); | |
644 | gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); | |
645 | gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); | |
646 | ||
647 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); | |
648 | ||
649 | if (memcmp(dest_ref1, dest_ptrs[0], size)) { | |
650 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
651 | size); | |
652 | dump_matrix(buffs, 5, TEST_SOURCES); | |
653 | printf("dprod_base:"); | |
654 | dump(dest_ref1, 25); | |
655 | printf("dprod_dut:"); | |
656 | dump(dest_ptrs[0], 25); | |
657 | return -1; | |
658 | } | |
659 | if (memcmp(dest_ref2, dest_ptrs[1], size)) { | |
660 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
661 | size); | |
662 | dump_matrix(buffs, 5, TEST_SOURCES); | |
663 | printf("dprod_base:"); | |
664 | dump(dest_ref2, 25); | |
665 | printf("dprod_dut:"); | |
666 | dump(dest_ptrs[1], 25); | |
667 | return -1; | |
668 | } | |
669 | if (memcmp(dest_ref3, dest_ptrs[2], size)) { | |
670 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
671 | size); | |
672 | dump_matrix(buffs, 5, TEST_SOURCES); | |
673 | printf("dprod_base:"); | |
674 | dump(dest_ref3, 25); | |
675 | printf("dprod_dut:"); | |
676 | dump(dest_ptrs[2], 25); | |
677 | return -1; | |
678 | } | |
679 | if (memcmp(dest_ref4, dest_ptrs[3], size)) { | |
680 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
681 | size); | |
682 | dump_matrix(buffs, 5, TEST_SOURCES); | |
683 | printf("dprod_base:"); | |
684 | dump(dest_ref4, 25); | |
685 | printf("dprod_dut:"); | |
686 | dump(dest_ptrs[3], 25); | |
687 | return -1; | |
688 | } | |
689 | } | |
690 | ||
691 | printf("Pass\n"); | |
692 | return 0; | |
693 | ||
694 | } |
102 | 102 | %define return rax |
103 | 103 | %define return.w eax |
104 | 104 | |
105 | %define func(x) x: | |
105 | %define func(x) x: endbranch | |
106 | 106 | %macro FUNC_SAVE 0 |
107 | 107 | push r12 |
108 | 108 | %endmacro |
168 | 168 | %define xd4 xtmpl1 |
169 | 169 | |
170 | 170 | align 16 |
171 | global gf_4vect_mad_avx:ISAL_SYM_TYPE_FUNCTION | |
171 | mk_global gf_4vect_mad_avx, function | |
172 | 172 | func(gf_4vect_mad_avx) |
173 | 173 | FUNC_SAVE |
174 | 174 | sub len, 16 |
100 | 100 | %define return rax |
101 | 101 | %define return.w eax |
102 | 102 | |
103 | %define func(x) x: | |
103 | %define func(x) x: endbranch | |
104 | 104 | %define FUNC_SAVE |
105 | 105 | %define FUNC_RESTORE |
106 | 106 | %endif |
164 | 164 | %define xd4 ymm10 |
165 | 165 | |
166 | 166 | align 16 |
167 | global gf_4vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION | |
167 | mk_global gf_4vect_mad_avx2, function | |
168 | 168 | func(gf_4vect_mad_avx2) |
169 | 169 | FUNC_SAVE |
170 | 170 | sub len, 32 |
43 | 43 | %define arg5 r9 |
44 | 44 | %define tmp r11 |
45 | 45 | %define return rax |
46 | %define func(x) x: | |
46 | %define func(x) x: endbranch | |
47 | 47 | %define FUNC_SAVE |
48 | 48 | %define FUNC_RESTORE |
49 | 49 | %endif |
158 | 158 | %define xtmpl5 zmm23 |
159 | 159 | |
160 | 160 | align 16 |
161 | global gf_4vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION | |
161 | mk_global gf_4vect_mad_avx512, function | |
162 | 162 | func(gf_4vect_mad_avx512) |
163 | 163 | FUNC_SAVE |
164 | 164 | sub len, 64 |
102 | 102 | %define return rax |
103 | 103 | %define return.w eax |
104 | 104 | |
105 | %define func(x) x: | |
105 | %define func(x) x: endbranch | |
106 | 106 | %macro FUNC_SAVE 0 |
107 | 107 | push r12 |
108 | 108 | %endmacro |
167 | 167 | %define xd4 xtmpl1 |
168 | 168 | |
169 | 169 | align 16 |
170 | global gf_4vect_mad_sse:ISAL_SYM_TYPE_FUNCTION | |
170 | mk_global gf_4vect_mad_sse, function | |
171 | 171 | func(gf_4vect_mad_sse) |
172 | 172 | FUNC_SAVE |
173 | 173 | sub len, 16 |
50 | 50 | %define PS 8 |
51 | 51 | %define LOG_PS 3 |
52 | 52 | |
53 | %define func(x) x: | |
53 | %define func(x) x: endbranch | |
54 | 54 | %macro FUNC_SAVE 0 |
55 | 55 | push r12 |
56 | 56 | push r13 |
88 | 88 | %define func(x) proc_frame x |
89 | 89 | %macro FUNC_SAVE 0 |
90 | 90 | alloc_stack stack_size |
91 | save_xmm128 xmm6, 0*16 | |
92 | save_xmm128 xmm7, 1*16 | |
93 | save_xmm128 xmm8, 2*16 | |
94 | save_xmm128 xmm9, 3*16 | |
95 | save_xmm128 xmm10, 4*16 | |
96 | save_xmm128 xmm11, 5*16 | |
97 | save_xmm128 xmm12, 6*16 | |
98 | save_xmm128 xmm13, 7*16 | |
99 | save_xmm128 xmm14, 8*16 | |
100 | save_xmm128 xmm15, 9*16 | |
91 | vmovdqa [rsp + 0*16], xmm6 | |
92 | vmovdqa [rsp + 1*16], xmm7 | |
93 | vmovdqa [rsp + 2*16], xmm8 | |
94 | vmovdqa [rsp + 3*16], xmm9 | |
95 | vmovdqa [rsp + 4*16], xmm10 | |
96 | vmovdqa [rsp + 5*16], xmm11 | |
97 | vmovdqa [rsp + 6*16], xmm12 | |
98 | vmovdqa [rsp + 7*16], xmm13 | |
99 | vmovdqa [rsp + 8*16], xmm14 | |
100 | vmovdqa [rsp + 9*16], xmm15 | |
101 | 101 | save_reg r12, 10*16 + 0*8 |
102 | 102 | save_reg r13, 10*16 + 1*8 |
103 | 103 | save_reg r14, 10*16 + 2*8 |
183 | 183 | %define xp5 xmm6 |
184 | 184 | |
185 | 185 | align 16 |
186 | global gf_5vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION | |
186 | mk_global gf_5vect_dot_prod_avx, function | |
187 | 187 | func(gf_5vect_dot_prod_avx) |
188 | 188 | FUNC_SAVE |
189 | 189 | sub len, 16 |
52 | 52 | %define PS 8 |
53 | 53 | %define LOG_PS 3 |
54 | 54 | |
55 | %define func(x) x: | |
55 | %define func(x) x: endbranch | |
56 | 56 | %macro FUNC_SAVE 0 |
57 | 57 | push r12 |
58 | 58 | push r13 |
188 | 188 | %define xp5 ymm6 |
189 | 189 | |
190 | 190 | align 16 |
191 | global gf_5vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION | |
191 | mk_global gf_5vect_dot_prod_avx2, function | |
192 | 192 | func(gf_5vect_dot_prod_avx2) |
193 | 193 | FUNC_SAVE |
194 | 194 | sub len, 32 |
56 | 56 | %define PS 8 |
57 | 57 | %define LOG_PS 3 |
58 | 58 | |
59 | %define func(x) x: | |
59 | %define func(x) x: endbranch | |
60 | 60 | %macro FUNC_SAVE 0 |
61 | 61 | push r12 |
62 | 62 | push r13 |
112 | 112 | vmovdqa [rsp + 7*16], xmm13 |
113 | 113 | vmovdqa [rsp + 8*16], xmm14 |
114 | 114 | vmovdqa [rsp + 9*16], xmm15 |
115 | save_reg r12, 9*16 + 0*8 | |
116 | save_reg r13, 9*16 + 1*8 | |
117 | save_reg r14, 9*16 + 2*8 | |
118 | save_reg r15, 9*16 + 3*8 | |
119 | save_reg rdi, 9*16 + 4*8 | |
120 | save_reg rsi, 9*16 + 5*8 | |
121 | save_reg rbp, 9*16 + 6*8 | |
122 | save_reg rbx, 9*16 + 7*8 | |
115 | save_reg r12, 10*16 + 0*8 | |
116 | save_reg r13, 10*16 + 1*8 | |
117 | save_reg r14, 10*16 + 2*8 | |
118 | save_reg r15, 10*16 + 3*8 | |
119 | save_reg rdi, 10*16 + 4*8 | |
120 | save_reg rsi, 10*16 + 5*8 | |
121 | save_reg rbp, 10*16 + 6*8 | |
122 | save_reg rbx, 10*16 + 7*8 | |
123 | 123 | end_prolog |
124 | 124 | mov arg4, arg(4) |
125 | 125 | %endmacro |
135 | 135 | vmovdqa xmm13, [rsp + 7*16] |
136 | 136 | vmovdqa xmm14, [rsp + 8*16] |
137 | 137 | vmovdqa xmm15, [rsp + 9*16] |
138 | mov r12, [rsp + 9*16 + 0*8] | |
139 | mov r13, [rsp + 9*16 + 1*8] | |
140 | mov r14, [rsp + 9*16 + 2*8] | |
141 | mov r15, [rsp + 9*16 + 3*8] | |
142 | mov rdi, [rsp + 9*16 + 4*8] | |
143 | mov rsi, [rsp + 9*16 + 5*8] | |
144 | mov rbp, [rsp + 9*16 + 6*8] | |
145 | mov rbx, [rsp + 9*16 + 7*8] | |
138 | mov r12, [rsp + 10*16 + 0*8] | |
139 | mov r13, [rsp + 10*16 + 1*8] | |
140 | mov r14, [rsp + 10*16 + 2*8] | |
141 | mov r15, [rsp + 10*16 + 3*8] | |
142 | mov rdi, [rsp + 10*16 + 4*8] | |
143 | mov rsi, [rsp + 10*16 + 5*8] | |
144 | mov rbp, [rsp + 10*16 + 6*8] | |
145 | mov rbx, [rsp + 10*16 + 7*8] | |
146 | 146 | add rsp, stack_size |
147 | 147 | %endmacro |
148 | 148 | %endif |
210 | 210 | section .text |
211 | 211 | |
212 | 212 | align 16 |
213 | global gf_5vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION | |
213 | mk_global gf_5vect_dot_prod_avx512, function | |
214 | 214 | func(gf_5vect_dot_prod_avx512) |
215 | 215 | FUNC_SAVE |
216 | 216 | sub len, 64 |
50 | 50 | %define PS 8 |
51 | 51 | %define LOG_PS 3 |
52 | 52 | |
53 | %define func(x) x: | |
53 | %define func(x) x: endbranch | |
54 | 54 | %macro FUNC_SAVE 0 |
55 | 55 | push r12 |
56 | 56 | push r13 |
183 | 183 | %define xp5 xmm14 |
184 | 184 | |
185 | 185 | align 16 |
186 | global gf_5vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION | |
186 | mk_global gf_5vect_dot_prod_sse, function | |
187 | 187 | func(gf_5vect_dot_prod_sse) |
188 | 188 | FUNC_SAVE |
189 | 189 | sub len, 16 |
0 | /********************************************************************** | |
1 | Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Intel Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | #include <stdio.h> | |
30 | #include <stdlib.h> | |
31 | #include <string.h> // for memset, memcmp | |
32 | #include "erasure_code.h" | |
33 | #include "types.h" | |
34 | ||
/* Routine exercised by this test, unless one was already defined. */
#if !defined(FUNCTION_UNDER_TEST)
# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse
#endif
/* Lower bound for the buffer lengths tried by the size-varying tests. */
#if !defined(TEST_MIN_SIZE)
# define TEST_MIN_SIZE  16
#endif

/* Two-level stringification: xstr() expands its argument before quoting it. */
#define str(s) #s
#define xstr(s) str(s)

/* Buffer geometry and iteration counts. */
#define TEST_LEN   8192
#define TEST_SIZE (TEST_LEN/2)
#define TEST_MEM  TEST_SIZE
#define TEST_LOOPS 20000
#define TEST_TYPE_STR ""

/* Number of source buffers, unless already defined. */
#if !defined(TEST_SOURCES)
# define TEST_SOURCES  16
#endif
/* Number of random-data iterations, unless already defined. */
#if !defined(RANDOMS)
# define RANDOMS 20
#endif

/*
 * Power-of-2 ranges used to check pointer and length alignment.
 * A value of 0 restricts the corresponding check to aligned cases only,
 * which is what EC_ALIGNED_ADDR selects.
 */
#ifndef EC_ALIGNED_ADDR
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
#else
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
#endif

/* Shorthand for a single byte. */
typedef unsigned char u8;
69 | ||
/*
 * Print the first len bytes of buf in hex, each as " %2x".
 * A line break is emitted after every 32nd byte and once more at the end.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", 0xff & buf[pos]);
		pos++;
		/* pos now counts the bytes printed so far */
		if ((pos % 32) == 0)
			putchar('\n');
	}
	putchar('\n');
}
80 | ||
/*
 * Print a k-row table in hex: row r shows the first m bytes of s[r],
 * each as " %2x". Every row ends with a newline, and a blank line
 * follows the table.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		putchar('\n');
		row++;
	}
	putchar('\n');
}
92 | ||
/*
 * Print a flat byte array as a k x m table in hex (row-major: element
 * (r, c) is s[c + r * m]), each as " %2x". Every row ends with a
 * newline, and a blank line follows the table.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			int idx = col + (row * m);

			printf(" %2x", 0xff & s[idx]);
			col++;
		}
		putchar('\n');
		row++;
	}
	putchar('\n');
}
104 | ||
105 | int main(int argc, char *argv[]) | |
106 | { | |
107 | int i, j, rtest, srcs; | |
108 | void *buf; | |
109 | u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; | |
110 | u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls; | |
111 | u8 *dest1, *dest2, *dest3, *dest4, *dest5, *buffs[TEST_SOURCES]; | |
112 | u8 *dest_ref1, *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5; | |
113 | u8 *dest_ptrs[5]; | |
114 | ||
115 | int align, size; | |
116 | unsigned char *efence_buffs[TEST_SOURCES]; | |
117 | unsigned int offset; | |
118 | u8 *ubuffs[TEST_SOURCES]; | |
119 | u8 *udest_ptrs[5]; | |
120 | printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); | |
121 | ||
122 | // Allocate the arrays | |
123 | for (i = 0; i < TEST_SOURCES; i++) { | |
124 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
125 | printf("alloc error: Fail"); | |
126 | return -1; | |
127 | } | |
128 | buffs[i] = buf; | |
129 | } | |
130 | ||
131 | if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) { | |
132 | printf("alloc error: Fail"); | |
133 | return -1; | |
134 | } | |
135 | g_tbls = buf; | |
136 | ||
137 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
138 | printf("alloc error: Fail"); | |
139 | return -1; | |
140 | } | |
141 | dest1 = buf; | |
142 | ||
143 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
144 | printf("alloc error: Fail"); | |
145 | return -1; | |
146 | } | |
147 | dest2 = buf; | |
148 | ||
149 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
150 | printf("alloc error: Fail"); | |
151 | return -1; | |
152 | } | |
153 | dest3 = buf; | |
154 | ||
155 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
156 | printf("alloc error: Fail"); | |
157 | return -1; | |
158 | } | |
159 | dest4 = buf; | |
160 | ||
161 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
162 | printf("alloc error: Fail"); | |
163 | return -1; | |
164 | } | |
165 | dest5 = buf; | |
166 | ||
167 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
168 | printf("alloc error: Fail"); | |
169 | return -1; | |
170 | } | |
171 | dest_ref1 = buf; | |
172 | ||
173 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
174 | printf("alloc error: Fail"); | |
175 | return -1; | |
176 | } | |
177 | dest_ref2 = buf; | |
178 | ||
179 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
180 | printf("alloc error: Fail"); | |
181 | return -1; | |
182 | } | |
183 | dest_ref3 = buf; | |
184 | ||
185 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
186 | printf("alloc error: Fail"); | |
187 | return -1; | |
188 | } | |
189 | dest_ref4 = buf; | |
190 | ||
191 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
192 | printf("alloc error: Fail"); | |
193 | return -1; | |
194 | } | |
195 | dest_ref5 = buf; | |
196 | ||
197 | dest_ptrs[0] = dest1; | |
198 | dest_ptrs[1] = dest2; | |
199 | dest_ptrs[2] = dest3; | |
200 | dest_ptrs[3] = dest4; | |
201 | dest_ptrs[4] = dest5; | |
202 | ||
203 | // Test of all zeros | |
204 | for (i = 0; i < TEST_SOURCES; i++) | |
205 | memset(buffs[i], 0, TEST_LEN); | |
206 | ||
207 | memset(dest1, 0, TEST_LEN); | |
208 | memset(dest2, 0, TEST_LEN); | |
209 | memset(dest3, 0, TEST_LEN); | |
210 | memset(dest4, 0, TEST_LEN); | |
211 | memset(dest5, 0, TEST_LEN); | |
212 | memset(dest_ref1, 0, TEST_LEN); | |
213 | memset(dest_ref2, 0, TEST_LEN); | |
214 | memset(dest_ref3, 0, TEST_LEN); | |
215 | memset(dest_ref4, 0, TEST_LEN); | |
216 | memset(dest_ref5, 0, TEST_LEN); | |
217 | memset(g1, 2, TEST_SOURCES); | |
218 | memset(g2, 1, TEST_SOURCES); | |
219 | memset(g3, 7, TEST_SOURCES); | |
220 | memset(g4, 9, TEST_SOURCES); | |
221 | memset(g5, 4, TEST_SOURCES); | |
222 | ||
223 | for (i = 0; i < TEST_SOURCES; i++) { | |
224 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
225 | gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); | |
226 | gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); | |
227 | gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); | |
228 | gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]); | |
229 | } | |
230 | ||
231 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
232 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, | |
233 | dest_ref2); | |
234 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, | |
235 | dest_ref3); | |
236 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, | |
237 | dest_ref4); | |
238 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, | |
239 | dest_ref5); | |
240 | ||
241 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
242 | ||
243 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
244 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); | |
245 | dump_matrix(buffs, 5, TEST_SOURCES); | |
246 | printf("dprod_base:"); | |
247 | dump(dest_ref1, 25); | |
248 | printf("dprod_dut:"); | |
249 | dump(dest1, 25); | |
250 | return -1; | |
251 | } | |
252 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
253 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); | |
254 | dump_matrix(buffs, 5, TEST_SOURCES); | |
255 | printf("dprod_base:"); | |
256 | dump(dest_ref2, 25); | |
257 | printf("dprod_dut:"); | |
258 | dump(dest2, 25); | |
259 | return -1; | |
260 | } | |
261 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
262 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); | |
263 | dump_matrix(buffs, 5, TEST_SOURCES); | |
264 | printf("dprod_base:"); | |
265 | dump(dest_ref3, 25); | |
266 | printf("dprod_dut:"); | |
267 | dump(dest3, 25); | |
268 | return -1; | |
269 | } | |
270 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
271 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); | |
272 | dump_matrix(buffs, 5, TEST_SOURCES); | |
273 | printf("dprod_base:"); | |
274 | dump(dest_ref4, 25); | |
275 | printf("dprod_dut:"); | |
276 | dump(dest4, 25); | |
277 | return -1; | |
278 | } | |
279 | if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { | |
280 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n"); | |
281 | dump_matrix(buffs, 5, TEST_SOURCES); | |
282 | printf("dprod_base:"); | |
283 | dump(dest_ref5, 25); | |
284 | printf("dprod_dut:"); | |
285 | dump(dest5, 25); | |
286 | return -1; | |
287 | } | |
288 | putchar('.'); | |
289 | ||
290 | // Rand data test | |
291 | ||
292 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
293 | for (i = 0; i < TEST_SOURCES; i++) | |
294 | for (j = 0; j < TEST_LEN; j++) | |
295 | buffs[i][j] = rand(); | |
296 | ||
297 | for (i = 0; i < TEST_SOURCES; i++) { | |
298 | g1[i] = rand(); | |
299 | g2[i] = rand(); | |
300 | g3[i] = rand(); | |
301 | g4[i] = rand(); | |
302 | g5[i] = rand(); | |
303 | } | |
304 | ||
305 | for (i = 0; i < TEST_SOURCES; i++) { | |
306 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
307 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
308 | gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); | |
309 | gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); | |
310 | gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); | |
311 | } | |
312 | ||
313 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
314 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
315 | buffs, dest_ref2); | |
316 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], | |
317 | buffs, dest_ref3); | |
318 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], | |
319 | buffs, dest_ref4); | |
320 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], | |
321 | buffs, dest_ref5); | |
322 | ||
323 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
324 | ||
325 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
326 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
327 | dump_matrix(buffs, 5, TEST_SOURCES); | |
328 | printf("dprod_base:"); | |
329 | dump(dest_ref1, 25); | |
330 | printf("dprod_dut:"); | |
331 | dump(dest1, 25); | |
332 | return -1; | |
333 | } | |
334 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
335 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
336 | dump_matrix(buffs, 5, TEST_SOURCES); | |
337 | printf("dprod_base:"); | |
338 | dump(dest_ref2, 25); | |
339 | printf("dprod_dut:"); | |
340 | dump(dest2, 25); | |
341 | return -1; | |
342 | } | |
343 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
344 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); | |
345 | dump_matrix(buffs, 5, TEST_SOURCES); | |
346 | printf("dprod_base:"); | |
347 | dump(dest_ref3, 25); | |
348 | printf("dprod_dut:"); | |
349 | dump(dest3, 25); | |
350 | return -1; | |
351 | } | |
352 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
353 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); | |
354 | dump_matrix(buffs, 5, TEST_SOURCES); | |
355 | printf("dprod_base:"); | |
356 | dump(dest_ref4, 25); | |
357 | printf("dprod_dut:"); | |
358 | dump(dest4, 25); | |
359 | return -1; | |
360 | } | |
361 | if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { | |
362 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); | |
363 | dump_matrix(buffs, 5, TEST_SOURCES); | |
364 | printf("dprod_base:"); | |
365 | dump(dest_ref5, 25); | |
366 | printf("dprod_dut:"); | |
367 | dump(dest5, 25); | |
368 | return -1; | |
369 | } | |
370 | ||
371 | putchar('.'); | |
372 | } | |
373 | ||
374 | // Rand data test with varied parameters | |
375 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
376 | for (srcs = TEST_SOURCES; srcs > 0; srcs--) { | |
377 | for (i = 0; i < srcs; i++) | |
378 | for (j = 0; j < TEST_LEN; j++) | |
379 | buffs[i][j] = rand(); | |
380 | ||
381 | for (i = 0; i < srcs; i++) { | |
382 | g1[i] = rand(); | |
383 | g2[i] = rand(); | |
384 | g3[i] = rand(); | |
385 | g4[i] = rand(); | |
386 | g5[i] = rand(); | |
387 | } | |
388 | ||
389 | for (i = 0; i < srcs; i++) { | |
390 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
391 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
392 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
393 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
394 | gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); | |
395 | } | |
396 | ||
397 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); | |
398 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, | |
399 | dest_ref2); | |
400 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, | |
401 | dest_ref3); | |
402 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, | |
403 | dest_ref4); | |
404 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs, | |
405 | dest_ref5); | |
406 | ||
407 | FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); | |
408 | ||
409 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
410 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
411 | " test1 srcs=%d\n", srcs); | |
412 | dump_matrix(buffs, 5, TEST_SOURCES); | |
413 | printf("dprod_base:"); | |
414 | dump(dest_ref1, 25); | |
415 | printf("dprod_dut:"); | |
416 | dump(dest1, 25); | |
417 | return -1; | |
418 | } | |
419 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
420 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
421 | " test2 srcs=%d\n", srcs); | |
422 | dump_matrix(buffs, 5, TEST_SOURCES); | |
423 | printf("dprod_base:"); | |
424 | dump(dest_ref2, 25); | |
425 | printf("dprod_dut:"); | |
426 | dump(dest2, 25); | |
427 | return -1; | |
428 | } | |
429 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
430 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
431 | " test3 srcs=%d\n", srcs); | |
432 | dump_matrix(buffs, 5, TEST_SOURCES); | |
433 | printf("dprod_base:"); | |
434 | dump(dest_ref3, 25); | |
435 | printf("dprod_dut:"); | |
436 | dump(dest3, 25); | |
437 | return -1; | |
438 | } | |
439 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
440 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
441 | " test4 srcs=%d\n", srcs); | |
442 | dump_matrix(buffs, 5, TEST_SOURCES); | |
443 | printf("dprod_base:"); | |
444 | dump(dest_ref4, 25); | |
445 | printf("dprod_dut:"); | |
446 | dump(dest4, 25); | |
447 | return -1; | |
448 | } | |
449 | if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { | |
450 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
451 | " test5 srcs=%d\n", srcs); | |
452 | dump_matrix(buffs, 5, TEST_SOURCES); | |
453 | printf("dprod_base:"); | |
454 | dump(dest_ref5, 25); | |
455 | printf("dprod_dut:"); | |
456 | dump(dest5, 25); | |
457 | return -1; | |
458 | } | |
459 | ||
460 | putchar('.'); | |
461 | } | |
462 | } | |
463 | ||
464 | // Run tests at end of buffer for Electric Fence | |
465 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; | |
466 | for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { | |
467 | for (i = 0; i < TEST_SOURCES; i++) | |
468 | for (j = 0; j < TEST_LEN; j++) | |
469 | buffs[i][j] = rand(); | |
470 | ||
471 | for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end | |
472 | efence_buffs[i] = buffs[i] + TEST_LEN - size; | |
473 | ||
474 | for (i = 0; i < TEST_SOURCES; i++) { | |
475 | g1[i] = rand(); | |
476 | g2[i] = rand(); | |
477 | g3[i] = rand(); | |
478 | g4[i] = rand(); | |
479 | g5[i] = rand(); | |
480 | } | |
481 | ||
482 | for (i = 0; i < TEST_SOURCES; i++) { | |
483 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
484 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
485 | gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); | |
486 | gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); | |
487 | gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); | |
488 | } | |
489 | ||
490 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); | |
491 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
492 | efence_buffs, dest_ref2); | |
493 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], | |
494 | efence_buffs, dest_ref3); | |
495 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], | |
496 | efence_buffs, dest_ref4); | |
497 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], | |
498 | efence_buffs, dest_ref5); | |
499 | ||
500 | FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); | |
501 | ||
502 | if (0 != memcmp(dest_ref1, dest1, size)) { | |
503 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
504 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
505 | printf("dprod_base:"); | |
506 | dump(dest_ref1, align); | |
507 | printf("dprod_dut:"); | |
508 | dump(dest1, align); | |
509 | return -1; | |
510 | } | |
511 | ||
512 | if (0 != memcmp(dest_ref2, dest2, size)) { | |
513 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
514 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
515 | printf("dprod_base:"); | |
516 | dump(dest_ref2, align); | |
517 | printf("dprod_dut:"); | |
518 | dump(dest2, align); | |
519 | return -1; | |
520 | } | |
521 | ||
522 | if (0 != memcmp(dest_ref3, dest3, size)) { | |
523 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); | |
524 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
525 | printf("dprod_base:"); | |
526 | dump(dest_ref3, align); | |
527 | printf("dprod_dut:"); | |
528 | dump(dest3, align); | |
529 | return -1; | |
530 | } | |
531 | ||
532 | if (0 != memcmp(dest_ref4, dest4, size)) { | |
533 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); | |
534 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
535 | printf("dprod_base:"); | |
536 | dump(dest_ref4, align); | |
537 | printf("dprod_dut:"); | |
538 | dump(dest4, align); | |
539 | return -1; | |
540 | } | |
541 | ||
542 | if (0 != memcmp(dest_ref5, dest5, size)) { | |
543 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); | |
544 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
545 | printf("dprod_base:"); | |
546 | dump(dest_ref5, align); | |
547 | printf("dprod_dut:"); | |
548 | dump(dest5, align); | |
549 | return -1; | |
550 | } | |
551 | ||
552 | putchar('.'); | |
553 | } | |
554 | ||
555 | // Test rand ptr alignment if available | |
556 | ||
557 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
558 | size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); | |
559 | srcs = rand() % TEST_SOURCES; | |
560 | if (srcs == 0) | |
561 | continue; | |
562 | ||
563 | offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; | |
564 | // Add random offsets | |
565 | for (i = 0; i < srcs; i++) | |
566 | ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
567 | ||
568 | udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
569 | udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
570 | udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
571 | udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
572 | udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
573 | ||
574 | memset(dest1, 0, TEST_LEN); // zero pad to check write-over | |
575 | memset(dest2, 0, TEST_LEN); | |
576 | memset(dest3, 0, TEST_LEN); | |
577 | memset(dest4, 0, TEST_LEN); | |
578 | memset(dest5, 0, TEST_LEN); | |
579 | ||
580 | for (i = 0; i < srcs; i++) | |
581 | for (j = 0; j < size; j++) | |
582 | ubuffs[i][j] = rand(); | |
583 | ||
584 | for (i = 0; i < srcs; i++) { | |
585 | g1[i] = rand(); | |
586 | g2[i] = rand(); | |
587 | g3[i] = rand(); | |
588 | g4[i] = rand(); | |
589 | g5[i] = rand(); | |
590 | } | |
591 | ||
592 | for (i = 0; i < srcs; i++) { | |
593 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
594 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
595 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
596 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
597 | gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); | |
598 | } | |
599 | ||
600 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); | |
601 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); | |
602 | gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); | |
603 | gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); | |
604 | gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5); | |
605 | ||
606 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); | |
607 | ||
608 | if (memcmp(dest_ref1, udest_ptrs[0], size)) { | |
609 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
610 | srcs); | |
611 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
612 | printf("dprod_base:"); | |
613 | dump(dest_ref1, 25); | |
614 | printf("dprod_dut:"); | |
615 | dump(udest_ptrs[0], 25); | |
616 | return -1; | |
617 | } | |
618 | if (memcmp(dest_ref2, udest_ptrs[1], size)) { | |
619 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
620 | srcs); | |
621 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
622 | printf("dprod_base:"); | |
623 | dump(dest_ref2, 25); | |
624 | printf("dprod_dut:"); | |
625 | dump(udest_ptrs[1], 25); | |
626 | return -1; | |
627 | } | |
628 | if (memcmp(dest_ref3, udest_ptrs[2], size)) { | |
629 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
630 | srcs); | |
631 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
632 | printf("dprod_base:"); | |
633 | dump(dest_ref3, 25); | |
634 | printf("dprod_dut:"); | |
635 | dump(udest_ptrs[2], 25); | |
636 | return -1; | |
637 | } | |
638 | if (memcmp(dest_ref4, udest_ptrs[3], size)) { | |
639 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
640 | srcs); | |
641 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
642 | printf("dprod_base:"); | |
643 | dump(dest_ref4, 25); | |
644 | printf("dprod_dut:"); | |
645 | dump(udest_ptrs[3], 25); | |
646 | return -1; | |
647 | } | |
648 | if (memcmp(dest_ref5, udest_ptrs[4], size)) { | |
649 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
650 | srcs); | |
651 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
652 | printf("dprod_base:"); | |
653 | dump(dest_ref5, 25); | |
654 | printf("dprod_dut:"); | |
655 | dump(udest_ptrs[4], 25); | |
656 | return -1; | |
657 | } | |
658 | // Confirm that padding around dests is unchanged | |
659 | memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff | |
660 | offset = udest_ptrs[0] - dest1; | |
661 | ||
662 | if (memcmp(dest1, dest_ref1, offset)) { | |
663 | printf("Fail rand ualign pad1 start\n"); | |
664 | return -1; | |
665 | } | |
666 | if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
667 | printf("Fail rand ualign pad1 end\n"); | |
668 | return -1; | |
669 | } | |
670 | ||
671 | offset = udest_ptrs[1] - dest2; | |
672 | if (memcmp(dest2, dest_ref1, offset)) { | |
673 | printf("Fail rand ualign pad2 start\n"); | |
674 | return -1; | |
675 | } | |
676 | if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
677 | printf("Fail rand ualign pad2 end\n"); | |
678 | return -1; | |
679 | } | |
680 | ||
681 | offset = udest_ptrs[2] - dest3; | |
682 | if (memcmp(dest3, dest_ref1, offset)) { | |
683 | printf("Fail rand ualign pad3 start\n"); | |
684 | return -1; | |
685 | } | |
686 | if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
687 | printf("Fail rand ualign pad3 end\n"); | |
688 | return -1; | |
689 | } | |
690 | ||
691 | offset = udest_ptrs[3] - dest4; | |
692 | if (memcmp(dest4, dest_ref1, offset)) { | |
693 | printf("Fail rand ualign pad4 start\n"); | |
694 | return -1; | |
695 | } | |
696 | if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
697 | printf("Fail rand ualign pad4 end\n"); | |
698 | return -1; | |
699 | } | |
700 | ||
701 | offset = udest_ptrs[4] - dest5; | |
702 | if (memcmp(dest5, dest_ref1, offset)) { | |
703 | printf("Fail rand ualign pad5 start\n"); | |
704 | return -1; | |
705 | } | |
706 | if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
707 | printf("Fail rand ualign pad5 end\n"); | |
708 | return -1; | |
709 | } | |
710 | ||
711 | putchar('.'); | |
712 | } | |
713 | ||
714 | // Test all size alignment | |
715 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; | |
716 | ||
717 | for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { | |
718 | srcs = TEST_SOURCES; | |
719 | ||
720 | for (i = 0; i < srcs; i++) | |
721 | for (j = 0; j < size; j++) | |
722 | buffs[i][j] = rand(); | |
723 | ||
724 | for (i = 0; i < srcs; i++) { | |
725 | g1[i] = rand(); | |
726 | g2[i] = rand(); | |
727 | g3[i] = rand(); | |
728 | g4[i] = rand(); | |
729 | g5[i] = rand(); | |
730 | } | |
731 | ||
732 | for (i = 0; i < srcs; i++) { | |
733 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
734 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
735 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
736 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
737 | gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); | |
738 | } | |
739 | ||
740 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); | |
741 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); | |
742 | gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); | |
743 | gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); | |
744 | gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5); | |
745 | ||
746 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); | |
747 | ||
748 | if (memcmp(dest_ref1, dest_ptrs[0], size)) { | |
749 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
750 | size); | |
751 | dump_matrix(buffs, 5, TEST_SOURCES); | |
752 | printf("dprod_base:"); | |
753 | dump(dest_ref1, 25); | |
754 | printf("dprod_dut:"); | |
755 | dump(dest_ptrs[0], 25); | |
756 | ||
757 | return -1; | |
758 | } | |
759 | if (memcmp(dest_ref2, dest_ptrs[1], size)) { | |
760 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
761 | size); | |
762 | dump_matrix(buffs, 5, TEST_SOURCES); | |
763 | printf("dprod_base:"); | |
764 | dump(dest_ref2, 25); | |
765 | printf("dprod_dut:"); | |
766 | dump(dest_ptrs[1], 25); | |
767 | return -1; | |
768 | } | |
769 | if (memcmp(dest_ref3, dest_ptrs[2], size)) { | |
770 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
771 | size); | |
772 | dump_matrix(buffs, 5, TEST_SOURCES); | |
773 | printf("dprod_base:"); | |
774 | dump(dest_ref3, 25); | |
775 | printf("dprod_dut:"); | |
776 | dump(dest_ptrs[2], 25); | |
777 | return -1; | |
778 | } | |
779 | if (memcmp(dest_ref4, dest_ptrs[3], size)) { | |
780 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
781 | size); | |
782 | dump_matrix(buffs, 5, TEST_SOURCES); | |
783 | printf("dprod_base:"); | |
784 | dump(dest_ref4, 25); | |
785 | printf("dprod_dut:"); | |
786 | dump(dest_ptrs[3], 25); | |
787 | return -1; | |
788 | } | |
789 | if (memcmp(dest_ref5, dest_ptrs[4], size)) { | |
790 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
791 | size); | |
792 | dump_matrix(buffs, 5, TEST_SOURCES); | |
793 | printf("dprod_base:"); | |
794 | dump(dest_ref5, 25); | |
795 | printf("dprod_dut:"); | |
796 | dump(dest_ptrs[4], 25); | |
797 | return -1; | |
798 | } | |
799 | } | |
800 | ||
801 | printf("Pass\n"); | |
802 | return 0; | |
803 | ||
804 | } |
106 | 106 | %define return rax |
107 | 107 | %define return.w eax |
108 | 108 | |
109 | %define func(x) x: | |
109 | %define func(x) x: endbranch | |
110 | 110 | %macro FUNC_SAVE 0 |
111 | 111 | push r12 |
112 | 112 | push r13 |
177 | 177 | |
178 | 178 | |
179 | 179 | align 16 |
180 | global gf_5vect_mad_avx:ISAL_SYM_TYPE_FUNCTION | |
180 | mk_global gf_5vect_mad_avx, function | |
181 | 181 | func(gf_5vect_mad_avx) |
182 | 182 | FUNC_SAVE |
183 | 183 | sub len, 16 |
102 | 102 | %define return rax |
103 | 103 | %define return.w eax |
104 | 104 | |
105 | %define func(x) x: | |
105 | %define func(x) x: endbranch | |
106 | 106 | %define FUNC_SAVE |
107 | 107 | %define FUNC_RESTORE |
108 | 108 | %endif |
165 | 165 | %define xd5 ymm9 |
166 | 166 | |
167 | 167 | align 16 |
168 | global gf_5vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION | |
168 | mk_global gf_5vect_mad_avx2, function | |
169 | 169 | func(gf_5vect_mad_avx2) |
170 | 170 | FUNC_SAVE |
171 | 171 | sub len, 32 |
44 | 44 | %define tmp r11 |
45 | 45 | %define tmp2 r10 |
46 | 46 | %define return rax |
47 | %define func(x) x: | |
47 | %define func(x) x: endbranch | |
48 | 48 | %define FUNC_SAVE |
49 | 49 | %define FUNC_RESTORE |
50 | 50 | %endif |
166 | 166 | %define xtmph5 zmm27 |
167 | 167 | |
168 | 168 | align 16 |
169 | global gf_5vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION | |
169 | mk_global gf_5vect_mad_avx512, function | |
170 | 170 | func(gf_5vect_mad_avx512) |
171 | 171 | FUNC_SAVE |
172 | 172 | sub len, 64 |
106 | 106 | %define return rax |
107 | 107 | %define return.w eax |
108 | 108 | |
109 | %define func(x) x: | |
109 | %define func(x) x: endbranch | |
110 | 110 | %macro FUNC_SAVE 0 |
111 | 111 | push r12 |
112 | 112 | push r13 |
176 | 176 | |
177 | 177 | |
178 | 178 | align 16 |
179 | global gf_5vect_mad_sse:ISAL_SYM_TYPE_FUNCTION | |
179 | mk_global gf_5vect_mad_sse, function | |
180 | 180 | func(gf_5vect_mad_sse) |
181 | 181 | FUNC_SAVE |
182 | 182 | sub len, 16 |
50 | 50 | %define PS 8 |
51 | 51 | %define LOG_PS 3 |
52 | 52 | |
53 | %define func(x) x: | |
53 | %define func(x) x: endbranch | |
54 | 54 | %macro FUNC_SAVE 0 |
55 | 55 | push r12 |
56 | 56 | push r13 |
88 | 88 | %define func(x) proc_frame x |
89 | 89 | %macro FUNC_SAVE 0 |
90 | 90 | alloc_stack stack_size |
91 | save_xmm128 xmm6, 0*16 | |
92 | save_xmm128 xmm7, 1*16 | |
93 | save_xmm128 xmm8, 2*16 | |
94 | save_xmm128 xmm9, 3*16 | |
95 | save_xmm128 xmm10, 4*16 | |
96 | save_xmm128 xmm11, 5*16 | |
97 | save_xmm128 xmm12, 6*16 | |
98 | save_xmm128 xmm13, 7*16 | |
99 | save_xmm128 xmm14, 8*16 | |
100 | save_xmm128 xmm15, 9*16 | |
91 | vmovdqa [rsp + 0*16], xmm6 | |
92 | vmovdqa [rsp + 1*16], xmm7 | |
93 | vmovdqa [rsp + 2*16], xmm8 | |
94 | vmovdqa [rsp + 3*16], xmm9 | |
95 | vmovdqa [rsp + 4*16], xmm10 | |
96 | vmovdqa [rsp + 5*16], xmm11 | |
97 | vmovdqa [rsp + 6*16], xmm12 | |
98 | vmovdqa [rsp + 7*16], xmm13 | |
99 | vmovdqa [rsp + 8*16], xmm14 | |
100 | vmovdqa [rsp + 9*16], xmm15 | |
101 | 101 | save_reg r12, 10*16 + 0*8 |
102 | 102 | save_reg r13, 10*16 + 1*8 |
103 | 103 | save_reg r14, 10*16 + 2*8 |
181 | 181 | %define xp6 xmm7 |
182 | 182 | |
183 | 183 | align 16 |
184 | global gf_6vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION | |
184 | mk_global gf_6vect_dot_prod_avx, function | |
185 | 185 | func(gf_6vect_dot_prod_avx) |
186 | 186 | FUNC_SAVE |
187 | 187 | sub len, 16 |
52 | 52 | %define PS 8 |
53 | 53 | %define LOG_PS 3 |
54 | 54 | |
55 | %define func(x) x: | |
55 | %define func(x) x: endbranch | |
56 | 56 | %macro FUNC_SAVE 0 |
57 | 57 | push r12 |
58 | 58 | push r13 |
186 | 186 | %define xp6 ymm7 |
187 | 187 | |
188 | 188 | align 16 |
189 | global gf_6vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION | |
189 | mk_global gf_6vect_dot_prod_avx2, function | |
190 | 190 | func(gf_6vect_dot_prod_avx2) |
191 | 191 | FUNC_SAVE |
192 | 192 | sub len, 32 |
56 | 56 | %define PS 8 |
57 | 57 | %define LOG_PS 3 |
58 | 58 | |
59 | %define func(x) x: | |
59 | %define func(x) x: endbranch | |
60 | 60 | %macro FUNC_SAVE 0 |
61 | 61 | push r12 |
62 | 62 | push r13 |
112 | 112 | vmovdqa [rsp + 7*16], xmm13 |
113 | 113 | vmovdqa [rsp + 8*16], xmm14 |
114 | 114 | vmovdqa [rsp + 9*16], xmm15 |
115 | save_reg r12, 9*16 + 0*8 | |
116 | save_reg r13, 9*16 + 1*8 | |
117 | save_reg r14, 9*16 + 2*8 | |
118 | save_reg r15, 9*16 + 3*8 | |
119 | save_reg rdi, 9*16 + 4*8 | |
120 | save_reg rsi, 9*16 + 5*8 | |
121 | save_reg rbp, 9*16 + 6*8 | |
122 | save_reg rbx, 9*16 + 7*8 | |
115 | save_reg r12, 10*16 + 0*8 | |
116 | save_reg r13, 10*16 + 1*8 | |
117 | save_reg r14, 10*16 + 2*8 | |
118 | save_reg r15, 10*16 + 3*8 | |
119 | save_reg rdi, 10*16 + 4*8 | |
120 | save_reg rsi, 10*16 + 5*8 | |
121 | save_reg rbp, 10*16 + 6*8 | |
122 | save_reg rbx, 10*16 + 7*8 | |
123 | 123 | end_prolog |
124 | 124 | mov arg4, arg(4) |
125 | 125 | %endmacro |
135 | 135 | vmovdqa xmm13, [rsp + 7*16] |
136 | 136 | vmovdqa xmm14, [rsp + 8*16] |
137 | 137 | vmovdqa xmm15, [rsp + 9*16] |
138 | mov r12, [rsp + 9*16 + 0*8] | |
139 | mov r13, [rsp + 9*16 + 1*8] | |
140 | mov r14, [rsp + 9*16 + 2*8] | |
141 | mov r15, [rsp + 9*16 + 3*8] | |
142 | mov rdi, [rsp + 9*16 + 4*8] | |
143 | mov rsi, [rsp + 9*16 + 5*8] | |
144 | mov rbp, [rsp + 9*16 + 6*8] | |
145 | mov rbx, [rsp + 9*16 + 7*8] | |
138 | mov r12, [rsp + 10*16 + 0*8] | |
139 | mov r13, [rsp + 10*16 + 1*8] | |
140 | mov r14, [rsp + 10*16 + 2*8] | |
141 | mov r15, [rsp + 10*16 + 3*8] | |
142 | mov rdi, [rsp + 10*16 + 4*8] | |
143 | mov rsi, [rsp + 10*16 + 5*8] | |
144 | mov rbp, [rsp + 10*16 + 6*8] | |
145 | mov rbx, [rsp + 10*16 + 7*8] | |
146 | 146 | add rsp, stack_size |
147 | 147 | %endmacro |
148 | 148 | %endif |
214 | 214 | section .text |
215 | 215 | |
216 | 216 | align 16 |
217 | global gf_6vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION | |
217 | mk_global gf_6vect_dot_prod_avx512, function | |
218 | 218 | func(gf_6vect_dot_prod_avx512) |
219 | 219 | FUNC_SAVE |
220 | 220 | sub len, 64 |
50 | 50 | %define PS 8 |
51 | 51 | %define LOG_PS 3 |
52 | 52 | |
53 | %define func(x) x: | |
53 | %define func(x) x: endbranch | |
54 | 54 | %macro FUNC_SAVE 0 |
55 | 55 | push r12 |
56 | 56 | push r13 |
181 | 181 | %define xp6 xmm13 |
182 | 182 | |
183 | 183 | align 16 |
184 | global gf_6vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION | |
184 | mk_global gf_6vect_dot_prod_sse, function | |
185 | 185 | func(gf_6vect_dot_prod_sse) |
186 | 186 | FUNC_SAVE |
187 | 187 | sub len, 16 |
0 | /********************************************************************** | |
1 | Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
2 | ||
3 | Redistribution and use in source and binary forms, with or without | |
4 | modification, are permitted provided that the following conditions | |
5 | are met: | |
6 | * Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | * Redistributions in binary form must reproduce the above copyright | |
9 | notice, this list of conditions and the following disclaimer in | |
10 | the documentation and/or other materials provided with the | |
11 | distribution. | |
12 | * Neither the name of Intel Corporation nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | **********************************************************************/ | |
28 | ||
29 | #include <stdio.h> | |
30 | #include <stdlib.h> | |
31 | #include <string.h> // for memset, memcmp | |
32 | #include "erasure_code.h" | |
33 | #include "types.h" | |
34 | ||
35 | #ifndef FUNCTION_UNDER_TEST | |
36 | # define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse | |
37 | #endif | |
38 | #ifndef TEST_MIN_SIZE | |
39 | # define TEST_MIN_SIZE 16 | |
40 | #endif | |
41 | ||
42 | #define str(s) #s | |
43 | #define xstr(s) str(s) | |
44 | ||
45 | #define TEST_LEN 8192 | |
46 | #define TEST_SIZE (TEST_LEN/2) | |
47 | #define TEST_MEM TEST_SIZE | |
48 | #define TEST_LOOPS 20000 | |
49 | #define TEST_TYPE_STR "" | |
50 | ||
51 | #ifndef TEST_SOURCES | |
52 | # define TEST_SOURCES 16 | |
53 | #endif | |
54 | #ifndef RANDOMS | |
55 | # define RANDOMS 20 | |
56 | #endif | |
57 | ||
58 | #ifdef EC_ALIGNED_ADDR | |
59 | // Define power of 2 range to check ptr, len alignment | |
60 | # define PTR_ALIGN_CHK_B 0 | |
61 | # define LEN_ALIGN_CHK_B 0 // 0 for aligned only | |
62 | #else | |
63 | // Define power of 2 range to check ptr, len alignment | |
64 | # define PTR_ALIGN_CHK_B 32 | |
65 | # define LEN_ALIGN_CHK_B 32 // 0 for aligned only | |
66 | #endif | |
67 | ||
68 | typedef unsigned char u8; | |
69 | ||
/*
 * Print the first len bytes of buf to stdout as hex values.
 *
 * Each byte is written with the " %2x" format.  A newline is emitted after
 * every 32nd byte, and one more newline is always written at the end (so a
 * length that is a multiple of 32 ends with a blank line).  Nothing but the
 * trailing newline is printed when len <= 0.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", 0xff & buf[pos]);
		pos++;
		/* pos now counts the bytes printed so far; wrap every 32. */
		if (!(pos % 32))
			printf("\n");
	}
	printf("\n");
}
80 | ||
/*
 * Print a k-row by m-column byte matrix held as an array of row pointers.
 *
 * s[row] is read as the start of row `row`; the first m bytes of each of the
 * first k rows are written in " %2x" format, one matrix row per output line.
 * A blank line follows the matrix.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
92 | ||
/*
 * Print a k-row by m-column byte matrix stored contiguously in s.
 *
 * Element (row, col) is read from s[col + row * m], i.e. row-major order.
 * Each byte is written in " %2x" format, one matrix row per output line,
 * and a blank line follows the matrix.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; ++row) {
		for (col = 0; col < m; ++col) {
			unsigned char byte = s[col + (row * m)];

			printf(" %2x", 0xff & byte);
		}
		printf("\n");
	}
	printf("\n");
}
104 | ||
105 | int main(int argc, char *argv[]) | |
106 | { | |
107 | int i, j, rtest, srcs; | |
108 | void *buf; | |
109 | u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; | |
110 | u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls; | |
111 | u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1; | |
112 | u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6; | |
113 | u8 *dest_ptrs[6], *buffs[TEST_SOURCES]; | |
114 | ||
115 | int align, size; | |
116 | unsigned char *efence_buffs[TEST_SOURCES]; | |
117 | unsigned int offset; | |
118 | u8 *ubuffs[TEST_SOURCES]; | |
119 | u8 *udest_ptrs[6]; | |
120 | printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); | |
121 | ||
122 | // Allocate the arrays | |
123 | for (i = 0; i < TEST_SOURCES; i++) { | |
124 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
125 | printf("alloc error: Fail"); | |
126 | return -1; | |
127 | } | |
128 | buffs[i] = buf; | |
129 | } | |
130 | ||
131 | if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) { | |
132 | printf("alloc error: Fail"); | |
133 | return -1; | |
134 | } | |
135 | g_tbls = buf; | |
136 | ||
137 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
138 | printf("alloc error: Fail"); | |
139 | return -1; | |
140 | } | |
141 | dest1 = buf; | |
142 | ||
143 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
144 | printf("alloc error: Fail"); | |
145 | return -1; | |
146 | } | |
147 | dest2 = buf; | |
148 | ||
149 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
150 | printf("alloc error: Fail"); | |
151 | return -1; | |
152 | } | |
153 | dest3 = buf; | |
154 | ||
155 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
156 | printf("alloc error: Fail"); | |
157 | return -1; | |
158 | } | |
159 | dest4 = buf; | |
160 | ||
161 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
162 | printf("alloc error: Fail"); | |
163 | return -1; | |
164 | } | |
165 | dest5 = buf; | |
166 | ||
167 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
168 | printf("alloc error: Fail"); | |
169 | return -1; | |
170 | } | |
171 | dest6 = buf; | |
172 | ||
173 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
174 | printf("alloc error: Fail"); | |
175 | return -1; | |
176 | } | |
177 | dest_ref1 = buf; | |
178 | ||
179 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
180 | printf("alloc error: Fail"); | |
181 | return -1; | |
182 | } | |
183 | dest_ref2 = buf; | |
184 | ||
185 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
186 | printf("alloc error: Fail"); | |
187 | return -1; | |
188 | } | |
189 | dest_ref3 = buf; | |
190 | ||
191 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
192 | printf("alloc error: Fail"); | |
193 | return -1; | |
194 | } | |
195 | dest_ref4 = buf; | |
196 | ||
197 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
198 | printf("alloc error: Fail"); | |
199 | return -1; | |
200 | } | |
201 | dest_ref5 = buf; | |
202 | ||
203 | if (posix_memalign(&buf, 64, TEST_LEN)) { | |
204 | printf("alloc error: Fail"); | |
205 | return -1; | |
206 | } | |
207 | dest_ref6 = buf; | |
208 | ||
209 | dest_ptrs[0] = dest1; | |
210 | dest_ptrs[1] = dest2; | |
211 | dest_ptrs[2] = dest3; | |
212 | dest_ptrs[3] = dest4; | |
213 | dest_ptrs[4] = dest5; | |
214 | dest_ptrs[5] = dest6; | |
215 | ||
216 | // Test of all zeros | |
217 | for (i = 0; i < TEST_SOURCES; i++) | |
218 | memset(buffs[i], 0, TEST_LEN); | |
219 | ||
220 | memset(dest1, 0, TEST_LEN); | |
221 | memset(dest2, 0, TEST_LEN); | |
222 | memset(dest3, 0, TEST_LEN); | |
223 | memset(dest4, 0, TEST_LEN); | |
224 | memset(dest5, 0, TEST_LEN); | |
225 | memset(dest6, 0, TEST_LEN); | |
226 | memset(dest_ref1, 0, TEST_LEN); | |
227 | memset(dest_ref2, 0, TEST_LEN); | |
228 | memset(dest_ref3, 0, TEST_LEN); | |
229 | memset(dest_ref4, 0, TEST_LEN); | |
230 | memset(dest_ref5, 0, TEST_LEN); | |
231 | memset(dest_ref6, 0, TEST_LEN); | |
232 | memset(g1, 2, TEST_SOURCES); | |
233 | memset(g2, 1, TEST_SOURCES); | |
234 | memset(g3, 7, TEST_SOURCES); | |
235 | memset(g4, 9, TEST_SOURCES); | |
236 | memset(g5, 4, TEST_SOURCES); | |
237 | memset(g6, 0xe6, TEST_SOURCES); | |
238 | ||
239 | for (i = 0; i < TEST_SOURCES; i++) { | |
240 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
241 | gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); | |
242 | gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); | |
243 | gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); | |
244 | gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]); | |
245 | gf_vect_mul_init(g6[i], &g_tbls[160 * TEST_SOURCES + i * 32]); | |
246 | } | |
247 | ||
248 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
249 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, | |
250 | dest_ref2); | |
251 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, | |
252 | dest_ref3); | |
253 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, | |
254 | dest_ref4); | |
255 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, | |
256 | dest_ref5); | |
257 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs, | |
258 | dest_ref6); | |
259 | ||
260 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
261 | ||
262 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
263 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); | |
264 | dump_matrix(buffs, 5, TEST_SOURCES); | |
265 | printf("dprod_base:"); | |
266 | dump(dest_ref1, 25); | |
267 | printf("dprod_dut:"); | |
268 | dump(dest1, 25); | |
269 | return -1; | |
270 | } | |
271 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
272 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); | |
273 | dump_matrix(buffs, 5, TEST_SOURCES); | |
274 | printf("dprod_base:"); | |
275 | dump(dest_ref2, 25); | |
276 | printf("dprod_dut:"); | |
277 | dump(dest2, 25); | |
278 | return -1; | |
279 | } | |
280 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
281 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); | |
282 | dump_matrix(buffs, 5, TEST_SOURCES); | |
283 | printf("dprod_base:"); | |
284 | dump(dest_ref3, 25); | |
285 | printf("dprod_dut:"); | |
286 | dump(dest3, 25); | |
287 | return -1; | |
288 | } | |
289 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
290 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); | |
291 | dump_matrix(buffs, 5, TEST_SOURCES); | |
292 | printf("dprod_base:"); | |
293 | dump(dest_ref4, 25); | |
294 | printf("dprod_dut:"); | |
295 | dump(dest4, 25); | |
296 | return -1; | |
297 | } | |
298 | if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { | |
299 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n"); | |
300 | dump_matrix(buffs, 5, TEST_SOURCES); | |
301 | printf("dprod_base:"); | |
302 | dump(dest_ref5, 25); | |
303 | printf("dprod_dut:"); | |
304 | dump(dest5, 25); | |
305 | return -1; | |
306 | } | |
307 | if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { | |
308 | printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test6\n"); | |
309 | dump_matrix(buffs, 5, TEST_SOURCES); | |
310 | printf("dprod_base:"); | |
311 | dump(dest_ref6, 25); | |
312 | printf("dprod_dut:"); | |
313 | dump(dest6, 25); | |
314 | return -1; | |
315 | } | |
316 | putchar('.'); | |
317 | ||
318 | // Rand data test | |
319 | ||
320 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
321 | for (i = 0; i < TEST_SOURCES; i++) | |
322 | for (j = 0; j < TEST_LEN; j++) | |
323 | buffs[i][j] = rand(); | |
324 | ||
325 | for (i = 0; i < TEST_SOURCES; i++) { | |
326 | g1[i] = rand(); | |
327 | g2[i] = rand(); | |
328 | g3[i] = rand(); | |
329 | g4[i] = rand(); | |
330 | g5[i] = rand(); | |
331 | g6[i] = rand(); | |
332 | } | |
333 | ||
334 | for (i = 0; i < TEST_SOURCES; i++) { | |
335 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
336 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
337 | gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); | |
338 | gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); | |
339 | gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); | |
340 | gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]); | |
341 | } | |
342 | ||
343 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); | |
344 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
345 | buffs, dest_ref2); | |
346 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], | |
347 | buffs, dest_ref3); | |
348 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], | |
349 | buffs, dest_ref4); | |
350 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], | |
351 | buffs, dest_ref5); | |
352 | gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], | |
353 | buffs, dest_ref6); | |
354 | ||
355 | FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); | |
356 | ||
357 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
358 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
359 | dump_matrix(buffs, 5, TEST_SOURCES); | |
360 | printf("dprod_base:"); | |
361 | dump(dest_ref1, 25); | |
362 | printf("dprod_dut:"); | |
363 | dump(dest1, 25); | |
364 | return -1; | |
365 | } | |
366 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
367 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
368 | dump_matrix(buffs, 5, TEST_SOURCES); | |
369 | printf("dprod_base:"); | |
370 | dump(dest_ref2, 25); | |
371 | printf("dprod_dut:"); | |
372 | dump(dest2, 25); | |
373 | return -1; | |
374 | } | |
375 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
376 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); | |
377 | dump_matrix(buffs, 5, TEST_SOURCES); | |
378 | printf("dprod_base:"); | |
379 | dump(dest_ref3, 25); | |
380 | printf("dprod_dut:"); | |
381 | dump(dest3, 25); | |
382 | return -1; | |
383 | } | |
384 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
385 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); | |
386 | dump_matrix(buffs, 5, TEST_SOURCES); | |
387 | printf("dprod_base:"); | |
388 | dump(dest_ref4, 25); | |
389 | printf("dprod_dut:"); | |
390 | dump(dest4, 25); | |
391 | return -1; | |
392 | } | |
393 | if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { | |
394 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); | |
395 | dump_matrix(buffs, 5, TEST_SOURCES); | |
396 | printf("dprod_base:"); | |
397 | dump(dest_ref5, 25); | |
398 | printf("dprod_dut:"); | |
399 | dump(dest5, 25); | |
400 | return -1; | |
401 | } | |
402 | if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { | |
403 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest); | |
404 | dump_matrix(buffs, 5, TEST_SOURCES); | |
405 | printf("dprod_base:"); | |
406 | dump(dest_ref6, 25); | |
407 | printf("dprod_dut:"); | |
408 | dump(dest6, 25); | |
409 | return -1; | |
410 | } | |
411 | ||
412 | putchar('.'); | |
413 | } | |
414 | ||
415 | // Rand data test with varied parameters | |
416 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
417 | for (srcs = TEST_SOURCES; srcs > 0; srcs--) { | |
418 | for (i = 0; i < srcs; i++) | |
419 | for (j = 0; j < TEST_LEN; j++) | |
420 | buffs[i][j] = rand(); | |
421 | ||
422 | for (i = 0; i < srcs; i++) { | |
423 | g1[i] = rand(); | |
424 | g2[i] = rand(); | |
425 | g3[i] = rand(); | |
426 | g4[i] = rand(); | |
427 | g5[i] = rand(); | |
428 | g6[i] = rand(); | |
429 | } | |
430 | ||
431 | for (i = 0; i < srcs; i++) { | |
432 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
433 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
434 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
435 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
436 | gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); | |
437 | gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); | |
438 | } | |
439 | ||
440 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); | |
441 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, | |
442 | dest_ref2); | |
443 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, | |
444 | dest_ref3); | |
445 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, | |
446 | dest_ref4); | |
447 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs, | |
448 | dest_ref5); | |
449 | gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[160 * srcs], buffs, | |
450 | dest_ref6); | |
451 | ||
452 | FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); | |
453 | ||
454 | if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { | |
455 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
456 | " test1 srcs=%d\n", srcs); | |
457 | dump_matrix(buffs, 5, TEST_SOURCES); | |
458 | printf("dprod_base:"); | |
459 | dump(dest_ref1, 25); | |
460 | printf("dprod_dut:"); | |
461 | dump(dest1, 25); | |
462 | return -1; | |
463 | } | |
464 | if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { | |
465 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
466 | " test2 srcs=%d\n", srcs); | |
467 | dump_matrix(buffs, 5, TEST_SOURCES); | |
468 | printf("dprod_base:"); | |
469 | dump(dest_ref2, 25); | |
470 | printf("dprod_dut:"); | |
471 | dump(dest2, 25); | |
472 | return -1; | |
473 | } | |
474 | if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { | |
475 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
476 | " test3 srcs=%d\n", srcs); | |
477 | dump_matrix(buffs, 5, TEST_SOURCES); | |
478 | printf("dprod_base:"); | |
479 | dump(dest_ref3, 25); | |
480 | printf("dprod_dut:"); | |
481 | dump(dest3, 25); | |
482 | return -1; | |
483 | } | |
484 | if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { | |
485 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
486 | " test4 srcs=%d\n", srcs); | |
487 | dump_matrix(buffs, 5, TEST_SOURCES); | |
488 | printf("dprod_base:"); | |
489 | dump(dest_ref4, 25); | |
490 | printf("dprod_dut:"); | |
491 | dump(dest4, 25); | |
492 | return -1; | |
493 | } | |
494 | if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { | |
495 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
496 | " test5 srcs=%d\n", srcs); | |
497 | dump_matrix(buffs, 5, TEST_SOURCES); | |
498 | printf("dprod_base:"); | |
499 | dump(dest_ref5, 25); | |
500 | printf("dprod_dut:"); | |
501 | dump(dest5, 25); | |
502 | return -1; | |
503 | } | |
504 | if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { | |
505 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) | |
506 | " test6 srcs=%d\n", srcs); | |
507 | dump_matrix(buffs, 5, TEST_SOURCES); | |
508 | printf("dprod_base:"); | |
509 | dump(dest_ref6, 25); | |
510 | printf("dprod_dut:"); | |
511 | dump(dest6, 25); | |
512 | return -1; | |
513 | } | |
514 | ||
515 | putchar('.'); | |
516 | } | |
517 | } | |
518 | ||
519 | // Run tests at end of buffer for Electric Fence | |
520 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; | |
521 | for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { | |
522 | for (i = 0; i < TEST_SOURCES; i++) | |
523 | for (j = 0; j < TEST_LEN; j++) | |
524 | buffs[i][j] = rand(); | |
525 | ||
526 | for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end | |
527 | efence_buffs[i] = buffs[i] + TEST_LEN - size; | |
528 | ||
529 | for (i = 0; i < TEST_SOURCES; i++) { | |
530 | g1[i] = rand(); | |
531 | g2[i] = rand(); | |
532 | g3[i] = rand(); | |
533 | g4[i] = rand(); | |
534 | g5[i] = rand(); | |
535 | g6[i] = rand(); | |
536 | } | |
537 | ||
538 | for (i = 0; i < TEST_SOURCES; i++) { | |
539 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
540 | gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); | |
541 | gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); | |
542 | gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); | |
543 | gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); | |
544 | gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]); | |
545 | } | |
546 | ||
547 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); | |
548 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], | |
549 | efence_buffs, dest_ref2); | |
550 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], | |
551 | efence_buffs, dest_ref3); | |
552 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], | |
553 | efence_buffs, dest_ref4); | |
554 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], | |
555 | efence_buffs, dest_ref5); | |
556 | gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], | |
557 | efence_buffs, dest_ref6); | |
558 | ||
559 | FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); | |
560 | ||
561 | if (0 != memcmp(dest_ref1, dest1, size)) { | |
562 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); | |
563 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
564 | printf("dprod_base:"); | |
565 | dump(dest_ref1, align); | |
566 | printf("dprod_dut:"); | |
567 | dump(dest1, align); | |
568 | return -1; | |
569 | } | |
570 | ||
571 | if (0 != memcmp(dest_ref2, dest2, size)) { | |
572 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); | |
573 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
574 | printf("dprod_base:"); | |
575 | dump(dest_ref2, align); | |
576 | printf("dprod_dut:"); | |
577 | dump(dest2, align); | |
578 | return -1; | |
579 | } | |
580 | ||
581 | if (0 != memcmp(dest_ref3, dest3, size)) { | |
582 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); | |
583 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
584 | printf("dprod_base:"); | |
585 | dump(dest_ref3, align); | |
586 | printf("dprod_dut:"); | |
587 | dump(dest3, align); | |
588 | return -1; | |
589 | } | |
590 | ||
591 | if (0 != memcmp(dest_ref4, dest4, size)) { | |
592 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); | |
593 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
594 | printf("dprod_base:"); | |
595 | dump(dest_ref4, align); | |
596 | printf("dprod_dut:"); | |
597 | dump(dest4, align); | |
598 | return -1; | |
599 | } | |
600 | ||
601 | if (0 != memcmp(dest_ref5, dest5, size)) { | |
602 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); | |
603 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
604 | printf("dprod_base:"); | |
605 | dump(dest_ref5, align); | |
606 | printf("dprod_dut:"); | |
607 | dump(dest5, align); | |
608 | return -1; | |
609 | } | |
610 | ||
611 | if (0 != memcmp(dest_ref6, dest6, size)) { | |
612 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest); | |
613 | dump_matrix(efence_buffs, 5, TEST_SOURCES); | |
614 | printf("dprod_base:"); | |
615 | dump(dest_ref6, align); | |
616 | printf("dprod_dut:"); | |
617 | dump(dest6, align); | |
618 | return -1; | |
619 | } | |
620 | ||
621 | putchar('.'); | |
622 | } | |
623 | ||
624 | // Test rand ptr alignment if available | |
625 | ||
626 | for (rtest = 0; rtest < RANDOMS; rtest++) { | |
627 | size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); | |
628 | srcs = rand() % TEST_SOURCES; | |
629 | if (srcs == 0) | |
630 | continue; | |
631 | ||
632 | offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; | |
633 | // Add random offsets | |
634 | for (i = 0; i < srcs; i++) | |
635 | ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
636 | ||
637 | udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
638 | udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
639 | udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
640 | udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
641 | udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
642 | udest_ptrs[5] = dest6 + (rand() & (PTR_ALIGN_CHK_B - offset)); | |
643 | ||
644 | memset(dest1, 0, TEST_LEN); // zero pad to check write-over | |
645 | memset(dest2, 0, TEST_LEN); | |
646 | memset(dest3, 0, TEST_LEN); | |
647 | memset(dest4, 0, TEST_LEN); | |
648 | memset(dest5, 0, TEST_LEN); | |
649 | memset(dest6, 0, TEST_LEN); | |
650 | ||
651 | for (i = 0; i < srcs; i++) | |
652 | for (j = 0; j < size; j++) | |
653 | ubuffs[i][j] = rand(); | |
654 | ||
655 | for (i = 0; i < srcs; i++) { | |
656 | g1[i] = rand(); | |
657 | g2[i] = rand(); | |
658 | g3[i] = rand(); | |
659 | g4[i] = rand(); | |
660 | g5[i] = rand(); | |
661 | g6[i] = rand(); | |
662 | } | |
663 | ||
664 | for (i = 0; i < srcs; i++) { | |
665 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
666 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
667 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
668 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
669 | gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); | |
670 | gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); | |
671 | } | |
672 | ||
673 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); | |
674 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); | |
675 | gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); | |
676 | gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); | |
677 | gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5); | |
678 | gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], ubuffs, dest_ref6); | |
679 | ||
680 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); | |
681 | ||
682 | if (memcmp(dest_ref1, udest_ptrs[0], size)) { | |
683 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
684 | srcs); | |
685 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
686 | printf("dprod_base:"); | |
687 | dump(dest_ref1, 25); | |
688 | printf("dprod_dut:"); | |
689 | dump(udest_ptrs[0], 25); | |
690 | return -1; | |
691 | } | |
692 | if (memcmp(dest_ref2, udest_ptrs[1], size)) { | |
693 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
694 | srcs); | |
695 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
696 | printf("dprod_base:"); | |
697 | dump(dest_ref2, 25); | |
698 | printf("dprod_dut:"); | |
699 | dump(udest_ptrs[1], 25); | |
700 | return -1; | |
701 | } | |
702 | if (memcmp(dest_ref3, udest_ptrs[2], size)) { | |
703 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
704 | srcs); | |
705 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
706 | printf("dprod_base:"); | |
707 | dump(dest_ref3, 25); | |
708 | printf("dprod_dut:"); | |
709 | dump(udest_ptrs[2], 25); | |
710 | return -1; | |
711 | } | |
712 | if (memcmp(dest_ref4, udest_ptrs[3], size)) { | |
713 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
714 | srcs); | |
715 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
716 | printf("dprod_base:"); | |
717 | dump(dest_ref4, 25); | |
718 | printf("dprod_dut:"); | |
719 | dump(udest_ptrs[3], 25); | |
720 | return -1; | |
721 | } | |
722 | if (memcmp(dest_ref5, udest_ptrs[4], size)) { | |
723 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
724 | srcs); | |
725 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
726 | printf("dprod_base:"); | |
727 | dump(dest_ref5, 25); | |
728 | printf("dprod_dut:"); | |
729 | dump(udest_ptrs[4], 25); | |
730 | return -1; | |
731 | } | |
732 | if (memcmp(dest_ref6, udest_ptrs[5], size)) { | |
733 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", | |
734 | srcs); | |
735 | dump_matrix(ubuffs, 5, TEST_SOURCES); | |
736 | printf("dprod_base:"); | |
737 | dump(dest_ref6, 25); | |
738 | printf("dprod_dut:"); | |
739 | dump(udest_ptrs[5], 25); | |
740 | return -1; | |
741 | } | |
742 | // Confirm that padding around dests is unchanged | |
743 | memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff | |
744 | offset = udest_ptrs[0] - dest1; | |
745 | ||
746 | if (memcmp(dest1, dest_ref1, offset)) { | |
747 | printf("Fail rand ualign pad1 start\n"); | |
748 | return -1; | |
749 | } | |
750 | if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
751 | printf("Fail rand ualign pad1 end\n"); | |
752 | return -1; | |
753 | } | |
754 | ||
755 | offset = udest_ptrs[1] - dest2; | |
756 | if (memcmp(dest2, dest_ref1, offset)) { | |
757 | printf("Fail rand ualign pad2 start\n"); | |
758 | return -1; | |
759 | } | |
760 | if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
761 | printf("Fail rand ualign pad2 end\n"); | |
762 | return -1; | |
763 | } | |
764 | ||
765 | offset = udest_ptrs[2] - dest3; | |
766 | if (memcmp(dest3, dest_ref1, offset)) { | |
767 | printf("Fail rand ualign pad3 start\n"); | |
768 | return -1; | |
769 | } | |
770 | if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
771 | printf("Fail rand ualign pad3 end\n"); | |
772 | return -1; | |
773 | } | |
774 | ||
775 | offset = udest_ptrs[3] - dest4; | |
776 | if (memcmp(dest4, dest_ref1, offset)) { | |
777 | printf("Fail rand ualign pad4 start\n"); | |
778 | return -1; | |
779 | } | |
780 | if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
781 | printf("Fail rand ualign pad4 end\n"); | |
782 | return -1; | |
783 | } | |
784 | ||
785 | offset = udest_ptrs[4] - dest5; | |
786 | if (memcmp(dest5, dest_ref1, offset)) { | |
787 | printf("Fail rand ualign pad5 start\n"); | |
788 | return -1; | |
789 | } | |
790 | if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
791 | printf("Fail rand ualign pad5 end\n"); | |
792 | return -1; | |
793 | } | |
794 | ||
795 | offset = udest_ptrs[5] - dest6; | |
796 | if (memcmp(dest6, dest_ref1, offset)) { | |
797 | printf("Fail rand ualign pad6 start\n"); | |
798 | return -1; | |
799 | } | |
800 | if (memcmp(dest6 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { | |
801 | printf("Fail rand ualign pad6 end\n"); | |
802 | return -1; | |
803 | } | |
804 | ||
805 | putchar('.'); | |
806 | } | |
807 | ||
808 | // Test all size alignment | |
809 | align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; | |
810 | ||
811 | for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { | |
812 | srcs = TEST_SOURCES; | |
813 | ||
814 | for (i = 0; i < srcs; i++) | |
815 | for (j = 0; j < size; j++) | |
816 | buffs[i][j] = rand(); | |
817 | ||
818 | for (i = 0; i < srcs; i++) { | |
819 | g1[i] = rand(); | |
820 | g2[i] = rand(); | |
821 | g3[i] = rand(); | |
822 | g4[i] = rand(); | |
823 | g5[i] = rand(); | |
824 | g6[i] = rand(); | |
825 | } | |
826 | ||
827 | for (i = 0; i < srcs; i++) { | |
828 | gf_vect_mul_init(g1[i], &g_tbls[i * 32]); | |
829 | gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); | |
830 | gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); | |
831 | gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); | |
832 | gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); | |
833 | gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); | |
834 | } | |
835 | ||
836 | gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); | |
837 | gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); | |
838 | gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); | |
839 | gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); | |
840 | gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5); | |
841 | gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], buffs, dest_ref6); | |
842 | ||
843 | FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); | |
844 | ||
845 | if (memcmp(dest_ref1, dest_ptrs[0], size)) { | |
846 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
847 | size); | |
848 | dump_matrix(buffs, 5, TEST_SOURCES); | |
849 | printf("dprod_base:"); | |
850 | dump(dest_ref1, 25); | |
851 | printf("dprod_dut:"); | |
852 | dump(dest_ptrs[0], 25); | |
853 | return -1; | |
854 | } | |
855 | if (memcmp(dest_ref2, dest_ptrs[1], size)) { | |
856 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
857 | size); | |
858 | dump_matrix(buffs, 5, TEST_SOURCES); | |
859 | printf("dprod_base:"); | |
860 | dump(dest_ref2, 25); | |
861 | printf("dprod_dut:"); | |
862 | dump(dest_ptrs[1], 25); | |
863 | return -1; | |
864 | } | |
865 | if (memcmp(dest_ref3, dest_ptrs[2], size)) { | |
866 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
867 | size); | |
868 | dump_matrix(buffs, 5, TEST_SOURCES); | |
869 | printf("dprod_base:"); | |
870 | dump(dest_ref3, 25); | |
871 | printf("dprod_dut:"); | |
872 | dump(dest_ptrs[2], 25); | |
873 | return -1; | |
874 | } | |
875 | if (memcmp(dest_ref4, dest_ptrs[3], size)) { | |
876 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
877 | size); | |
878 | dump_matrix(buffs, 5, TEST_SOURCES); | |
879 | printf("dprod_base:"); | |
880 | dump(dest_ref4, 25); | |
881 | printf("dprod_dut:"); | |
882 | dump(dest_ptrs[3], 25); | |
883 | return -1; | |
884 | } | |
885 | if (memcmp(dest_ref5, dest_ptrs[4], size)) { | |
886 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
887 | size); | |
888 | dump_matrix(buffs, 5, TEST_SOURCES); | |
889 | printf("dprod_base:"); | |
890 | dump(dest_ref5, 25); | |
891 | printf("dprod_dut:"); | |
892 | dump(dest_ptrs[4], 25); | |
893 | return -1; | |
894 | } | |
895 | if (memcmp(dest_ref6, dest_ptrs[5], size)) { | |
896 | printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", | |
897 | size); | |
898 | dump_matrix(buffs, 5, TEST_SOURCES); | |
899 | printf("dprod_base:"); | |
900 | dump(dest_ref6, 25); | |
901 | printf("dprod_dut:"); | |
902 | dump(dest_ptrs[5], 25); | |
903 | return -1; | |
904 | } | |
905 | } | |
906 | ||
907 | printf("Pass\n"); | |
908 | return 0; | |
909 | ||
910 | } |
110 | 110 | %define return rax |
111 | 111 | %define return.w eax |
112 | 112 | |
113 | %define func(x) x: | |
113 | %define func(x) x: endbranch | |
114 | 114 | %macro FUNC_SAVE 0 |
115 | 115 | push r12 |
116 | 116 | push r13 |
183 | 183 | |
184 | 184 | |
185 | 185 | align 16 |
186 | global gf_6vect_mad_avx:ISAL_SYM_TYPE_FUNCTION | |
186 | mk_global gf_6vect_mad_avx, function | |
187 | 187 | func(gf_6vect_mad_avx) |
188 | 188 | FUNC_SAVE |
189 | 189 | sub len, 16 |
106 | 106 | %define return rax |
107 | 107 | %define return.w eax |
108 | 108 | |
109 | %define func(x) x: | |
109 | %define func(x) x: endbranch | |
110 | 110 | %macro FUNC_SAVE 0 |
111 | 111 | push r12 |
112 | 112 | %endmacro |
176 | 176 | %define xd6 xd1 |
177 | 177 | |
178 | 178 | align 16 |
179 | global gf_6vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION | |
179 | mk_global gf_6vect_mad_avx2, function | |
180 | 180 | func(gf_6vect_mad_avx2) |
181 | 181 | FUNC_SAVE |
182 | 182 | sub len, 32 |
45 | 45 | %define tmp2 r10 |
46 | 46 | %define tmp3 r12 ;must be saved and restored |
47 | 47 | %define return rax |
48 | %define func(x) x: | |
48 | %define func(x) x: endbranch | |
49 | 49 | %macro FUNC_SAVE 0 |
50 | 50 | push r12 |
51 | 51 | %endmacro |
180 | 180 | %define xtmph6 zmm31 |
181 | 181 | |
182 | 182 | align 16 |
183 | global gf_6vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION | |
183 | mk_global gf_6vect_mad_avx512, function | |
184 | 184 | func(gf_6vect_mad_avx512) |
185 | 185 | FUNC_SAVE |
186 | 186 | sub len, 64 |
112 | 112 | %define return rax |
113 | 113 | %define return.w eax |
114 | 114 | |
115 | %define func(x) x: | |
115 | %define func(x) x: endbranch | |
116 | 116 | %macro FUNC_SAVE 0 |
117 | 117 | push r12 |
118 | 118 | push r13 |
184 | 184 | |
185 | 185 | |
186 | 186 | align 16 |
187 | global gf_6vect_mad_sse:ISAL_SYM_TYPE_FUNCTION | |
187 | mk_global gf_6vect_mad_sse, function | |
188 | 188 | func(gf_6vect_mad_sse) |
189 | 189 | FUNC_SAVE |
190 | 190 | sub len, 16 |
47 | 47 | %endmacro |
48 | 48 | %define SSTR SLDR |
49 | 49 | %define PS 8 |
50 | %define func(x) x: | |
50 | %define func(x) x: endbranch | |
51 | 51 | %define FUNC_SAVE |
52 | 52 | %define FUNC_RESTORE |
53 | 53 | %endif |
105 | 105 | |
106 | 106 | %define PS 4 |
107 | 107 | %define LOG_PS 2 |
108 | %define func(x) x: | |
108 | %define func(x) x: endbranch | |
109 | 109 | %define arg(x) [ebp + PS*2 + PS*x] |
110 | 110 | |
111 | 111 | %define trans ecx ;trans is for the variables in stack |
193 | 193 | %define xp xmm2 |
194 | 194 | |
195 | 195 | align 16 |
196 | global gf_vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION | |
196 | mk_global gf_vect_dot_prod_avx, function | |
197 | 197 | func(gf_vect_dot_prod_avx) |
198 | 198 | FUNC_SAVE |
199 | 199 | SLDR len, len_m |
50 | 50 | %endmacro |
51 | 51 | %define SSTR SLDR |
52 | 52 | %define PS 8 |
53 | %define func(x) x: | |
53 | %define func(x) x: endbranch | |
54 | 54 | %define FUNC_SAVE |
55 | 55 | %define FUNC_RESTORE |
56 | 56 | %endif |
110 | 110 | |
111 | 111 | %define PS 4 |
112 | 112 | %define LOG_PS 2 |
113 | %define func(x) x: | |
113 | %define func(x) x: endbranch | |
114 | 114 | %define arg(x) [ebp + PS*2 + PS*x] |
115 | 115 | |
116 | 116 | %define trans ecx ;trans is for the variables in stack |
201 | 201 | %define xp ymm2 |
202 | 202 | |
203 | 203 | align 16 |
204 | global gf_vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION | |
204 | mk_global gf_vect_dot_prod_avx2, function | |
205 | 205 | func(gf_vect_dot_prod_avx2) |
206 | 206 | FUNC_SAVE |
207 | 207 | SLDR len, len_m |
48 | 48 | %define PS 8 |
49 | 49 | %define LOG_PS 3 |
50 | 50 | |
51 | %define func(x) x: | |
51 | %define func(x) x: endbranch | |
52 | 52 | %define FUNC_SAVE |
53 | 53 | %define FUNC_RESTORE |
54 | 54 | %endif |
72 | 72 | %define func(x) proc_frame x |
73 | 73 | %macro FUNC_SAVE 0 |
74 | 74 | alloc_stack stack_size |
75 | save_reg r12, 9*16 + 0*8 | |
76 | save_reg r15, 9*16 + 3*8 | |
75 | save_reg r12, 0*8 | |
76 | save_reg r15, 1*8 | |
77 | 77 | end_prolog |
78 | 78 | mov arg4, arg(4) |
79 | 79 | %endmacro |
80 | 80 | |
81 | 81 | %macro FUNC_RESTORE 0 |
82 | mov r12, [rsp + 9*16 + 0*8] | |
83 | mov r15, [rsp + 9*16 + 3*8] | |
82 | mov r12, [rsp + 0*8] | |
83 | mov r15, [rsp + 1*8] | |
84 | 84 | add rsp, stack_size |
85 | 85 | %endmacro |
86 | 86 | %endif |
127 | 127 | section .text |
128 | 128 | |
129 | 129 | align 16 |
130 | global gf_vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION | |
130 | mk_global gf_vect_dot_prod_avx512, function | |
131 | 131 | func(gf_vect_dot_prod_avx512) |
132 | 132 | FUNC_SAVE |
133 | 133 | xor pos, pos |
47 | 47 | %endmacro |
48 | 48 | %define SSTR SLDR |
49 | 49 | %define PS 8 |
50 | %define func(x) x: | |
50 | %define func(x) x: endbranch | |
51 | 51 | %define FUNC_SAVE |
52 | 52 | %define FUNC_RESTORE |
53 | 53 | %endif |
105 | 105 | |
106 | 106 | %define PS 4 |
107 | 107 | %define LOG_PS 2 |
108 | %define func(x) x: | |
108 | %define func(x) x: endbranch | |
109 | 109 | %define arg(x) [ebp + PS*2 + PS*x] |
110 | 110 | |
111 | 111 | %define trans ecx ;trans is for the variables in stack |
193 | 193 | %define xp xmm2 |
194 | 194 | |
195 | 195 | align 16 |
196 | global gf_vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION | |
196 | mk_global gf_vect_dot_prod_sse, function | |
197 | 197 | func(gf_vect_dot_prod_sse) |
198 | 198 | FUNC_SAVE |
199 | 199 | SLDR len, len_m |
81 | 81 | %define return rax |
82 | 82 | %define return.w eax |
83 | 83 | |
84 | %define func(x) x: | |
84 | %define func(x) x: endbranch | |
85 | 85 | %define FUNC_SAVE |
86 | 86 | %define FUNC_RESTORE |
87 | 87 | %endif |
130 | 130 | %define xtmpd xmm5 |
131 | 131 | |
132 | 132 | align 16 |
133 | global gf_vect_mad_avx:ISAL_SYM_TYPE_FUNCTION | |
133 | mk_global gf_vect_mad_avx, function | |
134 | 134 | func(gf_vect_mad_avx) |
135 | 135 | FUNC_SAVE |
136 | 136 | sub len, 16 |
87 | 87 | %define return rax |
88 | 88 | %define return.w eax |
89 | 89 | |
90 | %define func(x) x: | |
90 | %define func(x) x: endbranch | |
91 | 91 | %define FUNC_SAVE |
92 | 92 | %define FUNC_RESTORE |
93 | 93 | %endif |
138 | 138 | %define xtmpd ymm5 |
139 | 139 | |
140 | 140 | align 16 |
141 | global gf_vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION | |
141 | mk_global gf_vect_mad_avx2, function | |
142 | 142 | func(gf_vect_mad_avx2) |
143 | 143 | FUNC_SAVE |
144 | 144 | sub len, 32 |
43 | 43 | %define arg5 r9 |
44 | 44 | %define tmp r11 |
45 | 45 | %define return rax |
46 | %define func(x) x: | |
46 | %define func(x) x: endbranch | |
47 | 47 | %define FUNC_SAVE |
48 | 48 | %define FUNC_RESTORE |
49 | 49 | %endif |
126 | 126 | %define xmask0f zmm8 |
127 | 127 | |
128 | 128 | align 16 |
129 | global gf_vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION | |
129 | mk_global gf_vect_mad_avx512, function | |
130 | 130 | func(gf_vect_mad_avx512) |
131 | 131 | FUNC_SAVE |
132 | 132 | sub len, 64 |
81 | 81 | %define return rax |
82 | 82 | %define return.w eax |
83 | 83 | |
84 | %define func(x) x: | |
84 | %define func(x) x: endbranch | |
85 | 85 | %define FUNC_SAVE |
86 | 86 | %define FUNC_RESTORE |
87 | 87 | %endif |
130 | 130 | |
131 | 131 | |
132 | 132 | align 16 |
133 | global gf_vect_mad_sse:ISAL_SYM_TYPE_FUNCTION | |
133 | mk_global gf_vect_mad_sse, function | |
134 | 134 | func(gf_vect_mad_sse) |
135 | 135 | FUNC_SAVE |
136 | 136 | sub len, 16 |
41 | 41 | %define arg5 r9 |
42 | 42 | %define tmp r11 |
43 | 43 | %define return rax |
44 | %define func(x) x: | |
44 | %define func(x) x: endbranch | |
45 | 45 | %define FUNC_SAVE |
46 | 46 | %define FUNC_RESTORE |
47 | 47 | |
55 | 55 | %define func(x) proc_frame x |
56 | 56 | %macro FUNC_SAVE 0 |
57 | 57 | alloc_stack stack_size |
58 | save_xmm128 xmm6, 0*16 | |
59 | save_xmm128 xmm7, 1*16 | |
60 | save_xmm128 xmm13, 2*16 | |
61 | save_xmm128 xmm14, 3*16 | |
62 | save_xmm128 xmm15, 4*16 | |
58 | vmovdqa [rsp + 0*16], xmm6 | |
59 | vmovdqa [rsp + 1*16], xmm7 | |
60 | vmovdqa [rsp + 2*16], xmm13 | |
61 | vmovdqa [rsp + 3*16], xmm14 | |
62 | vmovdqa [rsp + 4*16], xmm15 | |
63 | 63 | end_prolog |
64 | 64 | %endmacro |
65 | 65 | |
110 | 110 | %define xtmp2c xmm7 |
111 | 111 | |
112 | 112 | align 16 |
113 | global gf_vect_mul_avx:ISAL_SYM_TYPE_FUNCTION | |
113 | mk_global gf_vect_mul_avx, function | |
114 | 114 | func(gf_vect_mul_avx) |
115 | 115 | FUNC_SAVE |
116 | 116 | mov pos, 0 |
41 | 41 | %define arg5 r9 |
42 | 42 | %define tmp r11 |
43 | 43 | %define return rax |
44 | %define func(x) x: | |
44 | %define func(x) x: endbranch | |
45 | 45 | %define FUNC_SAVE |
46 | 46 | %define FUNC_RESTORE |
47 | 47 | |
111 | 111 | |
112 | 112 | |
113 | 113 | align 16 |
114 | global gf_vect_mul_sse:ISAL_SYM_TYPE_FUNCTION | |
114 | mk_global gf_vect_mul_sse, function | |
115 | 115 | func(gf_vect_mul_sse) |
116 | 116 | FUNC_SAVE |
117 | 117 | mov pos, 0 |
54 | 54 | %define b_d r8d |
55 | 55 | %define end r13 |
56 | 56 | |
57 | %define func(x) x: | |
57 | %define func(x) x: endbranch | |
58 | 58 | %macro FUNC_SAVE 0 |
59 | 59 | push r12 |
60 | 60 | push r13 |
122 | 122 | %define yshuf0 ymm6 |
123 | 123 | %define yshuf1 ymm7 |
124 | 124 | |
125 | ||
126 | global adler32_avx2_4:ISAL_SYM_TYPE_FUNCTION | |
125 | [bits 64] | |
126 | default rel | |
127 | section .text | |
128 | ||
129 | mk_global adler32_avx2_4, function | |
127 | 130 | func(adler32_avx2_4) |
128 | 131 | FUNC_SAVE |
129 | 132 |
51 | 51 | %define b_d r8d |
52 | 52 | %define end r13 |
53 | 53 | |
54 | %define func(x) x: | |
54 | %define func(x) x: endbranch | |
55 | 55 | %macro FUNC_SAVE 0 |
56 | 56 | push r12 |
57 | 57 | push r13 |
103 | 103 | %define xdata1 xmm3 |
104 | 104 | %define xsa xmm4 |
105 | 105 | |
106 | global adler32_sse:ISAL_SYM_TYPE_FUNCTION | |
106 | [bits 64] | |
107 | default rel | |
108 | section .text | |
109 | ||
110 | mk_global adler32_sse, function | |
107 | 111 | func(adler32_sse) |
108 | 112 | FUNC_SAVE |
109 | 113 |
171 | 171 | |
172 | 172 | %endmacro |
173 | 173 | |
174 | default rel | |
175 | section .text | |
176 | ||
174 | 177 | global encode_deflate_icf_ %+ ARCH |
175 | 178 | encode_deflate_icf_ %+ ARCH: |
179 | endbranch | |
176 | 180 | FUNC_SAVE |
177 | 181 | |
178 | 182 | %ifnidn ptr, arg1 |
184 | 184 | |
185 | 185 | %endmacro |
186 | 186 | |
187 | default rel | |
188 | section .text | |
189 | ||
187 | 190 | global encode_deflate_icf_ %+ ARCH |
188 | 191 | encode_deflate_icf_ %+ ARCH: |
192 | endbranch | |
189 | 193 | FUNC_SAVE |
190 | 194 | |
191 | 195 | %ifnidn ptr, arg1 |
63 | 63 | #include <stdlib.h> |
64 | 64 | #include "igzip_lib.h" |
65 | 65 | |
66 | #include "huff_codes.h" | |
67 | #include "huffman.h" | |
68 | ||
66 | 69 | /*These max code lengths are limited by how the data is stored in |
67 | 70 | * hufftables.asm. The deflate standard max is 15.*/ |
68 | 71 | |
232 | 235 | fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE); |
233 | 236 | } |
234 | 237 | |
/**
 * @brief Returns the deflate distance symbol value for a match distance.
 *
 * @param dist  Match distance; asserted to lie in the range 1..32768.
 * @returns The distance symbol derived from dist, or ~0 when dist is larger
 *          than 32768 (only reachable when assertions are compiled out).
 */
static uint32_t convert_dist_to_dist_sym(uint32_t dist)
{
	uint32_t shift = 0;

	assert(dist <= 32768 && dist > 0);

	/* Out-of-range distance: hand back the all-ones sentinel. */
	if (dist > 32768)
		return ~0;

	/* Distances up to 4 use a shift of zero, i.e. the symbol is dist - 1.
	 * Larger distances derive the shift from bsr(), which is declared
	 * outside this block -- presumably the bit index of the highest set
	 * bit; confirm against its definition. */
	if (dist > 4)
		shift = bsr(dist - 1) - 2;

	return (shift * 2) + ((dist - 1) >> shift);
}
248 | ||
/**
 * @brief Returns the deflate symbol value for a repeat length.
 *
 * @param length  Repeat (match) length; asserted to lie in the range 3..258.
 * @returns The literal/length symbol (257..285) that covers length.
 */
static uint32_t convert_length_to_len_sym(uint32_t length)
{
	/* Based on tables on page 11 in RFC 1951.
	 * Tier k covers the lengths below tier_limit[k] that no earlier tier
	 * claimed; its first symbol is 257 + 4 * k and every symbol in the
	 * tier spans 2^k consecutive lengths. */
	static const uint32_t tier_limit[] = { 11, 19, 35, 67, 131, 258 };
	const uint32_t tier_count = sizeof(tier_limit) / sizeof(tier_limit[0]);
	uint32_t tier;

	assert(length > 2 && length < 259);

	for (tier = 0; tier < tier_count; tier++) {
		if (length < tier_limit[tier])
			return 257 + 4 * tier + ((length - 3) >> tier);
	}

	/* Only the maximum length, 258, falls through to the last symbol. */
	return 285;
}
272 | ||
273 | void isal_update_histogram_dict(uint8_t * start_stream, int dict_length, int length, | |
274 | struct isal_huff_histogram *histogram) | |
275 | { | |
276 | uint32_t literal = 0, hash; | |
277 | uint16_t seen, *last_seen = histogram->hash_table; | |
278 | uint8_t *current, *end_stream, *next_hash, *end, *end_dict; | |
279 | uint32_t match_length; | |
280 | uint32_t dist; | |
281 | uint64_t *lit_len_histogram = histogram->lit_len_histogram; | |
282 | uint64_t *dist_histogram = histogram->dist_histogram; | |
283 | ||
284 | if (length <= 0) | |
285 | return; | |
286 | ||
287 | end_stream = start_stream + dict_length + length; | |
288 | end_dict = start_stream + dict_length; | |
289 | ||
290 | memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */ | |
291 | ||
292 | for (current = start_stream; current < end_dict - 4; current++) { | |
293 | literal = load_u32(current); | |
294 | hash = compute_hash(literal) & LVL0_HASH_MASK; | |
295 | last_seen[hash] = (current - start_stream) & 0xFFFF; | |
296 | } | |
297 | ||
298 | for (current = start_stream + dict_length; current < end_stream - 3; current++) { | |
299 | literal = load_u32(current); | |
300 | hash = compute_hash(literal) & LVL0_HASH_MASK; | |
301 | seen = last_seen[hash]; | |
302 | last_seen[hash] = (current - start_stream) & 0xFFFF; | |
303 | dist = (current - start_stream - seen) & 0xFFFF; | |
304 | if (dist - 1 < D - 1) { | |
305 | assert(start_stream <= current - dist); | |
306 | match_length = | |
307 | compare258(current - dist, current, end_stream - current); | |
308 | if (match_length >= SHORTEST_MATCH) { | |
309 | next_hash = current; | |
310 | #ifdef ISAL_LIMIT_HASH_UPDATE | |
311 | end = next_hash + 3; | |
312 | #else | |
313 | end = next_hash + match_length; | |
314 | #endif | |
315 | if (end > end_stream - 3) | |
316 | end = end_stream - 3; | |
317 | next_hash++; | |
318 | for (; next_hash < end; next_hash++) { | |
319 | literal = load_u32(next_hash); | |
320 | hash = compute_hash(literal) & LVL0_HASH_MASK; | |
321 | last_seen[hash] = (next_hash - start_stream) & 0xFFFF; | |
322 | } | |
323 | ||
324 | dist_histogram[convert_dist_to_dist_sym(dist)] += 1; | |
325 | lit_len_histogram[convert_length_to_len_sym(match_length)] += | |
326 | 1; | |
327 | current += match_length - 1; | |
328 | continue; | |
329 | } | |
330 | } | |
331 | lit_len_histogram[literal & 0xFF] += 1; | |
332 | } | |
333 | ||
334 | for (; current < end_stream; current++) | |
335 | lit_len_histogram[*current] += 1; | |
336 | ||
337 | lit_len_histogram[256] += 1; | |
338 | return; | |
339 | } | |
340 | ||
235 | 341 | int main(int argc, char *argv[]) |
236 | 342 | { |
237 | 343 | long int file_length; |
344 | int argi = 1; | |
238 | 345 | uint8_t *stream = NULL; |
239 | 346 | struct isal_hufftables hufftables; |
240 | 347 | struct isal_huff_histogram histogram; |
241 | 348 | struct isal_zstream tmp_stream; |
242 | FILE *file; | |
349 | FILE *file = NULL; | |
350 | FILE *dict_file = NULL; | |
351 | FILE *hist_file = NULL; | |
352 | long int dict_file_length = 0; | |
353 | long int hist_file_length = 0; | |
354 | uint8_t *dict_stream = NULL; | |
243 | 355 | |
244 | 356 | if (argc == 1) { |
245 | 357 | printf("Error, no input file.\n"); |
246 | 358 | return 1; |
247 | 359 | } |
248 | 360 | |
249 | memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */ | |
250 | ||
251 | while (argc > 1) { | |
252 | printf("Processing %s\n", argv[argc - 1]); | |
253 | file = fopen(argv[argc - 1], "r"); | |
361 | if (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'd') { | |
362 | dict_file = fopen(argv[2], "r"); | |
363 | ||
364 | fseek(dict_file, 0, SEEK_END); | |
365 | dict_file_length = ftell(dict_file); | |
366 | fseek(dict_file, 0, SEEK_SET); | |
367 | dict_file_length -= ftell(dict_file); | |
368 | dict_stream = malloc(dict_file_length); | |
369 | if (dict_stream == NULL) { | |
370 | printf("Failed to allocate memory to read in dictionary file\n"); | |
371 | fclose(dict_file); | |
372 | return 1; | |
373 | } | |
374 | if (fread(dict_stream, 1, dict_file_length, dict_file) != dict_file_length) { | |
375 | printf("Error occurred when reading dictionary file"); | |
376 | fclose(dict_file); | |
377 | free(dict_stream); | |
378 | return 1; | |
379 | } | |
380 | isal_update_histogram(dict_stream, dict_file_length, &histogram); | |
381 | ||
382 | printf("Read %ld bytes of dictionary file %s\n", dict_file_length, argv[2]); | |
383 | argi += 2; | |
384 | fclose(dict_file); | |
385 | free(dict_stream); | |
386 | } | |
387 | ||
388 | if ((argc > argi + 1) && argv[argi][0] == '-' && argv[argi][1] == 'h') { | |
389 | hist_file = fopen(argv[argi + 1], "r+"); | |
390 | fseek(hist_file, 0, SEEK_END); | |
391 | hist_file_length = ftell(hist_file); | |
392 | fseek(hist_file, 0, SEEK_SET); | |
393 | hist_file_length -= ftell(hist_file); | |
394 | if (hist_file_length > sizeof(histogram)) { | |
395 | printf("Histogram file too long\n"); | |
396 | return 1; | |
397 | } | |
398 | if (fread(&histogram, 1, hist_file_length, hist_file) != hist_file_length) { | |
399 | printf("Error occurred when reading history file"); | |
400 | fclose(hist_file); | |
401 | return 1; | |
402 | } | |
403 | fseek(hist_file, 0, SEEK_SET); | |
404 | ||
405 | printf("Read %ld bytes of history file %s\n", hist_file_length, | |
406 | argv[argi + 1]); | |
407 | argi += 2; | |
408 | } else | |
409 | memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */ | |
410 | ||
411 | while (argi < argc) { | |
412 | printf("Processing %s\n", argv[argi]); | |
413 | file = fopen(argv[argi], "r"); | |
254 | 414 | if (file == NULL) { |
255 | 415 | printf("Error opening file\n"); |
256 | 416 | return 1; |
259 | 419 | file_length = ftell(file); |
260 | 420 | fseek(file, 0, SEEK_SET); |
261 | 421 | file_length -= ftell(file); |
262 | stream = malloc(file_length); | |
422 | stream = malloc(file_length + dict_file_length); | |
263 | 423 | if (stream == NULL) { |
264 | 424 | printf("Failed to allocate memory to read in file\n"); |
265 | 425 | fclose(file); |
266 | 426 | return 1; |
267 | 427 | } |
268 | if (fread(stream, 1, file_length, file) != file_length) { | |
428 | if (dict_file_length > 0) | |
429 | memcpy(stream, dict_stream, dict_file_length); | |
430 | ||
431 | if (fread(&stream[dict_file_length], 1, file_length, file) != file_length) { | |
269 | 432 | printf("Error occurred when reading file"); |
270 | 433 | fclose(file); |
271 | 434 | free(stream); |
274 | 437 | |
275 | 438 | /* Create a histogram of frequency of symbols found in stream to |
276 | 439 | * generate the huffman tree.*/ |
277 | isal_update_histogram(stream, file_length, &histogram); | |
440 | if (0 == dict_file_length) | |
441 | isal_update_histogram(stream, file_length, &histogram); | |
442 | else | |
443 | isal_update_histogram_dict(stream, dict_file_length, file_length, | |
444 | &histogram); | |
278 | 445 | |
279 | 446 | fclose(file); |
280 | 447 | free(stream); |
281 | argc--; | |
448 | argi++; | |
282 | 449 | } |
283 | 450 | |
284 | 451 | isal_create_hufftables(&hufftables, &histogram); |
303 | 470 | |
304 | 471 | fclose(file); |
305 | 472 | |
473 | if (hist_file) { | |
474 | int len = fwrite(&histogram, 1, sizeof(histogram), hist_file); | |
475 | printf("wrote %d bytes of histogram file\n", len); | |
476 | fclose(hist_file); | |
477 | } | |
306 | 478 | return 0; |
307 | 479 | } |
34 | 34 | #include "igzip_lib.h" |
35 | 35 | |
36 | 36 | #define STATIC_INFLATE_FILE "static_inflate.h" |
37 | #define DOUBLE_SYM_THRESH (4 * 1024) | |
38 | ||
39 | extern struct isal_hufftables hufftables_default; | |
37 | 40 | |
38 | 41 | /** |
39 | 42 | * @brief Prints a table of uint16_t elements to a file. |
115 | 118 | struct inflate_state state; |
116 | 119 | FILE *file; |
117 | 120 | uint8_t static_deflate_hdr = 3; |
118 | uint8_t tmp_space[8]; | |
121 | uint8_t tmp_space[8], *in_buf; | |
122 | ||
123 | if (NULL == (in_buf = malloc(DOUBLE_SYM_THRESH + 1))) { | |
124 | printf("Can not allocote memory\n"); | |
125 | return 1; | |
126 | } | |
119 | 127 | |
120 | 128 | isal_inflate_init(&state); |
121 | 129 | |
122 | state.next_in = &static_deflate_hdr; | |
123 | state.avail_in = sizeof(static_deflate_hdr); | |
130 | memcpy(in_buf, &static_deflate_hdr, sizeof(static_deflate_hdr)); | |
131 | state.next_in = in_buf; | |
132 | state.avail_in = DOUBLE_SYM_THRESH + 1; | |
124 | 133 | state.next_out = tmp_space; |
125 | 134 | state.avail_out = sizeof(tmp_space); |
126 | 135 | |
132 | 141 | printf("Error creating file hufftables_c.c\n"); |
133 | 142 | return 1; |
134 | 143 | } |
144 | // Add decode tables describing a type 2 static (fixed) header | |
135 | 145 | |
136 | 146 | fprintf(file, "#ifndef STATIC_HEADER_H\n" "#define STATIC_HEADER_H\n\n"); |
137 | 147 | |
156 | 166 | fprintf(file, "};\n\n"); |
157 | 167 | |
158 | 168 | fprintf(file, "#endif\n"); |
169 | ||
170 | // Add other tables for known dynamic headers - level 0 | |
171 | ||
172 | isal_inflate_init(&state); | |
173 | ||
174 | memcpy(in_buf, &hufftables_default.deflate_hdr, | |
175 | sizeof(hufftables_default.deflate_hdr)); | |
176 | state.next_in = in_buf; | |
177 | state.avail_in = DOUBLE_SYM_THRESH + 1; | |
178 | state.next_out = tmp_space; | |
179 | state.avail_out = sizeof(tmp_space); | |
180 | ||
181 | isal_inflate(&state); | |
182 | ||
183 | fprintf(file, "struct inflate_huff_code_large pregen_lit_huff_code = {\n"); | |
184 | fprint_uint32_table(file, state.lit_huff_code.short_code_lookup, | |
185 | sizeof(state.lit_huff_code.short_code_lookup) / sizeof(uint32_t), | |
186 | "\t.short_code_lookup = {", "\t},\n\n", "\t\t"); | |
187 | fprint_uint16_table(file, state.lit_huff_code.long_code_lookup, | |
188 | sizeof(state.lit_huff_code.long_code_lookup) / sizeof(uint16_t), | |
189 | "\t.long_code_lookup = {", "\t}\n", "\t\t"); | |
190 | fprintf(file, "};\n\n"); | |
191 | ||
192 | fprintf(file, "struct inflate_huff_code_small pregen_dist_huff_code = {\n"); | |
193 | fprint_uint16_table(file, state.dist_huff_code.short_code_lookup, | |
194 | sizeof(state.dist_huff_code.short_code_lookup) / sizeof(uint16_t), | |
195 | "\t.short_code_lookup = {", "\t},\n\n", "\t\t"); | |
196 | fprint_uint16_table(file, state.dist_huff_code.long_code_lookup, | |
197 | sizeof(state.dist_huff_code.long_code_lookup) / sizeof(uint16_t), | |
198 | "\t.long_code_lookup = {", "\t}\n", "\t\t"); | |
199 | fprintf(file, "};\n\n"); | |
200 | ||
159 | 201 | fclose(file); |
160 | ||
202 | free(in_buf); | |
161 | 203 | return 0; |
162 | 204 | } |
1240 | 1240 | stream->internal_state.has_hist = IGZIP_HIST; |
1241 | 1241 | } |
1242 | 1242 | |
1243 | int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict, | |
1244 | uint8_t * dict_data, uint32_t dict_len) | |
1245 | { | |
1246 | if ((dict == NULL) | |
1247 | || (dict_len == 0) | |
1248 | || (dict->level > ISAL_DEF_MAX_LEVEL)) | |
1249 | return ISAL_INVALID_STATE; | |
1250 | ||
1251 | if (dict_len > IGZIP_HIST_SIZE) { | |
1252 | dict_data = dict_data + dict_len - IGZIP_HIST_SIZE; | |
1253 | dict_len = IGZIP_HIST_SIZE; | |
1254 | } | |
1255 | ||
1256 | dict->level = stream->level; | |
1257 | dict->hist_size = dict_len; | |
1258 | memcpy(dict->history, dict_data, dict_len); | |
1259 | memset(dict->hashtable, -1, sizeof(dict->hashtable)); | |
1260 | ||
1261 | switch (stream->level) { | |
1262 | case 3: | |
1263 | dict->hash_size = IGZIP_LVL3_HASH_SIZE; | |
1264 | isal_deflate_hash_lvl3(dict->hashtable, LVL3_HASH_MASK, | |
1265 | 0, dict_data, dict_len); | |
1266 | break; | |
1267 | ||
1268 | case 2: | |
1269 | dict->hash_size = IGZIP_LVL2_HASH_SIZE; | |
1270 | isal_deflate_hash_lvl2(dict->hashtable, LVL2_HASH_MASK, | |
1271 | 0, dict_data, dict_len); | |
1272 | break; | |
1273 | case 1: | |
1274 | dict->hash_size = IGZIP_LVL1_HASH_SIZE; | |
1275 | isal_deflate_hash_lvl1(dict->hashtable, LVL1_HASH_MASK, | |
1276 | 0, dict_data, dict_len); | |
1277 | break; | |
1278 | default: | |
1279 | dict->hash_size = IGZIP_LVL0_HASH_SIZE; | |
1280 | isal_deflate_hash_lvl0(dict->hashtable, LVL0_HASH_MASK, | |
1281 | 0, dict_data, dict_len); | |
1282 | } | |
1283 | return COMP_OK; | |
1284 | } | |
1285 | ||
1286 | int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict) | |
1287 | { | |
1288 | struct isal_zstate *state = &stream->internal_state; | |
1289 | struct level_buf *level_buf = (struct level_buf *)stream->level_buf; | |
1290 | int ret; | |
1291 | ||
1292 | if ((state->state != ZSTATE_NEW_HDR) | |
1293 | || (state->b_bytes_processed != state->b_bytes_valid) | |
1294 | || (dict->level != stream->level) | |
1295 | || (dict->hist_size == 0) | |
1296 | || (dict->hist_size > IGZIP_HIST_SIZE) | |
1297 | || (dict->hash_size > IGZIP_LVL3_HASH_SIZE)) | |
1298 | return ISAL_INVALID_STATE; | |
1299 | ||
1300 | ret = check_level_req(stream); | |
1301 | if (ret) | |
1302 | return ret; | |
1303 | ||
1304 | memcpy(state->buffer, dict->history, dict->hist_size); | |
1305 | state->b_bytes_processed = dict->hist_size; | |
1306 | state->b_bytes_valid = dict->hist_size; | |
1307 | state->has_hist = IGZIP_DICT_HASH_SET; | |
1308 | ||
1309 | switch (stream->level) { | |
1310 | case 3: | |
1311 | memcpy(level_buf->lvl3.hash_table, dict->hashtable, | |
1312 | sizeof(level_buf->lvl3.hash_table)); | |
1313 | break; | |
1314 | ||
1315 | case 2: | |
1316 | memcpy(level_buf->lvl2.hash_table, dict->hashtable, | |
1317 | sizeof(level_buf->lvl2.hash_table)); | |
1318 | break; | |
1319 | case 1: | |
1320 | memcpy(level_buf->lvl1.hash_table, dict->hashtable, | |
1321 | sizeof(level_buf->lvl1.hash_table)); | |
1322 | break; | |
1323 | default: | |
1324 | memcpy(stream->internal_state.head, dict->hashtable, | |
1325 | sizeof(stream->internal_state.head)); | |
1326 | } | |
1327 | ||
1328 | return COMP_OK; | |
1329 | } | |
1330 | ||
1243 | 1331 | int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len) |
1244 | 1332 | { |
1245 | 1333 | struct isal_zstate *state = &stream->internal_state; |
1464 | 1552 | set_dist_mask(stream); |
1465 | 1553 | set_hash_mask(stream); |
1466 | 1554 | isal_deflate_hash(stream, state->buffer, state->b_bytes_processed); |
1555 | } else if (state->has_hist == IGZIP_DICT_HASH_SET) { | |
1556 | set_dist_mask(stream); | |
1557 | set_hash_mask(stream); | |
1467 | 1558 | } |
1468 | 1559 | |
1469 | 1560 | in_size = stream->avail_in + buffered_size; |
133 | 133 | %if ARCH == 04 |
134 | 134 | %define USE_HSWNI |
135 | 135 | %endif |
136 | ||
137 | [bits 64] | |
138 | default rel | |
139 | section .text | |
140 | ||
136 | 141 | ; void isal_deflate_body ( isal_zstream *stream ) |
137 | 142 | ; arg 1: rcx: addr of stream |
138 | 143 | global isal_deflate_body_ %+ ARCH |
139 | 144 | isal_deflate_body_ %+ ARCH %+ : |
145 | endbranch | |
140 | 146 | %ifidn __OUTPUT_FORMAT__, elf64 |
141 | 147 | mov rcx, rdi |
142 | 148 | %endif |
458 | 458 | decode_next_dist %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_extra_bits, %%next_bits |
459 | 459 | %endm |
460 | 460 | |
461 | [bits 64] | |
462 | default rel | |
463 | section .text | |
464 | ||
461 | 465 | global decode_huffman_code_block_stateless_ %+ ARCH |
462 | 466 | decode_huffman_code_block_stateless_ %+ ARCH %+ : |
467 | endbranch | |
463 | 468 | |
464 | 469 | FUNC_SAVE |
465 | 470 |
97 | 97 | %endif |
98 | 98 | %endm |
99 | 99 | |
100 | [bits 64] | |
101 | default rel | |
102 | section .text | |
103 | ||
100 | 104 | global isal_deflate_hash_crc_01 |
101 | 105 | isal_deflate_hash_crc_01: |
106 | endbranch | |
102 | 107 | FUNC_SAVE |
103 | 108 | |
104 | 109 | neg f_i |
97 | 97 | " -h help\n" |
98 | 98 | " -X use compression level X with 0 <= X <= 1\n" |
99 | 99 | " -b <size> input buffer size, 0 buffers all the input\n" |
100 | " -i <time> time in seconds to benchmark (at least 1)\n" | |
100 | " -i <time> time in seconds to benchmark (at least 0)\n" | |
101 | 101 | " -o <file> output file for compresed data\n" |
102 | 102 | " -d <file> dictionary file used by compression\n" |
103 | 103 | " -w <size> log base 2 size of history window, between 8 and 15\n"); |
108 | 108 | void deflate_perf(struct isal_zstream *stream, uint8_t * inbuf, size_t infile_size, |
109 | 109 | size_t inbuf_size, uint8_t * outbuf, size_t outbuf_size, int level, |
110 | 110 | uint8_t * level_buf, int level_size, uint32_t hist_bits, uint8_t * dictbuf, |
111 | size_t dictfile_size, struct isal_hufftables *hufftables_custom) | |
111 | size_t dictfile_size, struct isal_dict *dict_str, | |
112 | struct isal_hufftables *hufftables_custom) | |
112 | 113 | { |
113 | 114 | int avail_in; |
114 | 115 | isal_deflate_init(stream); |
115 | if (dictbuf != NULL) | |
116 | isal_deflate_set_dict(stream, dictbuf, dictfile_size); | |
117 | stream->end_of_stream = 0; | |
118 | stream->flush = NO_FLUSH; | |
119 | 116 | stream->level = level; |
120 | 117 | stream->level_buf = level_buf; |
121 | 118 | stream->level_buf_size = level_size; |
119 | ||
120 | if (COMP_OK != isal_deflate_reset_dict(stream, dict_str)) | |
121 | if (dictbuf != NULL) | |
122 | isal_deflate_set_dict(stream, dictbuf, dictfile_size); | |
123 | ||
124 | stream->end_of_stream = 0; | |
125 | stream->flush = NO_FLUSH; | |
122 | 126 | stream->next_out = outbuf; |
123 | 127 | stream->avail_out = outbuf_size; |
124 | 128 | stream->next_in = inbuf; |
174 | 178 | break; |
175 | 179 | case 'i': |
176 | 180 | time = atoi(optarg); |
177 | if (time < 1) | |
181 | if (time < 0) | |
178 | 182 | usage(); |
179 | 183 | break; |
180 | 184 | case 'b': |
284 | 288 | exit(0); |
285 | 289 | } |
286 | 290 | |
291 | struct isal_dict dict_str; | |
292 | stream.level = level; | |
293 | isal_deflate_process_dict(&stream, &dict_str, dictbuf, dictfile_size); | |
294 | ||
287 | 295 | struct perf start; |
288 | BENCHMARK(&start, time, | |
289 | deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size, | |
290 | level, level_buf, level_size, hist_bits, dictbuf, | |
291 | dictfile_size, NULL)); | |
296 | if (time > 0) { | |
297 | BENCHMARK(&start, time, | |
298 | deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, | |
299 | outbuf_size, level, level_buf, level_size, hist_bits, | |
300 | dictbuf, dictfile_size, &dict_str, NULL)); | |
301 | } else { | |
302 | deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size, | |
303 | level, level_buf, level_size, hist_bits, dictbuf, | |
304 | dictfile_size, &dict_str, NULL); | |
305 | } | |
292 | 306 | if (stream.avail_in != 0) { |
293 | 307 | fprintf(stderr, "Could not compress all of inbuf\n"); |
294 | 308 | exit(0); |
306 | 320 | |
307 | 321 | deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size, |
308 | 322 | level, level_buf, level_size, hist_bits, dictbuf, |
309 | dictfile_size, &hufftables_custom); | |
323 | dictfile_size, &dict_str, &hufftables_custom); | |
310 | 324 | |
311 | 325 | printf(" ratio_custom=%3.1f%%", 100.0 * stream.total_out / infile_size); |
312 | 326 | } |
84 | 84 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
85 | 85 | f_end_i_mem_offset equ 0 ; local variable (8 bytes) |
86 | 86 | stack_size equ 8 |
87 | ||
88 | [bits 64] | |
89 | default rel | |
90 | section .text | |
91 | ||
87 | 92 | ; void isal_deflate_finish ( isal_zstream *stream ) |
88 | 93 | ; arg 1: rcx: addr of stream |
89 | 94 | global isal_deflate_finish_01 |
90 | 95 | isal_deflate_finish_01: |
96 | endbranch | |
91 | 97 | PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 |
92 | 98 | sub rsp, stack_size |
93 | 99 |
146 | 146 | %define stack_size 16 |
147 | 147 | %define local_storage_offset 0 |
148 | 148 | |
149 | %define func(x) x: | |
149 | %define func(x) x: endbranch | |
150 | 150 | %macro FUNC_SAVE 0 |
151 | 151 | push rbp |
152 | 152 | push r12 |
168 | 168 | %define VECT_SIZE 8 |
169 | 169 | %define HASH_BYTES 2 |
170 | 170 | |
171 | [bits 64] | |
172 | default rel | |
173 | section .text | |
174 | ||
171 | 175 | global gen_icf_map_lh1_04 |
172 | 176 | func(gen_icf_map_lh1_04) |
177 | endbranch | |
173 | 178 | FUNC_SAVE |
174 | 179 | |
175 | 180 | mov file_start, [stream + _next_in] |
142 | 142 | add rsp, stack_size |
143 | 143 | %endm |
144 | 144 | %else |
145 | %define func(x) x: | |
145 | %define func(x) x: endbranch | |
146 | 146 | %macro FUNC_SAVE 0 |
147 | 147 | push rbp |
148 | 148 | push r12 |
159 | 159 | %define VECT_SIZE 16 |
160 | 160 | %define HASH_BYTES 2 |
161 | 161 | |
162 | [bits 64] | |
163 | default rel | |
164 | section .text | |
165 | ||
162 | 166 | global gen_icf_map_lh1_06 |
163 | 167 | func(gen_icf_map_lh1_06) |
168 | endbranch | |
164 | 169 | FUNC_SAVE |
165 | 170 | |
166 | 171 | mov file_start, [stream + _next_in] |
154 | 154 | %define USE_HSWNI |
155 | 155 | %endif |
156 | 156 | |
157 | [bits 64] | |
158 | default rel | |
159 | section .text | |
160 | ||
157 | 161 | ; void isal_deflate_icf_body <hashsize> <arch> ( isal_zstream *stream ) |
158 | 162 | ; we make 6 different versions of this function |
159 | 163 | ; arg 1: rcx: addr of stream |
160 | 164 | global isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH |
161 | 165 | isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : |
166 | endbranch | |
162 | 167 | %ifidn __OUTPUT_FORMAT__, elf64 |
163 | 168 | mov rcx, rdi |
164 | 169 | %endif |
93 | 93 | |
94 | 94 | %xdefine METHOD hash_hist |
95 | 95 | |
96 | [bits 64] | |
97 | default rel | |
98 | section .text | |
99 | ||
96 | 100 | ; void isal_deflate_icf_finish ( isal_zstream *stream ) |
97 | 101 | ; arg 1: rcx: addr of stream |
98 | 102 | global isal_deflate_icf_finish_ %+ METHOD %+ _01 |
99 | 103 | isal_deflate_icf_finish_ %+ METHOD %+ _01: |
104 | endbranch | |
100 | 105 | PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 |
101 | 106 | sub rsp, stack_size |
102 | 107 |
52 | 52 | #endif |
53 | 53 | |
54 | 54 | extern int decode_huffman_code_block_stateless(struct inflate_state *, uint8_t * start_out); |
55 | extern struct isal_hufftables hufftables_default; /* For known header detection */ | |
55 | 56 | |
56 | 57 | #define LARGE_SHORT_SYM_LEN 25 |
57 | 58 | #define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1) |
931 | 932 | } |
932 | 933 | } |
933 | 934 | |
/* Checks whether the dynamic block header at the current input position is
 * identical to hufftables_default.deflate_hdr. On a match the header bytes
 * (and any trailing extra bits) are consumed from state and 1 is returned;
 * otherwise state is left untouched and 0 is returned. Always returns 0 when
 * ISAL_STATIC_INFLATE_TABLE is not defined.
 *
 * The caller must guarantee that enough input is available to read the whole
 * header plus its extra-bits byte from state->next_in. */
static int header_matches_pregen(struct inflate_state *state)
{
#ifndef ISAL_STATIC_INFLATE_TABLE
	return 0;
#else
	uint8_t *in, *hdr;
	uint32_t in_end_bits, hdr_end_bits;
	uint32_t bytes_read_in, header_len, remaining, last_bits, last_bit_mask;
	uint64_t bits_read_mask;
	uint64_t hdr_stash, in_stash;
	const uint64_t bits_read_prior = 3;	// Have read bfinal(1) and btype(2)

	/* More than 61 stashed bits can never pass the checks below, and a
	 * length of 64 would make the mask shift undefined, so bail out early.
	 * The cast also rejects a negative length. */
	if ((uint64_t) state->read_in_length > 64 - bits_read_prior)
		return 0;

	/* Check if stashed read_in_bytes match header */
	hdr = &(hufftables_default.deflate_hdr[0]);
	bits_read_mask = (1ull << state->read_in_length) - 1;
	hdr_stash = (load_u64(hdr) >> bits_read_prior) & bits_read_mask;
	in_stash = state->read_in & bits_read_mask;

	if (hdr_stash != in_stash)
		return 0;

	/* Check if input is byte aligned */
	if ((state->read_in_length + bits_read_prior) % 8)
		return 0;

	/* Check if header bulk is the same */
	in = state->next_in;
	bytes_read_in = (state->read_in_length + bits_read_prior) / 8;
	header_len = hufftables_default.deflate_hdr_count;

	/* Avoid unsigned wrap-around of the remaining length */
	if (header_len < bytes_read_in)
		return 0;
	remaining = header_len - bytes_read_in;

	if (memcmp(in, &hdr[bytes_read_in], remaining))
		return 0;

	/* If there are any last/end bits to the header check them too */
	last_bits = hufftables_default.deflate_hdr_extra_bits;

	if (last_bits != 0) {
		last_bit_mask = (1 << last_bits) - 1;
		in_end_bits = in[remaining] & last_bit_mask;
		hdr_end_bits = hdr[header_len] & last_bit_mask;
		if (in_end_bits != hdr_end_bits)
			return 0;
	}

	/* Header matches: consume the whole-byte part from the input ... */
	state->next_in += remaining;
	state->avail_in -= remaining;
	state->read_in_length = 0;
	state->read_in = 0;

	/* ... and then the trailing partial byte, if the header has one */
	if (last_bits != 0)
		inflate_in_read_bits(state, last_bits);

	return 1;
#endif // ISAL_STATIC_INFLATE_TABLE
}
994 | ||
/* Loads the pre-generated literal/length and distance decode tables into
 * state and switches the block state to ISAL_BLOCK_CODED. When
 * ISAL_STATIC_INFLATE_TABLE is not defined nothing is changed. Returns 0 in
 * both configurations. */
static int setup_pregen_header(struct inflate_state *state)
{
#ifndef ISAL_STATIC_INFLATE_TABLE
	/* No pre-generated tables were built in; nothing to install */
	(void)state;
	return 0;
#else
	/* Literal/length table first, then the distance table */
	memcpy(&state->lit_huff_code, &pregen_lit_huff_code,
	       sizeof(pregen_lit_huff_code));
	memcpy(&state->dist_huff_code, &pregen_dist_huff_code,
	       sizeof(pregen_dist_huff_code));

	state->block_state = ISAL_BLOCK_CODED;
	return 0;
#endif // ISAL_STATIC_INFLATE_TABLE
}
1004 | ||
934 | 1005 | /* Sets the inflate_huff_codes in state to be the huffcodes corresponding to the |
935 | 1006 | * deflate static header */ |
936 | 1007 | static int inline setup_static_header(struct inflate_state *state) |
1188 | 1259 | 0x10, 0x11, 0x12, 0x00, 0x08, 0x07, 0x09, 0x06, |
1189 | 1260 | 0x0a, 0x05, 0x0b, 0x04, 0x0c, 0x03, 0x0d, 0x02, 0x0e, 0x01, 0x0f |
1190 | 1261 | }; |
1262 | ||
1263 | /* If you are given a whole header and it matches the pregen header */ | |
1264 | if (state->avail_in > (hufftables_default.deflate_hdr_count + sizeof(uint64_t)) | |
1265 | && header_matches_pregen(state)) | |
1266 | return setup_pregen_header(state); | |
1191 | 1267 | |
1192 | 1268 | if (state->bfinal && state->avail_in <= SINGLE_SYM_THRESH) { |
1193 | 1269 | multisym = SINGLE_SYM_FLAG; |
1748 | 1824 | state->write_overflow_len = 0; |
1749 | 1825 | state->copy_overflow_length = 0; |
1750 | 1826 | state->copy_overflow_distance = 0; |
1827 | state->wrapper_flag = 0; | |
1751 | 1828 | state->tmp_in_size = 0; |
1752 | 1829 | state->tmp_out_processed = 0; |
1753 | 1830 | state->tmp_out_valid = 0; |
1785 | 1862 | } |
1786 | 1863 | |
1787 | 1864 | static inline uint32_t buffer_header_copy(struct inflate_state *state, uint32_t in_len, |
1788 | uint8_t * buf, uint32_t buf_len, uint32_t buf_error) | |
1865 | uint8_t * buf, uint32_t buffer_len, uint32_t offset, | |
1866 | uint32_t buf_error) | |
1789 | 1867 | { |
1790 | 1868 | uint32_t len = in_len; |
1869 | uint32_t buf_len = buffer_len - offset; | |
1870 | ||
1791 | 1871 | if (len > state->avail_in) |
1792 | 1872 | len = state->avail_in; |
1793 | 1873 | |
1794 | 1874 | if (buf != NULL && buf_len < len) { |
1795 | memcpy(buf, state->next_in, buf_len); | |
1875 | memcpy(&buf[offset], state->next_in, buf_len); | |
1796 | 1876 | state->next_in += buf_len; |
1797 | 1877 | state->avail_in -= buf_len; |
1798 | 1878 | state->count = in_len - buf_len; |
1799 | 1879 | return buf_error; |
1800 | 1880 | } else { |
1801 | 1881 | if (buf != NULL) |
1802 | memcpy(buf, state->next_in, len); | |
1882 | memcpy(&buf[offset], state->next_in, len); | |
1803 | 1883 | state->next_in += len; |
1804 | 1884 | state->avail_in -= len; |
1805 | 1885 | state->count = in_len - len; |
1812 | 1892 | } |
1813 | 1893 | |
1814 | 1894 | static inline uint32_t string_header_copy(struct inflate_state *state, |
1815 | char *str_buf, uint32_t str_len, uint32_t str_error) | |
1816 | { | |
1817 | uint32_t len, max_len = str_len; | |
1895 | char *str_buf, uint32_t str_len, | |
1896 | uint32_t offset, uint32_t str_error) | |
1897 | { | |
1898 | uint32_t len, max_len = str_len - offset; | |
1818 | 1899 | |
1819 | 1900 | if (max_len > state->avail_in || str_buf == NULL) |
1820 | 1901 | max_len = state->avail_in; |
1822 | 1903 | len = strnlen((char *)state->next_in, max_len); |
1823 | 1904 | |
1824 | 1905 | if (str_buf != NULL) |
1825 | memcpy(str_buf, state->next_in, len); | |
1906 | memcpy(&str_buf[offset], state->next_in, len); | |
1826 | 1907 | |
1827 | 1908 | state->next_in += len; |
1828 | 1909 | state->avail_in -= len; |
1829 | 1910 | state->count += len; |
1830 | 1911 | |
1831 | if (str_buf != NULL && len == str_len) | |
1912 | if (str_buf != NULL && len == (str_len - offset)) | |
1832 | 1913 | return str_error; |
1833 | 1914 | else if (state->avail_in <= 0) |
1834 | 1915 | return ISAL_END_INPUT; |
2001 | 2082 | case ISAL_GZIP_EXTRA: |
2002 | 2083 | offset = gz_hdr->extra_len - count; |
2003 | 2084 | ret = |
2004 | buffer_header_copy(state, count, gz_hdr->extra + offset, | |
2005 | gz_hdr->extra_buf_len - offset, | |
2006 | ISAL_EXTRA_OVERFLOW); | |
2085 | buffer_header_copy(state, count, gz_hdr->extra, | |
2086 | gz_hdr->extra_buf_len, | |
2087 | offset, ISAL_EXTRA_OVERFLOW); | |
2007 | 2088 | |
2008 | 2089 | if (ret) { |
2009 | 2090 | state->block_state = ISAL_GZIP_EXTRA; |
2016 | 2097 | if (flags & NAME_FLAG) { |
2017 | 2098 | case ISAL_GZIP_NAME: |
2018 | 2099 | offset = state->count; |
2019 | ret = string_header_copy(state, gz_hdr->name + offset, | |
2020 | gz_hdr->name_buf_len - offset, | |
2021 | ISAL_NAME_OVERFLOW); | |
2100 | ret = string_header_copy(state, gz_hdr->name, | |
2101 | gz_hdr->name_buf_len, | |
2102 | offset, ISAL_NAME_OVERFLOW); | |
2022 | 2103 | if (ret) { |
2023 | 2104 | state->block_state = ISAL_GZIP_NAME; |
2024 | 2105 | break; |
2028 | 2109 | if (flags & COMMENT_FLAG) { |
2029 | 2110 | case ISAL_GZIP_COMMENT: |
2030 | 2111 | offset = state->count; |
2031 | ret = string_header_copy(state, gz_hdr->comment + offset, | |
2032 | gz_hdr->comment_buf_len - offset, | |
2033 | ISAL_COMMENT_OVERFLOW); | |
2112 | ret = string_header_copy(state, gz_hdr->comment, | |
2113 | gz_hdr->comment_buf_len, | |
2114 | offset, ISAL_COMMENT_OVERFLOW); | |
2034 | 2115 | if (ret) { |
2035 | 2116 | state->block_state = ISAL_GZIP_COMMENT; |
2036 | 2117 | break; |
2146 | 2227 | |
2147 | 2228 | if (state->crc_flag == IGZIP_GZIP) { |
2148 | 2229 | struct isal_gzip_header gz_hdr; |
2230 | isal_gzip_header_init(&gz_hdr); | |
2149 | 2231 | ret = isal_read_gzip_header(state, &gz_hdr); |
2150 | 2232 | if (ret) |
2151 | 2233 | return ret; |
2152 | 2234 | } else if (state->crc_flag == IGZIP_ZLIB) { |
2153 | struct isal_zlib_header z_hdr; | |
2235 | struct isal_zlib_header z_hdr = { 0 }; | |
2154 | 2236 | ret = isal_read_zlib_header(state, &z_hdr); |
2155 | 2237 | if (ret) |
2156 | 2238 | return ret; |
2218 | 2300 | |
2219 | 2301 | if (!state->wrapper_flag && state->crc_flag == IGZIP_GZIP) { |
2220 | 2302 | struct isal_gzip_header gz_hdr; |
2303 | isal_gzip_header_init(&gz_hdr); | |
2221 | 2304 | ret = isal_read_gzip_header(state, &gz_hdr); |
2222 | 2305 | if (ret < 0) |
2223 | 2306 | return ret; |
2224 | 2307 | else if (ret > 0) |
2225 | 2308 | return ISAL_DECOMP_OK; |
2226 | 2309 | } else if (!state->wrapper_flag && state->crc_flag == IGZIP_ZLIB) { |
2227 | struct isal_zlib_header z_hdr; | |
2310 | struct isal_zlib_header z_hdr = { 0 }; | |
2228 | 2311 | ret = isal_read_zlib_header(state, &z_hdr); |
2229 | 2312 | if (ret < 0) |
2230 | 2313 | return ret; |
1096 | 1096 | uint32_t reset_test_flag = 0; |
1097 | 1097 | uint8_t tmp_symbol; |
1098 | 1098 | int no_mod = 0; |
1099 | struct isal_dict dict_str; | |
1099 | 1100 | |
1100 | 1101 | log_print("Starting Compress Multi Pass\n"); |
1101 | 1102 | |
1146 | 1147 | if (reset_test_flag) |
1147 | 1148 | isal_deflate_reset(stream); |
1148 | 1149 | |
1149 | if (dict != NULL) | |
1150 | isal_deflate_set_dict(stream, dict, dict_len); | |
1150 | if (dict != NULL) { | |
1151 | if (rand() % 2 == 0) | |
1152 | isal_deflate_set_dict(stream, dict, dict_len); | |
1153 | else { | |
1154 | isal_deflate_process_dict(stream, &dict_str, dict, dict_len); | |
1155 | isal_deflate_reset_dict(stream, &dict_str); | |
1156 | } | |
1157 | } | |
1151 | 1158 | |
1152 | 1159 | while (1) { |
1153 | 1160 | loop_count++; |
1287 | 1294 | uint8_t *level_buf = NULL; |
1288 | 1295 | struct isal_hufftables *huff_tmp; |
1289 | 1296 | uint32_t reset_test_flag = 0; |
1297 | struct isal_dict dict_str; | |
1290 | 1298 | |
1291 | 1299 | log_print("Starting Compress Single Pass\n"); |
1292 | 1300 | |
1334 | 1342 | if (reset_test_flag) |
1335 | 1343 | isal_deflate_reset(&stream); |
1336 | 1344 | |
1337 | if (dict != NULL) | |
1338 | isal_deflate_set_dict(&stream, dict, dict_len); | |
1345 | if (dict != NULL) { | |
1346 | if (rand() % 2 == 0) | |
1347 | isal_deflate_set_dict(&stream, dict, dict_len); | |
1348 | else { | |
1349 | isal_deflate_process_dict(&stream, &dict_str, dict, dict_len); | |
1350 | isal_deflate_reset_dict(&stream, &dict_str); | |
1351 | } | |
1352 | } | |
1339 | 1353 | |
1340 | 1354 | ret = |
1341 | 1355 | isal_deflate_with_checks(&stream, data_size, *compressed_size, data, data_size, |
121 | 121 | add rsp, stack_size |
122 | 122 | %endm |
123 | 123 | %else |
124 | %define func(x) x: | |
124 | %define func(x) x: endbranch | |
125 | 125 | %macro FUNC_SAVE 0 |
126 | 126 | push r12 |
127 | 127 | push r13 |
134 | 134 | %endif |
135 | 135 | %define VECT_SIZE 8 |
136 | 136 | |
137 | [bits 64] | |
138 | default rel | |
139 | section .text | |
140 | ||
137 | 141 | global set_long_icf_fg_04 |
138 | 142 | func(set_long_icf_fg_04) |
143 | endbranch | |
139 | 144 | FUNC_SAVE |
140 | 145 | |
141 | 146 | lea end_in, [next_in + arg3] |
128 | 128 | add rsp, stack_size |
129 | 129 | %endm |
130 | 130 | %else |
131 | %define func(x) x: | |
131 | %define func(x) x: endbranch | |
132 | 132 | %macro FUNC_SAVE 0 |
133 | 133 | push r12 |
134 | 134 | push r13 |
141 | 141 | %endif |
142 | 142 | %define VECT_SIZE 16 |
143 | 143 | |
144 | [bits 64] | |
145 | default rel | |
146 | section .text | |
147 | ||
144 | 148 | global set_long_icf_fg_06 |
145 | 149 | func(set_long_icf_fg_06) |
150 | endbranch | |
146 | 151 | FUNC_SAVE |
147 | 152 | |
148 | 153 | lea end_in, [next_in + arg3] |
248 | 248 | cmovle %%dist_coded, %%dist |
249 | 249 | %endm |
250 | 250 | |
251 | [bits 64] | |
252 | default rel | |
253 | section .text | |
254 | ||
251 | 255 | ; void isal_update_histogram |
252 | 256 | global isal_update_histogram_ %+ ARCH |
253 | 257 | isal_update_histogram_ %+ ARCH %+ : |
258 | endbranch | |
254 | 259 | FUNC_SAVE |
255 | 260 | |
256 | 261 | %ifnidn file_start, arg0 |
53 | 53 | %define i r11 |
54 | 54 | %define tmp2 r12 |
55 | 55 | |
56 | [bits 64] | |
57 | default rel | |
58 | section .text | |
59 | ||
56 | 60 | global build_huff_tree |
57 | 61 | build_huff_tree: |
62 | endbranch | |
58 | 63 | %ifidn __OUTPUT_FORMAT__, win64 |
59 | 64 | push rsi |
60 | 65 | push rdi |
103 | 108 | align 32 |
104 | 109 | global build_heap |
105 | 110 | build_heap: |
111 | endbranch | |
106 | 112 | %ifidn __OUTPUT_FORMAT__, win64 |
107 | 113 | push rsi |
108 | 114 | push rdi |
44 | 44 | ;; uint16_t len_start[32]; |
45 | 45 | ;; }; |
46 | 46 | |
47 | global rfc1951_lookup_table:ISAL_SYM_TYPE_DATA_INTERNAL | |
47 | mk_global rfc1951_lookup_table, data, internal | |
48 | 48 | rfc1951_lookup_table: |
49 | 49 | len_to_code: |
50 | 50 | db 0x00, 0x00, 0x00 |
1343 | 1343 | }; |
1344 | 1344 | |
1345 | 1345 | #endif |
1346 | struct inflate_huff_code_large pregen_lit_huff_code = { | |
1347 | .short_code_lookup = { | |
1348 | 0x24000102, 0x88010265, 0x44000103, 0xa8010277, | |
1349 | 0x24000102, 0x98010268, 0x78010220, 0xb80102e0, | |
1350 | 0x24000102, 0x88010273, 0x44000104, 0xb8010235, | |
1351 | 0x24000102, 0x74000108, 0x64000109, 0xc80102fd, | |
1352 | 0x24000102, 0x8801026f, 0x44000103, 0xb8010206, | |
1353 | 0x24000102, 0x98010270, 0x54000105, 0xc8010259, | |
1354 | 0x24000102, 0x9801020a, 0x44000104, 0xb8010249, | |
1355 | 0x24000102, 0xa8010230, 0x88010200, 0xb40000ad, | |
1356 | 0x24000102, 0x88010269, 0x44000103, 0x9400010d, | |
1357 | 0x24000102, 0x9801026d, 0xb8006520, 0xc8010223, | |
1358 | 0x24000102, 0x64000106, 0x44000104, 0xb801023f, | |
1359 | 0x24000102, 0xa8010204, 0x6400010a, 0xb4000087, | |
1360 | 0x24000102, 0x88010272, 0x44000103, 0xb8010210, | |
1361 | 0x24000102, 0x98010275, 0x54000105, 0xc80102a6, | |
1362 | 0x24000102, 0x98010263, 0x44000104, 0xb8010254, | |
1363 | 0x24000102, 0xa8010242, 0x88010261, 0xb40000d7, | |
1364 | 0x24000102, 0xc8006565, 0x44000103, 0xa80102ff, | |
1365 | 0x24000102, 0x9801026c, 0x98010320, 0xc8010211, | |
1366 | 0x24000102, 0xc8006573, 0x44000104, 0xb8010239, | |
1367 | 0x24000102, 0xa8010201, 0x64000109, 0xb4000017, | |
1368 | 0x24000102, 0xc800656f, 0x44000103, 0xb801020b, | |
1369 | 0x24000102, 0x98010274, 0x54000105, 0xc801027c, | |
1370 | 0x24000102, 0x9801022c, 0x44000104, 0xb801024f, | |
1371 | 0x24000102, 0xa8010232, 0xc8006500, 0xb40000c4, | |
1372 | 0x24000102, 0xc8006569, 0x44000103, 0xa4000111, | |
1373 | 0x24000102, 0x9801026e, 0x54000020, 0xc801023d, | |
1374 | 0x24000102, 0x64000106, 0x44000104, 0xb8010245, | |
1375 | 0x24000102, 0xa801022d, 0x6400010a, 0xb400009a, | |
1376 | 0x24000102, 0xc8006572, 0x44000103, 0xb8010228, | |
1377 | 0x24000102, 0x74000107, 0x54000105, 0xc80102e3, | |
1378 | 0x24000102, 0x98010264, 0x44000104, 0xb8010280, | |
1379 | 0x24000102, 0xa8010266, 0xc8006561, 0xb40000eb, | |
1380 | 0x24000102, 0xa8010365, 0x44000103, 0xa8010279, | |
1381 | 0x24000102, 0x74000068, 0x78010220, 0xb80102fe, | |
1382 | 0x24000102, 0xa8010373, 0x44000104, 0xb8010237, | |
1383 | 0x24000102, 0x74000108, 0x64000109, 0x36000008, | |
1384 | 0x24000102, 0xa801036f, 0x44000103, 0xb8010208, | |
1385 | 0x24000102, 0x74000070, 0x54000105, 0xc8010260, | |
1386 | 0x24000102, 0x7400000a, 0x44000104, 0xb801024d, | |
1387 | 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b7, | |
1388 | 0x24000102, 0xa8010369, 0x44000103, 0x9400010f, | |
1389 | 0x24000102, 0x7400006d, 0xc8006820, 0xc801022a, | |
1390 | 0x24000102, 0x64000106, 0x44000104, 0xb8010243, | |
1391 | 0x24000102, 0xa801020d, 0x6400010a, 0xb4000092, | |
1392 | 0x24000102, 0xa8010372, 0x44000103, 0xb8010222, | |
1393 | 0x24000102, 0x74000075, 0x54000105, 0xc80102c1, | |
1394 | 0x24000102, 0x74000063, 0x44000104, 0xb8010276, | |
1395 | 0x24000102, 0xa8010262, 0xa8010361, 0xb40000df, | |
1396 | 0x24000102, 0x64000065, 0x44000103, 0x9400010b, | |
1397 | 0x24000102, 0x7400006c, 0xa8002020, 0xc8010218, | |
1398 | 0x24000102, 0x64000073, 0x44000104, 0xb801023b, | |
1399 | 0x24000102, 0xa8010203, 0x64000109, 0xb400007b, | |
1400 | 0x24000102, 0x6400006f, 0x44000103, 0xb801020e, | |
1401 | 0x24000102, 0x74000074, 0x54000105, 0xc801028f, | |
1402 | 0x24000102, 0x7400002c, 0x44000104, 0xb8010252, | |
1403 | 0x24000102, 0xa8010241, 0x64000000, 0xb40000cd, | |
1404 | 0x24000102, 0x64000069, 0x44000103, 0xb8010202, | |
1405 | 0x24000102, 0x7400006e, 0x54000020, 0xc8010255, | |
1406 | 0x24000102, 0x64000106, 0x44000104, 0xb8010247, | |
1407 | 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a3, | |
1408 | 0x24000102, 0x64000072, 0x44000103, 0xb8010233, | |
1409 | 0x24000102, 0x74000107, 0x54000105, 0xc80102f3, | |
1410 | 0x24000102, 0x74000064, 0x44000104, 0xb80102c2, | |
1411 | 0x24000102, 0xa8010267, 0x64000061, 0xb40000f6, | |
1412 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
1413 | 0x24000102, 0xb8010368, 0x78010220, 0xb80102f0, | |
1414 | 0x24000102, 0x88010273, 0x44000104, 0xb8010236, | |
1415 | 0x24000102, 0x74000108, 0x64000109, 0xc400011d, | |
1416 | 0x24000102, 0x8801026f, 0x44000103, 0xb8010207, | |
1417 | 0x24000102, 0xb8010370, 0x54000105, 0xc801025c, | |
1418 | 0x24000102, 0xb801030a, 0x44000104, 0xb801024c, | |
1419 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b2, | |
1420 | 0x24000102, 0x88010269, 0x44000103, 0x9400010e, | |
1421 | 0x24000102, 0xb801036d, 0xb8007320, 0xc8010225, | |
1422 | 0x24000102, 0x64000106, 0x44000104, 0xb8010240, | |
1423 | 0x24000102, 0x84000004, 0x6400010a, 0xb400008c, | |
1424 | 0x24000102, 0x88010272, 0x44000103, 0xb801021f, | |
1425 | 0x24000102, 0xb8010375, 0x54000105, 0xc80102b4, | |
1426 | 0x24000102, 0xb8010363, 0x44000104, 0xb801026b, | |
1427 | 0x24000102, 0x84000042, 0x88010261, 0xb40000db, | |
1428 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
1429 | 0x24000102, 0xb801036c, 0x98010420, 0xc8010213, | |
1430 | 0x24000102, 0x64000073, 0x44000104, 0xb801023a, | |
1431 | 0x24000102, 0x84000001, 0x64000109, 0xb400001d, | |
1432 | 0x24000102, 0x6400006f, 0x44000103, 0xb801020c, | |
1433 | 0x24000102, 0xb8010374, 0x54000105, 0xc801027f, | |
1434 | 0x24000102, 0xb801032c, 0x44000104, 0xb8010250, | |
1435 | 0x24000102, 0x84000032, 0x64000000, 0xb40000c9, | |
1436 | 0x24000102, 0x64000069, 0x44000103, 0xa4000112, | |
1437 | 0x24000102, 0xb801036e, 0x54000020, 0xc801024b, | |
1438 | 0x24000102, 0x64000106, 0x44000104, 0xb8010246, | |
1439 | 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e, | |
1440 | 0x24000102, 0x64000072, 0x44000103, 0xb8010229, | |
1441 | 0x24000102, 0x74000107, 0x54000105, 0xc80102e8, | |
1442 | 0x24000102, 0xb8010364, 0x44000104, 0xb80102c0, | |
1443 | 0x24000102, 0x84000066, 0x64000061, 0xb40000ef, | |
1444 | 0x24000102, 0xb8002065, 0x44000103, 0x84000079, | |
1445 | 0x24000102, 0x74000068, 0x78010220, 0xb4000115, | |
1446 | 0x24000102, 0xb8002073, 0x44000104, 0xb8010238, | |
1447 | 0x24000102, 0x74000108, 0x64000109, 0x36000018, | |
1448 | 0x24000102, 0xb800206f, 0x44000103, 0xb8010209, | |
1449 | 0x24000102, 0x74000070, 0x54000105, 0xc8010271, | |
1450 | 0x24000102, 0x7400000a, 0x44000104, 0xb801024e, | |
1451 | 0x24000102, 0x84000031, 0xb8002000, 0xb40000bb, | |
1452 | 0x24000102, 0xb8002069, 0x44000103, 0x94000110, | |
1453 | 0x24000102, 0x7400006d, 0xc8010820, 0xc801022f, | |
1454 | 0x24000102, 0x64000106, 0x44000104, 0xb8010244, | |
1455 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096, | |
1456 | 0x24000102, 0xb8002072, 0x44000103, 0xb8010227, | |
1457 | 0x24000102, 0x74000075, 0x54000105, 0xc80102cf, | |
1458 | 0x24000102, 0x74000063, 0x44000104, 0xb8010278, | |
1459 | 0x24000102, 0x84000062, 0xb8002061, 0xb40000e5, | |
1460 | 0x24000102, 0x64000065, 0x44000103, 0x9400010c, | |
1461 | 0x24000102, 0x7400006c, 0xb8010920, 0xc801021e, | |
1462 | 0x24000102, 0x64000073, 0x44000104, 0xb801023e, | |
1463 | 0x24000102, 0x84000003, 0x64000109, 0xb4000083, | |
1464 | 0x24000102, 0x6400006f, 0x44000103, 0xb801020f, | |
1465 | 0x24000102, 0x74000074, 0x54000105, 0xc80102a0, | |
1466 | 0x24000102, 0x7400002c, 0x44000104, 0xb8010253, | |
1467 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d3, | |
1468 | 0x24000102, 0x64000069, 0x44000103, 0xb8010205, | |
1469 | 0x24000102, 0x7400006e, 0x54000020, 0xc8010257, | |
1470 | 0x24000102, 0x64000106, 0x44000104, 0xb8010248, | |
1471 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9, | |
1472 | 0x24000102, 0x64000072, 0x44000103, 0xb8010234, | |
1473 | 0x24000102, 0x74000107, 0x54000105, 0xc80102f9, | |
1474 | 0x24000102, 0x74000064, 0x44000104, 0xb80102c3, | |
1475 | 0x24000102, 0x84000067, 0x64000061, 0x3e000120, | |
1476 | 0x24000102, 0x88010265, 0x44000103, 0xc8010377, | |
1477 | 0x24000102, 0x98010268, 0x78010220, 0x940000e0, | |
1478 | 0x24000102, 0x88010273, 0x44000104, 0x94000035, | |
1479 | 0x24000102, 0x74000108, 0x64000109, 0xc4000119, | |
1480 | 0x24000102, 0x8801026f, 0x44000103, 0x94000006, | |
1481 | 0x24000102, 0x98010270, 0x54000105, 0xc801025b, | |
1482 | 0x24000102, 0x9801020a, 0x44000104, 0x94000049, | |
1483 | 0x24000102, 0xc8010330, 0x88010200, 0xb40000af, | |
1484 | 0x24000102, 0x88010269, 0x44000103, 0x9400010d, | |
1485 | 0x24000102, 0x9801026d, 0xb8006f20, 0xc8010224, | |
1486 | 0x24000102, 0x64000106, 0x44000104, 0x9400003f, | |
1487 | 0x24000102, 0xc8010304, 0x6400010a, 0xb4000089, | |
1488 | 0x24000102, 0x88010272, 0x44000103, 0x94000010, | |
1489 | 0x24000102, 0x98010275, 0x54000105, 0xc80102b0, | |
1490 | 0x24000102, 0x98010263, 0x44000104, 0x94000054, | |
1491 | 0x24000102, 0xc8010342, 0x88010261, 0xb40000d9, | |
1492 | 0x24000102, 0xc8007365, 0x44000103, 0xc80103ff, | |
1493 | 0x24000102, 0x9801026c, 0x98010320, 0xc8010212, | |
1494 | 0x24000102, 0xc8007373, 0x44000104, 0x94000039, | |
1495 | 0x24000102, 0xc8010301, 0x64000109, 0xb400001a, | |
1496 | 0x24000102, 0xc800736f, 0x44000103, 0x9400000b, | |
1497 | 0x24000102, 0x98010274, 0x54000105, 0xc801027e, | |
1498 | 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, | |
1499 | 0x24000102, 0xc8010332, 0xc8007300, 0xb40000c6, | |
1500 | 0x24000102, 0xc8007369, 0x44000103, 0xa4000113, | |
1501 | 0x24000102, 0x9801026e, 0x54000020, 0xc801024a, | |
1502 | 0x24000102, 0x64000106, 0x44000104, 0x94000045, | |
1503 | 0x24000102, 0xc801032d, 0x6400010a, 0xb400009c, | |
1504 | 0x24000102, 0xc8007372, 0x44000103, 0x94000028, | |
1505 | 0x24000102, 0x74000107, 0x54000105, 0xc80102e7, | |
1506 | 0x24000102, 0x98010264, 0x44000104, 0x94000080, | |
1507 | 0x24000102, 0xc8010366, 0xc8007361, 0xb40000ed, | |
1508 | 0x24000102, 0xa8010465, 0x44000103, 0xc8010379, | |
1509 | 0x24000102, 0x74000068, 0x78010220, 0x940000fe, | |
1510 | 0x24000102, 0xa8010473, 0x44000104, 0x94000037, | |
1511 | 0x24000102, 0x74000108, 0x64000109, 0x36000010, | |
1512 | 0x24000102, 0xa801046f, 0x44000103, 0x94000008, | |
1513 | 0x24000102, 0x74000070, 0x54000105, 0xc801026a, | |
1514 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, | |
1515 | 0x24000102, 0xc8010331, 0xa8010400, 0xb40000b9, | |
1516 | 0x24000102, 0xa8010469, 0x44000103, 0x9400010f, | |
1517 | 0x24000102, 0x7400006d, 0xc8007020, 0xc801022b, | |
1518 | 0x24000102, 0x64000106, 0x44000104, 0x94000043, | |
1519 | 0x24000102, 0xc801030d, 0x6400010a, 0xb4000094, | |
1520 | 0x24000102, 0xa8010472, 0x44000103, 0x94000022, | |
1521 | 0x24000102, 0x74000075, 0x54000105, 0xc80102c7, | |
1522 | 0x24000102, 0x74000063, 0x44000104, 0x94000076, | |
1523 | 0x24000102, 0xc8010362, 0xa8010461, 0xb40000e2, | |
1524 | 0x24000102, 0x64000065, 0x44000103, 0x9400010b, | |
1525 | 0x24000102, 0x7400006c, 0xa8010520, 0xc801021c, | |
1526 | 0x24000102, 0x64000073, 0x44000104, 0x9400003b, | |
1527 | 0x24000102, 0xc8010303, 0x64000109, 0xb4000081, | |
1528 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, | |
1529 | 0x24000102, 0x74000074, 0x54000105, 0xc8010290, | |
1530 | 0x24000102, 0x7400002c, 0x44000104, 0x94000052, | |
1531 | 0x24000102, 0xc8010341, 0x64000000, 0xb40000d1, | |
1532 | 0x24000102, 0x64000069, 0x44000103, 0x94000002, | |
1533 | 0x24000102, 0x7400006e, 0x54000020, 0xc8010256, | |
1534 | 0x24000102, 0x64000106, 0x44000104, 0x94000047, | |
1535 | 0x24000102, 0xc801032e, 0x6400010a, 0xb40000a7, | |
1536 | 0x24000102, 0x64000072, 0x44000103, 0x94000033, | |
1537 | 0x24000102, 0x74000107, 0x54000105, 0xc80102f8, | |
1538 | 0x24000102, 0x74000064, 0x44000104, 0x940000c2, | |
1539 | 0x24000102, 0xc8010367, 0x64000061, 0xb40000fa, | |
1540 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
1541 | 0x24000102, 0xc8002068, 0x78010220, 0x940000f0, | |
1542 | 0x24000102, 0x88010273, 0x44000104, 0x94000036, | |
1543 | 0x24000102, 0x74000108, 0x64000109, 0x36000000, | |
1544 | 0x24000102, 0x8801026f, 0x44000103, 0x94000007, | |
1545 | 0x24000102, 0xc8002070, 0x54000105, 0xc801025f, | |
1546 | 0x24000102, 0xc800200a, 0x44000104, 0x9400004c, | |
1547 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b5, | |
1548 | 0x24000102, 0x88010269, 0x44000103, 0x9400010e, | |
1549 | 0x24000102, 0xc800206d, 0xc8000a20, 0xc8010226, | |
1550 | 0x24000102, 0x64000106, 0x44000104, 0x94000040, | |
1551 | 0x24000102, 0x84000004, 0x6400010a, 0xb400008e, | |
1552 | 0x24000102, 0x88010272, 0x44000103, 0x9400001f, | |
1553 | 0x24000102, 0xc8002075, 0x54000105, 0xc80102bd, | |
1554 | 0x24000102, 0xc8002063, 0x44000104, 0x9400006b, | |
1555 | 0x24000102, 0x84000042, 0x88010261, 0xb40000dd, | |
1556 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
1557 | 0x24000102, 0xc800206c, 0x98010420, 0xc8010214, | |
1558 | 0x24000102, 0x64000073, 0x44000104, 0x9400003a, | |
1559 | 0x24000102, 0x84000001, 0x64000109, 0xb400005d, | |
1560 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, | |
1561 | 0x24000102, 0xc8002074, 0x54000105, 0xc801028b, | |
1562 | 0x24000102, 0xc800202c, 0x44000104, 0x94000050, | |
1563 | 0x24000102, 0x84000032, 0x64000000, 0xb40000cb, | |
1564 | 0x24000102, 0x64000069, 0x44000103, 0xa4000114, | |
1565 | 0x24000102, 0xc800206e, 0x54000020, 0xc8010251, | |
1566 | 0x24000102, 0x64000106, 0x44000104, 0x94000046, | |
1567 | 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1, | |
1568 | 0x24000102, 0x64000072, 0x44000103, 0x94000029, | |
1569 | 0x24000102, 0x74000107, 0x54000105, 0xc80102f1, | |
1570 | 0x24000102, 0xc8002064, 0x44000104, 0x940000c0, | |
1571 | 0x24000102, 0x84000066, 0x64000061, 0xb40000f4, | |
1572 | 0x24000102, 0xc8010965, 0x44000103, 0x84000079, | |
1573 | 0x24000102, 0x74000068, 0x78010220, 0xb4000116, | |
1574 | 0x24000102, 0xc8010973, 0x44000104, 0x94000038, | |
1575 | 0x24000102, 0x74000108, 0x64000109, 0xb4000015, | |
1576 | 0x24000102, 0xc801096f, 0x44000103, 0x94000009, | |
1577 | 0x24000102, 0x74000070, 0x54000105, 0xc801027a, | |
1578 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, | |
1579 | 0x24000102, 0x84000031, 0xc8010900, 0xb40000be, | |
1580 | 0x24000102, 0xc8010969, 0x44000103, 0x94000110, | |
1581 | 0x24000102, 0x7400006d, 0x54000020, 0xc801023c, | |
1582 | 0x24000102, 0x64000106, 0x44000104, 0x94000044, | |
1583 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098, | |
1584 | 0x24000102, 0xc8010972, 0x44000103, 0x94000027, | |
1585 | 0x24000102, 0x74000075, 0x54000105, 0xc80102d0, | |
1586 | 0x24000102, 0x74000063, 0x44000104, 0x94000078, | |
1587 | 0x24000102, 0x84000062, 0xc8010961, 0xb40000e9, | |
1588 | 0x24000102, 0x64000065, 0x44000103, 0x9400010c, | |
1589 | 0x24000102, 0x7400006c, 0xb8000020, 0xc8010221, | |
1590 | 0x24000102, 0x64000073, 0x44000104, 0x9400003e, | |
1591 | 0x24000102, 0x84000003, 0x64000109, 0xb4000085, | |
1592 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, | |
1593 | 0x24000102, 0x74000074, 0x54000105, 0xc80102a4, | |
1594 | 0x24000102, 0x7400002c, 0x44000104, 0x94000053, | |
1595 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d5, | |
1596 | 0x24000102, 0x64000069, 0x44000103, 0x94000005, | |
1597 | 0x24000102, 0x7400006e, 0x54000020, 0xc8010258, | |
1598 | 0x24000102, 0x64000106, 0x44000104, 0x94000048, | |
1599 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab, | |
1600 | 0x24000102, 0x64000072, 0x44000103, 0x94000034, | |
1601 | 0x24000102, 0x74000107, 0x54000105, 0xc80102fc, | |
1602 | 0x24000102, 0x74000064, 0x44000104, 0x940000c3, | |
1603 | 0x24000102, 0x84000067, 0x64000061, 0x42000130, | |
1604 | 0x24000102, 0x88010265, 0x44000103, 0xa8010277, | |
1605 | 0x24000102, 0x98010268, 0x78010220, 0x940000e0, | |
1606 | 0x24000102, 0x88010273, 0x44000104, 0x94000035, | |
1607 | 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, | |
1608 | 0x24000102, 0x8801026f, 0x44000103, 0x94000006, | |
1609 | 0x24000102, 0x98010270, 0x54000105, 0xa4000059, | |
1610 | 0x24000102, 0x9801020a, 0x44000104, 0x94000049, | |
1611 | 0x24000102, 0xa8010230, 0x88010200, 0xb40000ae, | |
1612 | 0x24000102, 0x88010269, 0x44000103, 0x9400010d, | |
1613 | 0x24000102, 0x9801026d, 0xb8006920, 0xa4000023, | |
1614 | 0x24000102, 0x64000106, 0x44000104, 0x9400003f, | |
1615 | 0x24000102, 0xa8010204, 0x6400010a, 0xb4000088, | |
1616 | 0x24000102, 0x88010272, 0x44000103, 0x94000010, | |
1617 | 0x24000102, 0x98010275, 0x54000105, 0xa40000a6, | |
1618 | 0x24000102, 0x98010263, 0x44000104, 0x94000054, | |
1619 | 0x24000102, 0xa8010242, 0x88010261, 0xb40000d8, | |
1620 | 0x24000102, 0xc8006f65, 0x44000103, 0xa80102ff, | |
1621 | 0x24000102, 0x9801026c, 0x98010320, 0xa4000011, | |
1622 | 0x24000102, 0xc8006f73, 0x44000104, 0x94000039, | |
1623 | 0x24000102, 0xa8010201, 0x64000109, 0xb4000019, | |
1624 | 0x24000102, 0xc8006f6f, 0x44000103, 0x9400000b, | |
1625 | 0x24000102, 0x98010274, 0x54000105, 0xa400007c, | |
1626 | 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, | |
1627 | 0x24000102, 0xa8010232, 0xc8006f00, 0xb40000c5, | |
1628 | 0x24000102, 0xc8006f69, 0x44000103, 0xa4000111, | |
1629 | 0x24000102, 0x9801026e, 0x54000020, 0xa400003d, | |
1630 | 0x24000102, 0x64000106, 0x44000104, 0x94000045, | |
1631 | 0x24000102, 0xa801022d, 0x6400010a, 0xb400009b, | |
1632 | 0x24000102, 0xc8006f72, 0x44000103, 0x94000028, | |
1633 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, | |
1634 | 0x24000102, 0x98010264, 0x44000104, 0x94000080, | |
1635 | 0x24000102, 0xa8010266, 0xc8006f61, 0xb40000ec, | |
1636 | 0x24000102, 0xa8010365, 0x44000103, 0xa8010279, | |
1637 | 0x24000102, 0x74000068, 0x78010220, 0x940000fe, | |
1638 | 0x24000102, 0xa8010373, 0x44000104, 0x94000037, | |
1639 | 0x24000102, 0x74000108, 0x64000109, 0x3600000a, | |
1640 | 0x24000102, 0xa801036f, 0x44000103, 0x94000008, | |
1641 | 0x24000102, 0x74000070, 0x54000105, 0xa4000060, | |
1642 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, | |
1643 | 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b8, | |
1644 | 0x24000102, 0xa8010369, 0x44000103, 0x9400010f, | |
1645 | 0x24000102, 0x7400006d, 0xc8006d20, 0xa400002a, | |
1646 | 0x24000102, 0x64000106, 0x44000104, 0x94000043, | |
1647 | 0x24000102, 0xa801020d, 0x6400010a, 0xb4000093, | |
1648 | 0x24000102, 0xa8010372, 0x44000103, 0x94000022, | |
1649 | 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, | |
1650 | 0x24000102, 0x74000063, 0x44000104, 0x94000076, | |
1651 | 0x24000102, 0xa8010262, 0xa8010361, 0xb40000e1, | |
1652 | 0x24000102, 0x64000065, 0x44000103, 0x9400010b, | |
1653 | 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018, | |
1654 | 0x24000102, 0x64000073, 0x44000104, 0x9400003b, | |
1655 | 0x24000102, 0xa8010203, 0x64000109, 0xb400007d, | |
1656 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, | |
1657 | 0x24000102, 0x74000074, 0x54000105, 0xa400008f, | |
1658 | 0x24000102, 0x7400002c, 0x44000104, 0x94000052, | |
1659 | 0x24000102, 0xa8010241, 0x64000000, 0xb40000ce, | |
1660 | 0x24000102, 0x64000069, 0x44000103, 0x94000002, | |
1661 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, | |
1662 | 0x24000102, 0x64000106, 0x44000104, 0x94000047, | |
1663 | 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a5, | |
1664 | 0x24000102, 0x64000072, 0x44000103, 0x94000033, | |
1665 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, | |
1666 | 0x24000102, 0x74000064, 0x44000104, 0x940000c2, | |
1667 | 0x24000102, 0xa8010267, 0x64000061, 0xb40000f7, | |
1668 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
1669 | 0x24000102, 0xb8010468, 0x78010220, 0x940000f0, | |
1670 | 0x24000102, 0x88010273, 0x44000104, 0x94000036, | |
1671 | 0x24000102, 0x74000108, 0x64000109, 0xc400011e, | |
1672 | 0x24000102, 0x8801026f, 0x44000103, 0x94000007, | |
1673 | 0x24000102, 0xb8010470, 0x54000105, 0xa400005c, | |
1674 | 0x24000102, 0xb801040a, 0x44000104, 0x9400004c, | |
1675 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b3, | |
1676 | 0x24000102, 0x88010269, 0x44000103, 0x9400010e, | |
1677 | 0x24000102, 0xb801046d, 0xb8010620, 0xa4000025, | |
1678 | 0x24000102, 0x64000106, 0x44000104, 0x94000040, | |
1679 | 0x24000102, 0x84000004, 0x6400010a, 0xb400008d, | |
1680 | 0x24000102, 0x88010272, 0x44000103, 0x9400001f, | |
1681 | 0x24000102, 0xb8010475, 0x54000105, 0xa40000b4, | |
1682 | 0x24000102, 0xb8010463, 0x44000104, 0x9400006b, | |
1683 | 0x24000102, 0x84000042, 0x88010261, 0xb40000dc, | |
1684 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
1685 | 0x24000102, 0xb801046c, 0x98010420, 0xa4000013, | |
1686 | 0x24000102, 0x64000073, 0x44000104, 0x9400003a, | |
1687 | 0x24000102, 0x84000001, 0x64000109, 0xb400005a, | |
1688 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, | |
1689 | 0x24000102, 0xb8010474, 0x54000105, 0xa400007f, | |
1690 | 0x24000102, 0xb801042c, 0x44000104, 0x94000050, | |
1691 | 0x24000102, 0x84000032, 0x64000000, 0xb40000ca, | |
1692 | 0x24000102, 0x64000069, 0x44000103, 0xa4000112, | |
1693 | 0x24000102, 0xb801046e, 0x54000020, 0xa400004b, | |
1694 | 0x24000102, 0x64000106, 0x44000104, 0x94000046, | |
1695 | 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f, | |
1696 | 0x24000102, 0x64000072, 0x44000103, 0x94000029, | |
1697 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, | |
1698 | 0x24000102, 0xb8010464, 0x44000104, 0x940000c0, | |
1699 | 0x24000102, 0x84000066, 0x64000061, 0xb40000f2, | |
1700 | 0x24000102, 0xb8010565, 0x44000103, 0x84000079, | |
1701 | 0x24000102, 0x74000068, 0x78010220, 0xb4000117, | |
1702 | 0x24000102, 0xb8010573, 0x44000104, 0x94000038, | |
1703 | 0x24000102, 0x74000108, 0x64000109, 0x3600001a, | |
1704 | 0x24000102, 0xb801056f, 0x44000103, 0x94000009, | |
1705 | 0x24000102, 0x74000070, 0x54000105, 0xa4000071, | |
1706 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, | |
1707 | 0x24000102, 0x84000031, 0xb8010500, 0xb40000bc, | |
1708 | 0x24000102, 0xb8010569, 0x44000103, 0x94000110, | |
1709 | 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, | |
1710 | 0x24000102, 0x64000106, 0x44000104, 0x94000044, | |
1711 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097, | |
1712 | 0x24000102, 0xb8010572, 0x44000103, 0x94000027, | |
1713 | 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, | |
1714 | 0x24000102, 0x74000063, 0x44000104, 0x94000078, | |
1715 | 0x24000102, 0x84000062, 0xb8010561, 0xb40000e6, | |
1716 | 0x24000102, 0x64000065, 0x44000103, 0x9400010c, | |
1717 | 0x24000102, 0x7400006c, 0xb8010a20, 0xa400001e, | |
1718 | 0x24000102, 0x64000073, 0x44000104, 0x9400003e, | |
1719 | 0x24000102, 0x84000003, 0x64000109, 0xb4000084, | |
1720 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, | |
1721 | 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, | |
1722 | 0x24000102, 0x7400002c, 0x44000104, 0x94000053, | |
1723 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d4, | |
1724 | 0x24000102, 0x64000069, 0x44000103, 0x94000005, | |
1725 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, | |
1726 | 0x24000102, 0x64000106, 0x44000104, 0x94000048, | |
1727 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000aa, | |
1728 | 0x24000102, 0x64000072, 0x44000103, 0x94000034, | |
1729 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, | |
1730 | 0x24000102, 0x74000064, 0x44000104, 0x940000c3, | |
1731 | 0x24000102, 0x84000067, 0x64000061, 0xb4000200, | |
1732 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
1733 | 0x24000102, 0x98010268, 0x78010220, 0x940000e0, | |
1734 | 0x24000102, 0x88010273, 0x44000104, 0x94000035, | |
1735 | 0x24000102, 0x74000108, 0x64000109, 0xc400011a, | |
1736 | 0x24000102, 0x8801026f, 0x44000103, 0x94000006, | |
1737 | 0x24000102, 0x98010270, 0x54000105, 0xa400005b, | |
1738 | 0x24000102, 0x9801020a, 0x44000104, 0x94000049, | |
1739 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b1, | |
1740 | 0x24000102, 0x88010269, 0x44000103, 0x9400010d, | |
1741 | 0x24000102, 0x9801026d, 0xb8007220, 0xa4000024, | |
1742 | 0x24000102, 0x64000106, 0x44000104, 0x9400003f, | |
1743 | 0x24000102, 0x84000004, 0x6400010a, 0xb400008a, | |
1744 | 0x24000102, 0x88010272, 0x44000103, 0x94000010, | |
1745 | 0x24000102, 0x98010275, 0x54000105, 0xa40000b0, | |
1746 | 0x24000102, 0x98010263, 0x44000104, 0x94000054, | |
1747 | 0x24000102, 0x84000042, 0x88010261, 0xb40000da, | |
1748 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
1749 | 0x24000102, 0x9801026c, 0x98010320, 0xa4000012, | |
1750 | 0x24000102, 0x64000073, 0x44000104, 0x94000039, | |
1751 | 0x24000102, 0x84000001, 0x64000109, 0xb400001b, | |
1752 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, | |
1753 | 0x24000102, 0x98010274, 0x54000105, 0xa400007e, | |
1754 | 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, | |
1755 | 0x24000102, 0x84000032, 0x64000000, 0xb40000c8, | |
1756 | 0x24000102, 0x64000069, 0x44000103, 0xa4000113, | |
1757 | 0x24000102, 0x9801026e, 0x54000020, 0xa400004a, | |
1758 | 0x24000102, 0x64000106, 0x44000104, 0x94000045, | |
1759 | 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d, | |
1760 | 0x24000102, 0x64000072, 0x44000103, 0x94000028, | |
1761 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, | |
1762 | 0x24000102, 0x98010264, 0x44000104, 0x94000080, | |
1763 | 0x24000102, 0x84000066, 0x64000061, 0xb40000ee, | |
1764 | 0x24000102, 0xa8010465, 0x44000103, 0x84000079, | |
1765 | 0x24000102, 0x74000068, 0x78010220, 0x940000fe, | |
1766 | 0x24000102, 0xa8010473, 0x44000104, 0x94000037, | |
1767 | 0x24000102, 0x74000108, 0x64000109, 0x36000012, | |
1768 | 0x24000102, 0xa801046f, 0x44000103, 0x94000008, | |
1769 | 0x24000102, 0x74000070, 0x54000105, 0xa400006a, | |
1770 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, | |
1771 | 0x24000102, 0x84000031, 0xa8010400, 0xb40000ba, | |
1772 | 0x24000102, 0xa8010469, 0x44000103, 0x9400010f, | |
1773 | 0x24000102, 0x7400006d, 0xc8007520, 0xa400002b, | |
1774 | 0x24000102, 0x64000106, 0x44000104, 0x94000043, | |
1775 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095, | |
1776 | 0x24000102, 0xa8010472, 0x44000103, 0x94000022, | |
1777 | 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, | |
1778 | 0x24000102, 0x74000063, 0x44000104, 0x94000076, | |
1779 | 0x24000102, 0x84000062, 0xa8010461, 0xb40000e4, | |
1780 | 0x24000102, 0x64000065, 0x44000103, 0x9400010b, | |
1781 | 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c, | |
1782 | 0x24000102, 0x64000073, 0x44000104, 0x9400003b, | |
1783 | 0x24000102, 0x84000003, 0x64000109, 0xb4000082, | |
1784 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, | |
1785 | 0x24000102, 0x74000074, 0x54000105, 0xa4000090, | |
1786 | 0x24000102, 0x7400002c, 0x44000104, 0x94000052, | |
1787 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d2, | |
1788 | 0x24000102, 0x64000069, 0x44000103, 0x94000002, | |
1789 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000056, | |
1790 | 0x24000102, 0x64000106, 0x44000104, 0x94000047, | |
1791 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a8, | |
1792 | 0x24000102, 0x64000072, 0x44000103, 0x94000033, | |
1793 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, | |
1794 | 0x24000102, 0x74000064, 0x44000104, 0x940000c2, | |
1795 | 0x24000102, 0x84000067, 0x64000061, 0xb40000fb, | |
1796 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
1797 | 0x24000102, 0x74000068, 0x78010220, 0x940000f0, | |
1798 | 0x24000102, 0x88010273, 0x44000104, 0x94000036, | |
1799 | 0x24000102, 0x74000108, 0x64000109, 0x36000002, | |
1800 | 0x24000102, 0x8801026f, 0x44000103, 0x94000007, | |
1801 | 0x24000102, 0x74000070, 0x54000105, 0xa400005f, | |
1802 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, | |
1803 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b6, | |
1804 | 0x24000102, 0x88010269, 0x44000103, 0x9400010e, | |
1805 | 0x24000102, 0x7400006d, 0xc8006320, 0xa4000026, | |
1806 | 0x24000102, 0x64000106, 0x44000104, 0x94000040, | |
1807 | 0x24000102, 0x84000004, 0x6400010a, 0xb4000091, | |
1808 | 0x24000102, 0x88010272, 0x44000103, 0x9400001f, | |
1809 | 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, | |
1810 | 0x24000102, 0x74000063, 0x44000104, 0x9400006b, | |
1811 | 0x24000102, 0x84000042, 0x88010261, 0xb40000de, | |
1812 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
1813 | 0x24000102, 0x7400006c, 0x98010420, 0xa4000014, | |
1814 | 0x24000102, 0x64000073, 0x44000104, 0x9400003a, | |
1815 | 0x24000102, 0x84000001, 0x64000109, 0xb400005e, | |
1816 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, | |
1817 | 0x24000102, 0x74000074, 0x54000105, 0xa400008b, | |
1818 | 0x24000102, 0x7400002c, 0x44000104, 0x94000050, | |
1819 | 0x24000102, 0x84000032, 0x64000000, 0xb40000cc, | |
1820 | 0x24000102, 0x64000069, 0x44000103, 0xa4000114, | |
1821 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000051, | |
1822 | 0x24000102, 0x64000106, 0x44000104, 0x94000046, | |
1823 | 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a2, | |
1824 | 0x24000102, 0x64000072, 0x44000103, 0x94000029, | |
1825 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, | |
1826 | 0x24000102, 0x74000064, 0x44000104, 0x940000c0, | |
1827 | 0x24000102, 0x84000066, 0x64000061, 0xb40000f5, | |
1828 | 0x24000102, 0xc8000065, 0x44000103, 0x84000079, | |
1829 | 0x24000102, 0x74000068, 0x78010220, 0xb4000118, | |
1830 | 0x24000102, 0xc8000073, 0x44000104, 0x94000038, | |
1831 | 0x24000102, 0x74000108, 0x64000109, 0xb4000016, | |
1832 | 0x24000102, 0xc800006f, 0x44000103, 0x94000009, | |
1833 | 0x24000102, 0x74000070, 0x54000105, 0xa400007a, | |
1834 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, | |
1835 | 0x24000102, 0x84000031, 0xc8000000, 0xb40000bf, | |
1836 | 0x24000102, 0xc8000069, 0x44000103, 0x94000110, | |
1837 | 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, | |
1838 | 0x24000102, 0x64000106, 0x44000104, 0x94000044, | |
1839 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099, | |
1840 | 0x24000102, 0xc8000072, 0x44000103, 0x94000027, | |
1841 | 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, | |
1842 | 0x24000102, 0x74000063, 0x44000104, 0x94000078, | |
1843 | 0x24000102, 0x84000062, 0xc8000061, 0xb40000ea, | |
1844 | 0x24000102, 0x64000065, 0x44000103, 0x9400010c, | |
1845 | 0x24000102, 0x7400006c, 0xb8006120, 0xa4000021, | |
1846 | 0x24000102, 0x64000073, 0x44000104, 0x9400003e, | |
1847 | 0x24000102, 0x84000003, 0x64000109, 0xb4000086, | |
1848 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, | |
1849 | 0x24000102, 0x74000074, 0x54000105, 0xa40000a4, | |
1850 | 0x24000102, 0x7400002c, 0x44000104, 0x94000053, | |
1851 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d6, | |
1852 | 0x24000102, 0x64000069, 0x44000103, 0x94000005, | |
1853 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000058, | |
1854 | 0x24000102, 0x64000106, 0x44000104, 0x94000048, | |
1855 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ac, | |
1856 | 0x24000102, 0x64000072, 0x44000103, 0x94000034, | |
1857 | 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, | |
1858 | 0x24000102, 0x74000064, 0x44000104, 0x940000c3, | |
1859 | 0x24000102, 0x84000067, 0x64000061, 0x46000140, | |
1860 | 0x24000102, 0x88010265, 0x44000103, 0xa8010277, | |
1861 | 0x24000102, 0x98010268, 0x78010220, 0xb80102e0, | |
1862 | 0x24000102, 0x88010273, 0x44000104, 0xb8010235, | |
1863 | 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, | |
1864 | 0x24000102, 0x8801026f, 0x44000103, 0xb8010206, | |
1865 | 0x24000102, 0x98010270, 0x54000105, 0xa4000059, | |
1866 | 0x24000102, 0x9801020a, 0x44000104, 0xb8010249, | |
1867 | 0x24000102, 0xa8010230, 0x88010200, 0xb40000ad, | |
1868 | 0x24000102, 0x88010269, 0x44000103, 0x9400010d, | |
1869 | 0x24000102, 0x9801026d, 0xb8006520, 0xa4000023, | |
1870 | 0x24000102, 0x64000106, 0x44000104, 0xb801023f, | |
1871 | 0x24000102, 0xa8010204, 0x6400010a, 0xb4000087, | |
1872 | 0x24000102, 0x88010272, 0x44000103, 0xb8010210, | |
1873 | 0x24000102, 0x98010275, 0x54000105, 0xa40000a6, | |
1874 | 0x24000102, 0x98010263, 0x44000104, 0xb8010254, | |
1875 | 0x24000102, 0xa8010242, 0x88010261, 0xb40000d7, | |
1876 | 0x24000102, 0xc8006965, 0x44000103, 0xa80102ff, | |
1877 | 0x24000102, 0x9801026c, 0x98010320, 0xa4000011, | |
1878 | 0x24000102, 0xc8006973, 0x44000104, 0xb8010239, | |
1879 | 0x24000102, 0xa8010201, 0x64000109, 0xb4000017, | |
1880 | 0x24000102, 0xc800696f, 0x44000103, 0xb801020b, | |
1881 | 0x24000102, 0x98010274, 0x54000105, 0xa400007c, | |
1882 | 0x24000102, 0x9801022c, 0x44000104, 0xb801024f, | |
1883 | 0x24000102, 0xa8010232, 0xc8006900, 0xb40000c4, | |
1884 | 0x24000102, 0xc8006969, 0x44000103, 0xa4000111, | |
1885 | 0x24000102, 0x9801026e, 0x54000020, 0xa400003d, | |
1886 | 0x24000102, 0x64000106, 0x44000104, 0xb8010245, | |
1887 | 0x24000102, 0xa801022d, 0x6400010a, 0xb400009a, | |
1888 | 0x24000102, 0xc8006972, 0x44000103, 0xb8010228, | |
1889 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, | |
1890 | 0x24000102, 0x98010264, 0x44000104, 0xb8010280, | |
1891 | 0x24000102, 0xa8010266, 0xc8006961, 0xb40000eb, | |
1892 | 0x24000102, 0xa8010365, 0x44000103, 0xa8010279, | |
1893 | 0x24000102, 0x74000068, 0x78010220, 0xb80102fe, | |
1894 | 0x24000102, 0xa8010373, 0x44000104, 0xb8010237, | |
1895 | 0x24000102, 0x74000108, 0x64000109, 0x3600000c, | |
1896 | 0x24000102, 0xa801036f, 0x44000103, 0xb8010208, | |
1897 | 0x24000102, 0x74000070, 0x54000105, 0xa4000060, | |
1898 | 0x24000102, 0x7400000a, 0x44000104, 0xb801024d, | |
1899 | 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b7, | |
1900 | 0x24000102, 0xa8010369, 0x44000103, 0x9400010f, | |
1901 | 0x24000102, 0x7400006d, 0xc8006c20, 0xa400002a, | |
1902 | 0x24000102, 0x64000106, 0x44000104, 0xb8010243, | |
1903 | 0x24000102, 0xa801020d, 0x6400010a, 0xb4000092, | |
1904 | 0x24000102, 0xa8010372, 0x44000103, 0xb8010222, | |
1905 | 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, | |
1906 | 0x24000102, 0x74000063, 0x44000104, 0xb8010276, | |
1907 | 0x24000102, 0xa8010262, 0xa8010361, 0xb40000df, | |
1908 | 0x24000102, 0x64000065, 0x44000103, 0x9400010b, | |
1909 | 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018, | |
1910 | 0x24000102, 0x64000073, 0x44000104, 0xb801023b, | |
1911 | 0x24000102, 0xa8010203, 0x64000109, 0xb400007b, | |
1912 | 0x24000102, 0x6400006f, 0x44000103, 0xb801020e, | |
1913 | 0x24000102, 0x74000074, 0x54000105, 0xa400008f, | |
1914 | 0x24000102, 0x7400002c, 0x44000104, 0xb8010252, | |
1915 | 0x24000102, 0xa8010241, 0x64000000, 0xb40000cd, | |
1916 | 0x24000102, 0x64000069, 0x44000103, 0xb8010202, | |
1917 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, | |
1918 | 0x24000102, 0x64000106, 0x44000104, 0xb8010247, | |
1919 | 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a3, | |
1920 | 0x24000102, 0x64000072, 0x44000103, 0xb8010233, | |
1921 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, | |
1922 | 0x24000102, 0x74000064, 0x44000104, 0xb80102c2, | |
1923 | 0x24000102, 0xa8010267, 0x64000061, 0xb40000f6, | |
1924 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
1925 | 0x24000102, 0xb8010368, 0x78010220, 0xb80102f0, | |
1926 | 0x24000102, 0x88010273, 0x44000104, 0xb8010236, | |
1927 | 0x24000102, 0x74000108, 0x64000109, 0xc400011f, | |
1928 | 0x24000102, 0x8801026f, 0x44000103, 0xb8010207, | |
1929 | 0x24000102, 0xb8010370, 0x54000105, 0xa400005c, | |
1930 | 0x24000102, 0xb801030a, 0x44000104, 0xb801024c, | |
1931 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b2, | |
1932 | 0x24000102, 0x88010269, 0x44000103, 0x9400010e, | |
1933 | 0x24000102, 0xb801036d, 0xb8007320, 0xa4000025, | |
1934 | 0x24000102, 0x64000106, 0x44000104, 0xb8010240, | |
1935 | 0x24000102, 0x84000004, 0x6400010a, 0xb400008c, | |
1936 | 0x24000102, 0x88010272, 0x44000103, 0xb801021f, | |
1937 | 0x24000102, 0xb8010375, 0x54000105, 0xa40000b4, | |
1938 | 0x24000102, 0xb8010363, 0x44000104, 0xb801026b, | |
1939 | 0x24000102, 0x84000042, 0x88010261, 0xb40000db, | |
1940 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
1941 | 0x24000102, 0xb801036c, 0x98010420, 0xa4000013, | |
1942 | 0x24000102, 0x64000073, 0x44000104, 0xb801023a, | |
1943 | 0x24000102, 0x84000001, 0x64000109, 0xb400001d, | |
1944 | 0x24000102, 0x6400006f, 0x44000103, 0xb801020c, | |
1945 | 0x24000102, 0xb8010374, 0x54000105, 0xa400007f, | |
1946 | 0x24000102, 0xb801032c, 0x44000104, 0xb8010250, | |
1947 | 0x24000102, 0x84000032, 0x64000000, 0xb40000c9, | |
1948 | 0x24000102, 0x64000069, 0x44000103, 0xa4000112, | |
1949 | 0x24000102, 0xb801036e, 0x54000020, 0xa400004b, | |
1950 | 0x24000102, 0x64000106, 0x44000104, 0xb8010246, | |
1951 | 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e, | |
1952 | 0x24000102, 0x64000072, 0x44000103, 0xb8010229, | |
1953 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, | |
1954 | 0x24000102, 0xb8010364, 0x44000104, 0xb80102c0, | |
1955 | 0x24000102, 0x84000066, 0x64000061, 0xb40000ef, | |
1956 | 0x24000102, 0xb8002065, 0x44000103, 0x84000079, | |
1957 | 0x24000102, 0x74000068, 0x78010220, 0xb4000115, | |
1958 | 0x24000102, 0xb8002073, 0x44000104, 0xb8010238, | |
1959 | 0x24000102, 0x74000108, 0x64000109, 0x3600001c, | |
1960 | 0x24000102, 0xb800206f, 0x44000103, 0xb8010209, | |
1961 | 0x24000102, 0x74000070, 0x54000105, 0xa4000071, | |
1962 | 0x24000102, 0x7400000a, 0x44000104, 0xb801024e, | |
1963 | 0x24000102, 0x84000031, 0xb8002000, 0xb40000bb, | |
1964 | 0x24000102, 0xb8002069, 0x44000103, 0x94000110, | |
1965 | 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, | |
1966 | 0x24000102, 0x64000106, 0x44000104, 0xb8010244, | |
1967 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096, | |
1968 | 0x24000102, 0xb8002072, 0x44000103, 0xb8010227, | |
1969 | 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, | |
1970 | 0x24000102, 0x74000063, 0x44000104, 0xb8010278, | |
1971 | 0x24000102, 0x84000062, 0xb8002061, 0xb40000e5, | |
1972 | 0x24000102, 0x64000065, 0x44000103, 0x9400010c, | |
1973 | 0x24000102, 0x7400006c, 0xb8010920, 0xa400001e, | |
1974 | 0x24000102, 0x64000073, 0x44000104, 0xb801023e, | |
1975 | 0x24000102, 0x84000003, 0x64000109, 0xb4000083, | |
1976 | 0x24000102, 0x6400006f, 0x44000103, 0xb801020f, | |
1977 | 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, | |
1978 | 0x24000102, 0x7400002c, 0x44000104, 0xb8010253, | |
1979 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d3, | |
1980 | 0x24000102, 0x64000069, 0x44000103, 0xb8010205, | |
1981 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, | |
1982 | 0x24000102, 0x64000106, 0x44000104, 0xb8010248, | |
1983 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9, | |
1984 | 0x24000102, 0x64000072, 0x44000103, 0xb8010234, | |
1985 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, | |
1986 | 0x24000102, 0x74000064, 0x44000104, 0xb80102c3, | |
1987 | 0x24000102, 0x84000067, 0x64000061, 0x3e000128, | |
1988 | 0x24000102, 0x88010265, 0x44000103, 0xc8010477, | |
1989 | 0x24000102, 0x98010268, 0x78010220, 0x940000e0, | |
1990 | 0x24000102, 0x88010273, 0x44000104, 0x94000035, | |
1991 | 0x24000102, 0x74000108, 0x64000109, 0xc400011b, | |
1992 | 0x24000102, 0x8801026f, 0x44000103, 0x94000006, | |
1993 | 0x24000102, 0x98010270, 0x54000105, 0xa400005b, | |
1994 | 0x24000102, 0x9801020a, 0x44000104, 0x94000049, | |
1995 | 0x24000102, 0xc8010430, 0x88010200, 0xb40000af, | |
1996 | 0x24000102, 0x88010269, 0x44000103, 0x9400010d, | |
1997 | 0x24000102, 0x9801026d, 0xb8006f20, 0xa4000024, | |
1998 | 0x24000102, 0x64000106, 0x44000104, 0x9400003f, | |
1999 | 0x24000102, 0xc8010404, 0x6400010a, 0xb4000089, | |
2000 | 0x24000102, 0x88010272, 0x44000103, 0x94000010, | |
2001 | 0x24000102, 0x98010275, 0x54000105, 0xa40000b0, | |
2002 | 0x24000102, 0x98010263, 0x44000104, 0x94000054, | |
2003 | 0x24000102, 0xc8010442, 0x88010261, 0xb40000d9, | |
2004 | 0x24000102, 0xc8010665, 0x44000103, 0xc80104ff, | |
2005 | 0x24000102, 0x9801026c, 0x98010320, 0xa4000012, | |
2006 | 0x24000102, 0xc8010673, 0x44000104, 0x94000039, | |
2007 | 0x24000102, 0xc8010401, 0x64000109, 0xb400001a, | |
2008 | 0x24000102, 0xc801066f, 0x44000103, 0x9400000b, | |
2009 | 0x24000102, 0x98010274, 0x54000105, 0xa400007e, | |
2010 | 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, | |
2011 | 0x24000102, 0xc8010432, 0xc8010600, 0xb40000c6, | |
2012 | 0x24000102, 0xc8010669, 0x44000103, 0xa4000113, | |
2013 | 0x24000102, 0x9801026e, 0x54000020, 0xa400004a, | |
2014 | 0x24000102, 0x64000106, 0x44000104, 0x94000045, | |
2015 | 0x24000102, 0xc801042d, 0x6400010a, 0xb400009c, | |
2016 | 0x24000102, 0xc8010672, 0x44000103, 0x94000028, | |
2017 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, | |
2018 | 0x24000102, 0x98010264, 0x44000104, 0x94000080, | |
2019 | 0x24000102, 0xc8010466, 0xc8010661, 0xb40000ed, | |
2020 | 0x24000102, 0xa8010465, 0x44000103, 0xc8010479, | |
2021 | 0x24000102, 0x74000068, 0x78010220, 0x940000fe, | |
2022 | 0x24000102, 0xa8010473, 0x44000104, 0x94000037, | |
2023 | 0x24000102, 0x74000108, 0x64000109, 0x36000014, | |
2024 | 0x24000102, 0xa801046f, 0x44000103, 0x94000008, | |
2025 | 0x24000102, 0x74000070, 0x54000105, 0xa400006a, | |
2026 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, | |
2027 | 0x24000102, 0xc8010431, 0xa8010400, 0xb40000b9, | |
2028 | 0x24000102, 0xa8010469, 0x44000103, 0x9400010f, | |
2029 | 0x24000102, 0x7400006d, 0xc8007420, 0xa400002b, | |
2030 | 0x24000102, 0x64000106, 0x44000104, 0x94000043, | |
2031 | 0x24000102, 0xc801040d, 0x6400010a, 0xb4000094, | |
2032 | 0x24000102, 0xa8010472, 0x44000103, 0x94000022, | |
2033 | 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, | |
2034 | 0x24000102, 0x74000063, 0x44000104, 0x94000076, | |
2035 | 0x24000102, 0xc8010462, 0xa8010461, 0xb40000e2, | |
2036 | 0x24000102, 0x64000065, 0x44000103, 0x9400010b, | |
2037 | 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c, | |
2038 | 0x24000102, 0x64000073, 0x44000104, 0x9400003b, | |
2039 | 0x24000102, 0xc8010403, 0x64000109, 0xb4000081, | |
2040 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, | |
2041 | 0x24000102, 0x74000074, 0x54000105, 0xa4000090, | |
2042 | 0x24000102, 0x7400002c, 0x44000104, 0x94000052, | |
2043 | 0x24000102, 0xc8010441, 0x64000000, 0xb40000d1, | |
2044 | 0x24000102, 0x64000069, 0x44000103, 0x94000002, | |
2045 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000056, | |
2046 | 0x24000102, 0x64000106, 0x44000104, 0x94000047, | |
2047 | 0x24000102, 0xc801042e, 0x6400010a, 0xb40000a7, | |
2048 | 0x24000102, 0x64000072, 0x44000103, 0x94000033, | |
2049 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, | |
2050 | 0x24000102, 0x74000064, 0x44000104, 0x940000c2, | |
2051 | 0x24000102, 0xc8010467, 0x64000061, 0xb40000fa, | |
2052 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
2053 | 0x24000102, 0xc8010568, 0x78010220, 0x940000f0, | |
2054 | 0x24000102, 0x88010273, 0x44000104, 0x94000036, | |
2055 | 0x24000102, 0x74000108, 0x64000109, 0x36000004, | |
2056 | 0x24000102, 0x8801026f, 0x44000103, 0x94000007, | |
2057 | 0x24000102, 0xc8010570, 0x54000105, 0xa400005f, | |
2058 | 0x24000102, 0xc801050a, 0x44000104, 0x9400004c, | |
2059 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b5, | |
2060 | 0x24000102, 0x88010269, 0x44000103, 0x9400010e, | |
2061 | 0x24000102, 0xc801056d, 0xc8002c20, 0xa4000026, | |
2062 | 0x24000102, 0x64000106, 0x44000104, 0x94000040, | |
2063 | 0x24000102, 0x84000004, 0x6400010a, 0xb400008e, | |
2064 | 0x24000102, 0x88010272, 0x44000103, 0x9400001f, | |
2065 | 0x24000102, 0xc8010575, 0x54000105, 0xa40000bd, | |
2066 | 0x24000102, 0xc8010563, 0x44000104, 0x9400006b, | |
2067 | 0x24000102, 0x84000042, 0x88010261, 0xb40000dd, | |
2068 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
2069 | 0x24000102, 0xc801056c, 0x98010420, 0xa4000014, | |
2070 | 0x24000102, 0x64000073, 0x44000104, 0x9400003a, | |
2071 | 0x24000102, 0x84000001, 0x64000109, 0xb400005d, | |
2072 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, | |
2073 | 0x24000102, 0xc8010574, 0x54000105, 0xa400008b, | |
2074 | 0x24000102, 0xc801052c, 0x44000104, 0x94000050, | |
2075 | 0x24000102, 0x84000032, 0x64000000, 0xb40000cb, | |
2076 | 0x24000102, 0x64000069, 0x44000103, 0xa4000114, | |
2077 | 0x24000102, 0xc801056e, 0x54000020, 0xa4000051, | |
2078 | 0x24000102, 0x64000106, 0x44000104, 0x94000046, | |
2079 | 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1, | |
2080 | 0x24000102, 0x64000072, 0x44000103, 0x94000029, | |
2081 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, | |
2082 | 0x24000102, 0xc8010564, 0x44000104, 0x940000c0, | |
2083 | 0x24000102, 0x84000066, 0x64000061, 0xb40000f4, | |
2084 | 0x24000102, 0xc8010a65, 0x44000103, 0x84000079, | |
2085 | 0x24000102, 0x74000068, 0x78010220, 0xb4000116, | |
2086 | 0x24000102, 0xc8010a73, 0x44000104, 0x94000038, | |
2087 | 0x24000102, 0x74000108, 0x64000109, 0xb4000015, | |
2088 | 0x24000102, 0xc8010a6f, 0x44000103, 0x94000009, | |
2089 | 0x24000102, 0x74000070, 0x54000105, 0xa400007a, | |
2090 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, | |
2091 | 0x24000102, 0x84000031, 0xc8010a00, 0xb40000be, | |
2092 | 0x24000102, 0xc8010a69, 0x44000103, 0x94000110, | |
2093 | 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, | |
2094 | 0x24000102, 0x64000106, 0x44000104, 0x94000044, | |
2095 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098, | |
2096 | 0x24000102, 0xc8010a72, 0x44000103, 0x94000027, | |
2097 | 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, | |
2098 | 0x24000102, 0x74000063, 0x44000104, 0x94000078, | |
2099 | 0x24000102, 0x84000062, 0xc8010a61, 0xb40000e9, | |
2100 | 0x24000102, 0x64000065, 0x44000103, 0x9400010c, | |
2101 | 0x24000102, 0x7400006c, 0xb8000020, 0xa4000021, | |
2102 | 0x24000102, 0x64000073, 0x44000104, 0x9400003e, | |
2103 | 0x24000102, 0x84000003, 0x64000109, 0xb4000085, | |
2104 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, | |
2105 | 0x24000102, 0x74000074, 0x54000105, 0xa40000a4, | |
2106 | 0x24000102, 0x7400002c, 0x44000104, 0x94000053, | |
2107 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d5, | |
2108 | 0x24000102, 0x64000069, 0x44000103, 0x94000005, | |
2109 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000058, | |
2110 | 0x24000102, 0x64000106, 0x44000104, 0x94000048, | |
2111 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab, | |
2112 | 0x24000102, 0x64000072, 0x44000103, 0x94000034, | |
2113 | 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, | |
2114 | 0x24000102, 0x74000064, 0x44000104, 0x940000c3, | |
2115 | 0x24000102, 0x84000067, 0x64000061, 0x46000160, | |
2116 | 0x24000102, 0x88010265, 0x44000103, 0xa8010277, | |
2117 | 0x24000102, 0x98010268, 0x78010220, 0x940000e0, | |
2118 | 0x24000102, 0x88010273, 0x44000104, 0x94000035, | |
2119 | 0x24000102, 0x74000108, 0x64000109, 0xa40000fd, | |
2120 | 0x24000102, 0x8801026f, 0x44000103, 0x94000006, | |
2121 | 0x24000102, 0x98010270, 0x54000105, 0xa4000059, | |
2122 | 0x24000102, 0x9801020a, 0x44000104, 0x94000049, | |
2123 | 0x24000102, 0xa8010230, 0x88010200, 0xb40000ae, | |
2124 | 0x24000102, 0x88010269, 0x44000103, 0x9400010d, | |
2125 | 0x24000102, 0x9801026d, 0xb8006920, 0xa4000023, | |
2126 | 0x24000102, 0x64000106, 0x44000104, 0x9400003f, | |
2127 | 0x24000102, 0xa8010204, 0x6400010a, 0xb4000088, | |
2128 | 0x24000102, 0x88010272, 0x44000103, 0x94000010, | |
2129 | 0x24000102, 0x98010275, 0x54000105, 0xa40000a6, | |
2130 | 0x24000102, 0x98010263, 0x44000104, 0x94000054, | |
2131 | 0x24000102, 0xa8010242, 0x88010261, 0xb40000d8, | |
2132 | 0x24000102, 0xc8007265, 0x44000103, 0xa80102ff, | |
2133 | 0x24000102, 0x9801026c, 0x98010320, 0xa4000011, | |
2134 | 0x24000102, 0xc8007273, 0x44000104, 0x94000039, | |
2135 | 0x24000102, 0xa8010201, 0x64000109, 0xb4000019, | |
2136 | 0x24000102, 0xc800726f, 0x44000103, 0x9400000b, | |
2137 | 0x24000102, 0x98010274, 0x54000105, 0xa400007c, | |
2138 | 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, | |
2139 | 0x24000102, 0xa8010232, 0xc8007200, 0xb40000c5, | |
2140 | 0x24000102, 0xc8007269, 0x44000103, 0xa4000111, | |
2141 | 0x24000102, 0x9801026e, 0x54000020, 0xa400003d, | |
2142 | 0x24000102, 0x64000106, 0x44000104, 0x94000045, | |
2143 | 0x24000102, 0xa801022d, 0x6400010a, 0xb400009b, | |
2144 | 0x24000102, 0xc8007272, 0x44000103, 0x94000028, | |
2145 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e3, | |
2146 | 0x24000102, 0x98010264, 0x44000104, 0x94000080, | |
2147 | 0x24000102, 0xa8010266, 0xc8007261, 0xb40000ec, | |
2148 | 0x24000102, 0xa8010365, 0x44000103, 0xa8010279, | |
2149 | 0x24000102, 0x74000068, 0x78010220, 0x940000fe, | |
2150 | 0x24000102, 0xa8010373, 0x44000104, 0x94000037, | |
2151 | 0x24000102, 0x74000108, 0x64000109, 0x3600000e, | |
2152 | 0x24000102, 0xa801036f, 0x44000103, 0x94000008, | |
2153 | 0x24000102, 0x74000070, 0x54000105, 0xa4000060, | |
2154 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, | |
2155 | 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b8, | |
2156 | 0x24000102, 0xa8010369, 0x44000103, 0x9400010f, | |
2157 | 0x24000102, 0x7400006d, 0xc8006e20, 0xa400002a, | |
2158 | 0x24000102, 0x64000106, 0x44000104, 0x94000043, | |
2159 | 0x24000102, 0xa801020d, 0x6400010a, 0xb4000093, | |
2160 | 0x24000102, 0xa8010372, 0x44000103, 0x94000022, | |
2161 | 0x24000102, 0x74000075, 0x54000105, 0xa40000c1, | |
2162 | 0x24000102, 0x74000063, 0x44000104, 0x94000076, | |
2163 | 0x24000102, 0xa8010262, 0xa8010361, 0xb40000e1, | |
2164 | 0x24000102, 0x64000065, 0x44000103, 0x9400010b, | |
2165 | 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018, | |
2166 | 0x24000102, 0x64000073, 0x44000104, 0x9400003b, | |
2167 | 0x24000102, 0xa8010203, 0x64000109, 0xb400007d, | |
2168 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, | |
2169 | 0x24000102, 0x74000074, 0x54000105, 0xa400008f, | |
2170 | 0x24000102, 0x7400002c, 0x44000104, 0x94000052, | |
2171 | 0x24000102, 0xa8010241, 0x64000000, 0xb40000ce, | |
2172 | 0x24000102, 0x64000069, 0x44000103, 0x94000002, | |
2173 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000055, | |
2174 | 0x24000102, 0x64000106, 0x44000104, 0x94000047, | |
2175 | 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a5, | |
2176 | 0x24000102, 0x64000072, 0x44000103, 0x94000033, | |
2177 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f3, | |
2178 | 0x24000102, 0x74000064, 0x44000104, 0x940000c2, | |
2179 | 0x24000102, 0xa8010267, 0x64000061, 0xb40000f7, | |
2180 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
2181 | 0x24000102, 0xb8010468, 0x78010220, 0x940000f0, | |
2182 | 0x24000102, 0x88010273, 0x44000104, 0x94000036, | |
2183 | 0x24000102, 0x74000108, 0x64000109, 0xc4000120, | |
2184 | 0x24000102, 0x8801026f, 0x44000103, 0x94000007, | |
2185 | 0x24000102, 0xb8010470, 0x54000105, 0xa400005c, | |
2186 | 0x24000102, 0xb801040a, 0x44000104, 0x9400004c, | |
2187 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b3, | |
2188 | 0x24000102, 0x88010269, 0x44000103, 0x9400010e, | |
2189 | 0x24000102, 0xb801046d, 0xb8010620, 0xa4000025, | |
2190 | 0x24000102, 0x64000106, 0x44000104, 0x94000040, | |
2191 | 0x24000102, 0x84000004, 0x6400010a, 0xb400008d, | |
2192 | 0x24000102, 0x88010272, 0x44000103, 0x9400001f, | |
2193 | 0x24000102, 0xb8010475, 0x54000105, 0xa40000b4, | |
2194 | 0x24000102, 0xb8010463, 0x44000104, 0x9400006b, | |
2195 | 0x24000102, 0x84000042, 0x88010261, 0xb40000dc, | |
2196 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
2197 | 0x24000102, 0xb801046c, 0x98010420, 0xa4000013, | |
2198 | 0x24000102, 0x64000073, 0x44000104, 0x9400003a, | |
2199 | 0x24000102, 0x84000001, 0x64000109, 0xb400005a, | |
2200 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, | |
2201 | 0x24000102, 0xb8010474, 0x54000105, 0xa400007f, | |
2202 | 0x24000102, 0xb801042c, 0x44000104, 0x94000050, | |
2203 | 0x24000102, 0x84000032, 0x64000000, 0xb40000ca, | |
2204 | 0x24000102, 0x64000069, 0x44000103, 0xa4000112, | |
2205 | 0x24000102, 0xb801046e, 0x54000020, 0xa400004b, | |
2206 | 0x24000102, 0x64000106, 0x44000104, 0x94000046, | |
2207 | 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f, | |
2208 | 0x24000102, 0x64000072, 0x44000103, 0x94000029, | |
2209 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e8, | |
2210 | 0x24000102, 0xb8010464, 0x44000104, 0x940000c0, | |
2211 | 0x24000102, 0x84000066, 0x64000061, 0xb40000f2, | |
2212 | 0x24000102, 0xb8010565, 0x44000103, 0x84000079, | |
2213 | 0x24000102, 0x74000068, 0x78010220, 0xb4000117, | |
2214 | 0x24000102, 0xb8010573, 0x44000104, 0x94000038, | |
2215 | 0x24000102, 0x74000108, 0x64000109, 0x3600001e, | |
2216 | 0x24000102, 0xb801056f, 0x44000103, 0x94000009, | |
2217 | 0x24000102, 0x74000070, 0x54000105, 0xa4000071, | |
2218 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, | |
2219 | 0x24000102, 0x84000031, 0xb8010500, 0xb40000bc, | |
2220 | 0x24000102, 0xb8010569, 0x44000103, 0x94000110, | |
2221 | 0x24000102, 0x7400006d, 0x54000020, 0xa400002f, | |
2222 | 0x24000102, 0x64000106, 0x44000104, 0x94000044, | |
2223 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097, | |
2224 | 0x24000102, 0xb8010572, 0x44000103, 0x94000027, | |
2225 | 0x24000102, 0x74000075, 0x54000105, 0xa40000cf, | |
2226 | 0x24000102, 0x74000063, 0x44000104, 0x94000078, | |
2227 | 0x24000102, 0x84000062, 0xb8010561, 0xb40000e6, | |
2228 | 0x24000102, 0x64000065, 0x44000103, 0x9400010c, | |
2229 | 0x24000102, 0x7400006c, 0xb8010a20, 0xa400001e, | |
2230 | 0x24000102, 0x64000073, 0x44000104, 0x9400003e, | |
2231 | 0x24000102, 0x84000003, 0x64000109, 0xb4000084, | |
2232 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, | |
2233 | 0x24000102, 0x74000074, 0x54000105, 0xa40000a0, | |
2234 | 0x24000102, 0x7400002c, 0x44000104, 0x94000053, | |
2235 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d4, | |
2236 | 0x24000102, 0x64000069, 0x44000103, 0x94000005, | |
2237 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000057, | |
2238 | 0x24000102, 0x64000106, 0x44000104, 0x94000048, | |
2239 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000aa, | |
2240 | 0x24000102, 0x64000072, 0x44000103, 0x94000034, | |
2241 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f9, | |
2242 | 0x24000102, 0x74000064, 0x44000104, 0x940000c3, | |
2243 | 0x24000102, 0x84000067, 0x64000061, 0xb4000200, | |
2244 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
2245 | 0x24000102, 0x98010268, 0x78010220, 0x940000e0, | |
2246 | 0x24000102, 0x88010273, 0x44000104, 0x94000035, | |
2247 | 0x24000102, 0x74000108, 0x64000109, 0xc400011c, | |
2248 | 0x24000102, 0x8801026f, 0x44000103, 0x94000006, | |
2249 | 0x24000102, 0x98010270, 0x54000105, 0xa400005b, | |
2250 | 0x24000102, 0x9801020a, 0x44000104, 0x94000049, | |
2251 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b1, | |
2252 | 0x24000102, 0x88010269, 0x44000103, 0x9400010d, | |
2253 | 0x24000102, 0x9801026d, 0xb8007220, 0xa4000024, | |
2254 | 0x24000102, 0x64000106, 0x44000104, 0x9400003f, | |
2255 | 0x24000102, 0x84000004, 0x6400010a, 0xb400008a, | |
2256 | 0x24000102, 0x88010272, 0x44000103, 0x94000010, | |
2257 | 0x24000102, 0x98010275, 0x54000105, 0xa40000b0, | |
2258 | 0x24000102, 0x98010263, 0x44000104, 0x94000054, | |
2259 | 0x24000102, 0x84000042, 0x88010261, 0xb40000da, | |
2260 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
2261 | 0x24000102, 0x9801026c, 0x98010320, 0xa4000012, | |
2262 | 0x24000102, 0x64000073, 0x44000104, 0x94000039, | |
2263 | 0x24000102, 0x84000001, 0x64000109, 0xb400001b, | |
2264 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000b, | |
2265 | 0x24000102, 0x98010274, 0x54000105, 0xa400007e, | |
2266 | 0x24000102, 0x9801022c, 0x44000104, 0x9400004f, | |
2267 | 0x24000102, 0x84000032, 0x64000000, 0xb40000c8, | |
2268 | 0x24000102, 0x64000069, 0x44000103, 0xa4000113, | |
2269 | 0x24000102, 0x9801026e, 0x54000020, 0xa400004a, | |
2270 | 0x24000102, 0x64000106, 0x44000104, 0x94000045, | |
2271 | 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d, | |
2272 | 0x24000102, 0x64000072, 0x44000103, 0x94000028, | |
2273 | 0x24000102, 0x74000107, 0x54000105, 0xa40000e7, | |
2274 | 0x24000102, 0x98010264, 0x44000104, 0x94000080, | |
2275 | 0x24000102, 0x84000066, 0x64000061, 0xb40000ee, | |
2276 | 0x24000102, 0xa8010465, 0x44000103, 0x84000079, | |
2277 | 0x24000102, 0x74000068, 0x78010220, 0x940000fe, | |
2278 | 0x24000102, 0xa8010473, 0x44000104, 0x94000037, | |
2279 | 0x24000102, 0x74000108, 0x64000109, 0x36000016, | |
2280 | 0x24000102, 0xa801046f, 0x44000103, 0x94000008, | |
2281 | 0x24000102, 0x74000070, 0x54000105, 0xa400006a, | |
2282 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004d, | |
2283 | 0x24000102, 0x84000031, 0xa8010400, 0xb40000ba, | |
2284 | 0x24000102, 0xa8010469, 0x44000103, 0x9400010f, | |
2285 | 0x24000102, 0x7400006d, 0xc8010720, 0xa400002b, | |
2286 | 0x24000102, 0x64000106, 0x44000104, 0x94000043, | |
2287 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095, | |
2288 | 0x24000102, 0xa8010472, 0x44000103, 0x94000022, | |
2289 | 0x24000102, 0x74000075, 0x54000105, 0xa40000c7, | |
2290 | 0x24000102, 0x74000063, 0x44000104, 0x94000076, | |
2291 | 0x24000102, 0x84000062, 0xa8010461, 0xb40000e4, | |
2292 | 0x24000102, 0x64000065, 0x44000103, 0x9400010b, | |
2293 | 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c, | |
2294 | 0x24000102, 0x64000073, 0x44000104, 0x9400003b, | |
2295 | 0x24000102, 0x84000003, 0x64000109, 0xb4000082, | |
2296 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000e, | |
2297 | 0x24000102, 0x74000074, 0x54000105, 0xa4000090, | |
2298 | 0x24000102, 0x7400002c, 0x44000104, 0x94000052, | |
2299 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d2, | |
2300 | 0x24000102, 0x64000069, 0x44000103, 0x94000002, | |
2301 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000056, | |
2302 | 0x24000102, 0x64000106, 0x44000104, 0x94000047, | |
2303 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a8, | |
2304 | 0x24000102, 0x64000072, 0x44000103, 0x94000033, | |
2305 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f8, | |
2306 | 0x24000102, 0x74000064, 0x44000104, 0x940000c2, | |
2307 | 0x24000102, 0x84000067, 0x64000061, 0xb40000fb, | |
2308 | 0x24000102, 0x88010265, 0x44000103, 0x84000077, | |
2309 | 0x24000102, 0x74000068, 0x78010220, 0x940000f0, | |
2310 | 0x24000102, 0x88010273, 0x44000104, 0x94000036, | |
2311 | 0x24000102, 0x74000108, 0x64000109, 0x36000006, | |
2312 | 0x24000102, 0x8801026f, 0x44000103, 0x94000007, | |
2313 | 0x24000102, 0x74000070, 0x54000105, 0xa400005f, | |
2314 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004c, | |
2315 | 0x24000102, 0x84000030, 0x88010200, 0xb40000b6, | |
2316 | 0x24000102, 0x88010269, 0x44000103, 0x9400010e, | |
2317 | 0x24000102, 0x7400006d, 0xc8006420, 0xa4000026, | |
2318 | 0x24000102, 0x64000106, 0x44000104, 0x94000040, | |
2319 | 0x24000102, 0x84000004, 0x6400010a, 0xb4000091, | |
2320 | 0x24000102, 0x88010272, 0x44000103, 0x9400001f, | |
2321 | 0x24000102, 0x74000075, 0x54000105, 0xa40000bd, | |
2322 | 0x24000102, 0x74000063, 0x44000104, 0x9400006b, | |
2323 | 0x24000102, 0x84000042, 0x88010261, 0xb40000de, | |
2324 | 0x24000102, 0x64000065, 0x44000103, 0x840000ff, | |
2325 | 0x24000102, 0x7400006c, 0x98010420, 0xa4000014, | |
2326 | 0x24000102, 0x64000073, 0x44000104, 0x9400003a, | |
2327 | 0x24000102, 0x84000001, 0x64000109, 0xb400005e, | |
2328 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000c, | |
2329 | 0x24000102, 0x74000074, 0x54000105, 0xa400008b, | |
2330 | 0x24000102, 0x7400002c, 0x44000104, 0x94000050, | |
2331 | 0x24000102, 0x84000032, 0x64000000, 0xb40000cc, | |
2332 | 0x24000102, 0x64000069, 0x44000103, 0xa4000114, | |
2333 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000051, | |
2334 | 0x24000102, 0x64000106, 0x44000104, 0x94000046, | |
2335 | 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a2, | |
2336 | 0x24000102, 0x64000072, 0x44000103, 0x94000029, | |
2337 | 0x24000102, 0x74000107, 0x54000105, 0xa40000f1, | |
2338 | 0x24000102, 0x74000064, 0x44000104, 0x940000c0, | |
2339 | 0x24000102, 0x84000066, 0x64000061, 0xb40000f5, | |
2340 | 0x24000102, 0xc8006165, 0x44000103, 0x84000079, | |
2341 | 0x24000102, 0x74000068, 0x78010220, 0xb4000118, | |
2342 | 0x24000102, 0xc8006173, 0x44000104, 0x94000038, | |
2343 | 0x24000102, 0x74000108, 0x64000109, 0xb4000016, | |
2344 | 0x24000102, 0xc800616f, 0x44000103, 0x94000009, | |
2345 | 0x24000102, 0x74000070, 0x54000105, 0xa400007a, | |
2346 | 0x24000102, 0x7400000a, 0x44000104, 0x9400004e, | |
2347 | 0x24000102, 0x84000031, 0xc8006100, 0xb40000bf, | |
2348 | 0x24000102, 0xc8006169, 0x44000103, 0x94000110, | |
2349 | 0x24000102, 0x7400006d, 0x54000020, 0xa400003c, | |
2350 | 0x24000102, 0x64000106, 0x44000104, 0x94000044, | |
2351 | 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099, | |
2352 | 0x24000102, 0xc8006172, 0x44000103, 0x94000027, | |
2353 | 0x24000102, 0x74000075, 0x54000105, 0xa40000d0, | |
2354 | 0x24000102, 0x74000063, 0x44000104, 0x94000078, | |
2355 | 0x24000102, 0x84000062, 0xc8006161, 0xb40000ea, | |
2356 | 0x24000102, 0x64000065, 0x44000103, 0x9400010c, | |
2357 | 0x24000102, 0x7400006c, 0xb8006120, 0xa4000021, | |
2358 | 0x24000102, 0x64000073, 0x44000104, 0x9400003e, | |
2359 | 0x24000102, 0x84000003, 0x64000109, 0xb4000086, | |
2360 | 0x24000102, 0x6400006f, 0x44000103, 0x9400000f, | |
2361 | 0x24000102, 0x74000074, 0x54000105, 0xa40000a4, | |
2362 | 0x24000102, 0x7400002c, 0x44000104, 0x94000053, | |
2363 | 0x24000102, 0x84000041, 0x64000000, 0xb40000d6, | |
2364 | 0x24000102, 0x64000069, 0x44000103, 0x94000005, | |
2365 | 0x24000102, 0x7400006e, 0x54000020, 0xa4000058, | |
2366 | 0x24000102, 0x64000106, 0x44000104, 0x94000048, | |
2367 | 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ac, | |
2368 | 0x24000102, 0x64000072, 0x44000103, 0x94000034, | |
2369 | 0x24000102, 0x74000107, 0x54000105, 0xa40000fc, | |
2370 | 0x24000102, 0x74000064, 0x44000104, 0x940000c3, | |
2371 | 0x24000102, 0x84000067, 0x64000061, 0x52000020 }, | |
2372 | ||
2373 | .long_code_lookup = { | |
2374 | 0x3521, 0x3525, 0x3522, 0x3526, 0x3523, 0x3527, 0x3524, 0x3528, | |
2375 | 0x3529, 0x352d, 0x352a, 0x352e, 0x352b, 0x352f, 0x352c, 0x3530, | |
2376 | 0x3531, 0x3535, 0x3532, 0x3536, 0x3533, 0x3537, 0x3534, 0x3538, | |
2377 | 0x3539, 0x353d, 0x353a, 0x353e, 0x353b, 0x353f, 0x353c, 0x3540, | |
2378 | 0x49a1, 0x3d00, 0x49a2, 0x51c1, 0x49a3, 0x3d01, 0x49a4, 0x51e1, | |
2379 | 0x49a5, 0x3d00, 0x49a6, 0x51c2, 0x49a7, 0x3d01, 0x49a8, 0x51e2, | |
2380 | 0x49a9, 0x3d00, 0x49aa, 0x51c3, 0x49ab, 0x3d01, 0x49ac, 0x51e3, | |
2381 | 0x49ad, 0x3d00, 0x49ae, 0x51c4, 0x49af, 0x3d01, 0x49b0, 0x51e4, | |
2382 | 0x49b1, 0x3d00, 0x49b2, 0x51c5, 0x49b3, 0x3d01, 0x49b4, 0x51e5, | |
2383 | 0x49b5, 0x3d00, 0x49b6, 0x51c6, 0x49b7, 0x3d01, 0x49b8, 0x51e6, | |
2384 | 0x49b9, 0x3d00, 0x49ba, 0x51c7, 0x49bb, 0x3d01, 0x49bc, 0x51e7, | |
2385 | 0x49bd, 0x3d00, 0x49be, 0x51c8, 0x49bf, 0x3d01, 0x49c0, 0x51e8, | |
2386 | 0x49a1, 0x3d00, 0x49a2, 0x51c9, 0x49a3, 0x3d01, 0x49a4, 0x51e9, | |
2387 | 0x49a5, 0x3d00, 0x49a6, 0x51ca, 0x49a7, 0x3d01, 0x49a8, 0x51ea, | |
2388 | 0x49a9, 0x3d00, 0x49aa, 0x51cb, 0x49ab, 0x3d01, 0x49ac, 0x51eb, | |
2389 | 0x49ad, 0x3d00, 0x49ae, 0x51cc, 0x49af, 0x3d01, 0x49b0, 0x51ec, | |
2390 | 0x49b1, 0x3d00, 0x49b2, 0x51cd, 0x49b3, 0x3d01, 0x49b4, 0x51ed, | |
2391 | 0x49b5, 0x3d00, 0x49b6, 0x51ce, 0x49b7, 0x3d01, 0x49b8, 0x51ee, | |
2392 | 0x49b9, 0x3d00, 0x49ba, 0x51cf, 0x49bb, 0x3d01, 0x49bc, 0x51ef, | |
2393 | 0x49bd, 0x3d00, 0x49be, 0x51d0, 0x49bf, 0x3d01, 0x49c0, 0x51f0, | |
2394 | 0x49a1, 0x3d00, 0x49a2, 0x51d1, 0x49a3, 0x3d01, 0x49a4, 0x51f1, | |
2395 | 0x49a5, 0x3d00, 0x49a6, 0x51d2, 0x49a7, 0x3d01, 0x49a8, 0x51f2, | |
2396 | 0x49a9, 0x3d00, 0x49aa, 0x51d3, 0x49ab, 0x3d01, 0x49ac, 0x51f3, | |
2397 | 0x49ad, 0x3d00, 0x49ae, 0x51d4, 0x49af, 0x3d01, 0x49b0, 0x51f4, | |
2398 | 0x49b1, 0x3d00, 0x49b2, 0x51d5, 0x49b3, 0x3d01, 0x49b4, 0x51f5, | |
2399 | 0x49b5, 0x3d00, 0x49b6, 0x51d6, 0x49b7, 0x3d01, 0x49b8, 0x51f6, | |
2400 | 0x49b9, 0x3d00, 0x49ba, 0x51d7, 0x49bb, 0x3d01, 0x49bc, 0x51f7, | |
2401 | 0x49bd, 0x3d00, 0x49be, 0x51d8, 0x49bf, 0x3d01, 0x49c0, 0x51f8, | |
2402 | 0x49a1, 0x3d00, 0x49a2, 0x51d9, 0x49a3, 0x3d01, 0x49a4, 0x51f9, | |
2403 | 0x49a5, 0x3d00, 0x49a6, 0x51da, 0x49a7, 0x3d01, 0x49a8, 0x51fa, | |
2404 | 0x49a9, 0x3d00, 0x49aa, 0x51db, 0x49ab, 0x3d01, 0x49ac, 0x51fb, | |
2405 | 0x49ad, 0x3d00, 0x49ae, 0x51dc, 0x49af, 0x3d01, 0x49b0, 0x51fc, | |
2406 | 0x49b1, 0x3d00, 0x49b2, 0x51dd, 0x49b3, 0x3d01, 0x49b4, 0x51fd, | |
2407 | 0x49b5, 0x3d00, 0x49b6, 0x51de, 0x49b7, 0x3d01, 0x49b8, 0x51fe, | |
2408 | 0x49b9, 0x3d00, 0x49ba, 0x51df, 0x49bb, 0x3d01, 0x49bc, 0x51ff, | |
2409 | 0x49bd, 0x3d00, 0x49be, 0x51e0, 0x49bf, 0x3d01, 0x49c0, 0x5200, | |
2410 | 0x3d41, 0x3d43, 0x3d45, 0x3d47, 0x3d49, 0x3d4b, 0x3d4d, 0x3d4f, | |
2411 | 0x3d42, 0x3d44, 0x3d46, 0x3d48, 0x3d4a, 0x3d4c, 0x3d4e, 0x3d50, | |
2412 | 0x4151, 0x4152, 0x4153, 0x4154, 0x4155, 0x4156, 0x4157, 0x4158, | |
2413 | 0x4159, 0x415a, 0x415b, 0x415c, 0x415d, 0x415e, 0x415f, 0x4160, | |
2414 | 0x4561, 0x4571, 0x4562, 0x4572, 0x4563, 0x4573, 0x4564, 0x4574, | |
2415 | 0x4565, 0x4575, 0x4566, 0x4576, 0x4567, 0x4577, 0x4568, 0x4578, | |
2416 | 0x4569, 0x4579, 0x456a, 0x457a, 0x456b, 0x457b, 0x456c, 0x457c, | |
2417 | 0x456d, 0x457d, 0x456e, 0x457e, 0x456f, 0x457f, 0x4570, 0x4580, | |
2418 | 0x4581, 0x4582, 0x4583, 0x4584, 0x4585, 0x4586, 0x4587, 0x4588, | |
2419 | 0x4589, 0x458a, 0x458b, 0x458c, 0x458d, 0x458e, 0x458f, 0x4590, | |
2420 | 0x4591, 0x4592, 0x4593, 0x4594, 0x4595, 0x4596, 0x4597, 0x4598, | |
2421 | 0x4599, 0x459a, 0x459b, 0x459c, 0x459d, 0x459e, 0x459f, 0x45a0, | |
2422 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2423 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2424 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2425 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2426 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2427 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2428 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2429 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2430 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2431 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2432 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2433 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2434 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2435 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2436 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2437 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2438 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2439 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2440 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2441 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2442 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2443 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2444 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2445 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2446 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2447 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2448 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2449 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2450 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2451 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2452 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2453 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2454 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2455 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2456 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2457 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2458 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2459 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2460 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2461 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2462 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2463 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2464 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2465 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2466 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2467 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2468 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2469 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2470 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2471 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2472 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2473 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2474 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2475 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2476 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2477 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2478 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2479 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2480 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2481 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2482 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2483 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2484 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2485 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2486 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2487 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2488 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2489 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2490 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2491 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2492 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2493 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2494 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2495 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2496 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2497 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2498 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2499 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2500 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2501 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2502 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2503 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2504 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2505 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2506 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2507 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2508 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2509 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2510 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2511 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2512 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2513 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2514 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2515 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2516 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2517 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2518 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2519 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2520 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2521 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2522 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2523 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2524 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2525 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2526 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2527 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2528 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2529 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2530 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2531 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } | |
2532 | }; | |
2533 | ||
2534 | struct inflate_huff_code_small pregen_dist_huff_code = { | |
2535 | .short_code_lookup = { | |
2536 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2537 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2538 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2539 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, | |
2540 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2541 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2542 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2543 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000, | |
2544 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2545 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2546 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2547 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, | |
2548 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2549 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2550 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2551 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024, | |
2552 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2553 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2554 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2555 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, | |
2556 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2557 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2558 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2559 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002, | |
2560 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2561 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2562 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2563 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, | |
2564 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2565 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2566 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2567 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4801, | |
2568 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2569 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2570 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2571 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, | |
2572 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2573 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2574 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2575 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000, | |
2576 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2577 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2578 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2579 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, | |
2580 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2581 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2582 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2583 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024, | |
2584 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2585 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2586 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2587 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, | |
2588 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2589 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2590 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2591 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002, | |
2592 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2593 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2594 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2595 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, | |
2596 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2597 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2598 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2599 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4803, | |
2600 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2601 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2602 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2603 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, | |
2604 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2605 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2606 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2607 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000, | |
2608 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2609 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2610 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2611 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, | |
2612 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2613 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2614 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2615 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024, | |
2616 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2617 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2618 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2619 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, | |
2620 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2621 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2622 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2623 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002, | |
2624 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2625 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2626 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2627 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, | |
2628 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2629 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2630 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2631 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4801, | |
2632 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2633 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2634 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2635 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, | |
2636 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2637 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2638 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2639 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000, | |
2640 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2641 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2642 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2643 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, | |
2644 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2645 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2646 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2647 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024, | |
2648 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2649 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2650 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2651 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825, | |
2652 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2653 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2654 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2655 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002, | |
2656 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2657 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069, | |
2658 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047, | |
2659 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846, | |
2660 | 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc, | |
2661 | 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd, | |
2662 | 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068, | |
2663 | 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4803 }, | |
2664 | ||
2665 | .long_code_lookup = { | |
2666 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2667 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2668 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2669 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2670 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2671 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2672 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2673 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2674 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, | |
2675 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } | |
2676 | }; | |
2677 |
0 | 0 | /********************************************************************** |
1 | Copyright(c) 2019 Arm Corporation All rights reserved. | |
1 | Copyright(c) 2020 Arm Corporation All rights reserved. | |
2 | 2 | |
3 | 3 | Redistribution and use in source and binary forms, with or without |
4 | 4 | modification, are permitted provided that the following conditions |
216 | 216 | _func_entry; \ |
217 | 217 | }) |
218 | 218 | |
219 | /** | |
220 | * Micro-architecture definitions | |
221 | * Reference: https://developer.arm.com/docs/ddi0595/f/aarch64-system-registers/midr_el1 | |
222 | */ | |
223 | ||
224 | #define CPU_IMPLEMENTER_RESERVE 0x00 | |
225 | #define CPU_IMPLEMENTER_ARM 0x41 | |
226 | ||
227 | ||
228 | #define CPU_PART_CORTEX_A57 0xD07 | |
229 | #define CPU_PART_CORTEX_A72 0xD08 | |
230 | #define CPU_PART_NEOVERSE_N1 0xD0C | |
231 | ||
232 | #define MICRO_ARCH_ID(imp,part) \ | |
233 | (((CPU_IMPLEMENTER_##imp&0xff)<<24)|((CPU_PART_##part&0xfff)<<4)) | |
234 | ||
235 | #ifndef HWCAP_CPUID | |
236 | #define HWCAP_CPUID (1<<11) | |
237 | #endif | |
238 | ||
239 | /** | |
240 | * @brief get_micro_arch_id | |
241 | * | |
242 | * Read the micro-architecture ID register if possible. This function | |
243 | * provides microarchitecture information and makes microarchitecture optimization | |
244 | * possible. | |
245 | * | |
246 | * Reading system registers (MRS) is forbidden in userspace. If executed, it | |
247 | * will raise an illegal instruction error. The kernel provides a solution for | |
248 | * this issue. The solution depends on the HWCAP_CPUID flag. Reference (1) | |
249 | * describes how to use it. It provides an "illegal instruction" handler | |
250 | * in kernel space; the handler will execute MRS and return the correct | |
251 | * value to userspace. | |
252 | * | |
253 | * To avoid too many kernel traps, this function MUST only be called in the | |
254 | * dispatcher. The HWCAP flags must also match; that makes sure there are no | |
255 | * illegal instruction errors. HWCAP_CPUID should be available to get the | |
256 | * best performance. | |
257 | * | |
258 | * NOTICE: | |
259 | * - HWCAP_CPUID should be available. Otherwise it returns the reserved value | |
260 | * - It MUST be called inside the dispatcher. | |
261 | * - It MUST meet the HWCAP requirements | |
262 | * | |
263 | * Example: | |
264 | * DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) | |
265 | * { | |
266 | * unsigned long auxval = getauxval(AT_HWCAP); | |
267 | * // This HWCAP check is mandatory. | |
268 | * if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { | |
269 | * switch (get_micro_arch_id()) { | |
270 | * case MICRO_ARCH_ID(ARM, CORTEX_A57): | |
271 | * return PROVIDER_INFO(crc32_pmull_crc_for_a57); | |
272 | * case MICRO_ARCH_ID(ARM, CORTEX_A72): | |
273 | * return PROVIDER_INFO(crc32_pmull_crc_for_a72); | |
274 | * case MICRO_ARCH_ID(ARM, NEOVERSE_N1): | |
275 | * return PROVIDER_INFO(crc32_pmull_crc_for_n1); | |
276 | * default: | |
277 | * return PROVIDER_INFO(crc32_pmull_crc_for_others); | |
278 | * } | |
279 | * } | |
280 | * return PROVIDER_BASIC(crc32_iscsi); | |
281 | * } | |
282 | * KNOWN ISSUE: | |
283 | * On a heterogeneous system (big.LITTLE), it will work but the performance | |
284 | * might not be the best one as expected. | |
285 | * | |
286 | * If this function is called on the big core, it will return the function | |
287 | * optimized for the big core. | |
288 | * | |
289 | * If execution is then scheduled to the little core, it will still work (1), | |
290 | * but the function won't be optimized for the little core, thus the performance | |
291 | * won't be as expected. | |
292 | * | |
293 | * References: | |
294 | * - [CPU Feature detection](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/arm64/cpu-feature-registers.rst?h=v5.5) | |
295 | * | |
296 | */ | |
297 | static inline uint32_t get_micro_arch_id(void) | |
298 | { | |
299 | uint32_t id=CPU_IMPLEMENTER_RESERVE; | |
300 | if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) { | |
301 | /** Here will trap into kernel space */ | |
302 | asm("mrs %0, MIDR_EL1 " : "=r" (id)); | |
303 | } | |
304 | return id&0xff00fff0; | |
305 | } | |
306 | ||
307 | ||
308 | ||
219 | 309 | #endif /* __ASSEMBLY__ */ |
220 | 310 | #endif |
313 | 313 | #define IGZIP_NO_HIST 0 |
314 | 314 | #define IGZIP_HIST 1 |
315 | 315 | #define IGZIP_DICT_HIST 2 |
316 | #define IGZIP_DICT_HASH_SET 3 | |
316 | 317 | |
317 | 318 | /** @brief Holds Bit Buffer information*/ |
318 | 319 | struct BitBuf2 { |
684 | 685 | */ |
685 | 686 | int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t dict_len); |
686 | 687 | |
688 | /** @brief Structure for holding processed dictionary information */ | |
689 | ||
690 | struct isal_dict { | |
691 | uint32_t params; | |
692 | uint32_t level; | |
693 | uint32_t hist_size; | |
694 | uint32_t hash_size; | |
695 | uint8_t history[ISAL_DEF_HIST_SIZE]; | |
696 | uint16_t hashtable[IGZIP_LVL3_HASH_SIZE]; | |
697 | }; | |
698 | ||
699 | /** | |
700 | * @brief Process dictionary to reuse later | |
701 | * | |
702 | * Processes a dictionary so that the generated output can be reused to reset a | |
703 | * new deflate stream more quickly than isal_deflate_set_dict() alone. This | |
704 | * function is paired with isal_deflate_reset_dict() when using the same | |
705 | * dictionary on multiple deflate objects. The stream.level must be set prior to | |
706 | * calling this function to process the dictionary correctly. If the dictionary | |
707 | * is longer than IGZIP_HIST_SIZE, only the last IGZIP_HIST_SIZE bytes will be | |
708 | * used. | |
709 | * | |
710 | * @param stream Structure holding state information on the compression streams. | |
711 | * @param dict_str: Structure to hold processed dictionary info to reuse later. | |
712 | * @param dict: Array containing dictionary to use. | |
713 | * @param dict_len: Length of dict. | |
714 | * @returns COMP_OK, | |
715 | * ISAL_INVALID_STATE (dictionary could not be processed) | |
716 | */ | |
717 | int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict_str, | |
718 | uint8_t *dict, uint32_t dict_len); | |
719 | ||
720 | /** | |
721 | * @brief Reset compression dictionary to use | |
722 | * | |
723 | * Similar to isal_deflate_set_dict() but on pre-processed dictionary | |
724 | * data. Pairing with isal_deflate_process_dict() can reduce the processing time | |
725 | * on subsequent compression with dictionary especially on small files. | |
726 | * | |
727 | * Like isal_deflate_set_dict(), this function is to be called after | |
728 | * isal_deflate_init, or after completing a SYNC_FLUSH or FULL_FLUSH and before | |
729 | * the next call to isal_deflate. Changing compression level between dictionary | |
730 | * process and reset will cause return of ISAL_INVALID_STATE. | |
731 | * | |
732 | * @param stream Structure holding state information on the compression streams. | |
733 | * @param dict_str: Structure with pre-processed dictionary info. | |
734 | * @returns COMP_OK, | |
735 | * ISAL_INVALID_STATE or other (dictionary could not be reset) | |
736 | */ | |
737 | int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict_str); | |
738 | ||
739 | ||
687 | 740 | /** |
688 | 741 | * @brief Fast data (deflate) compression for storage applications. |
689 | 742 | * |
68 | 68 | mbin_def_ptr %1_mbinit |
69 | 69 | |
70 | 70 | section .text |
71 | global %1:ISAL_SYM_TYPE_FUNCTION | |
71 | mk_global %1, function | |
72 | 72 | %1_mbinit: |
73 | endbranch | |
73 | 74 | ;;; only called the first time to setup hardware match |
74 | 75 | call %1_dispatch_init |
75 | 76 | ;;; falls thru to execute the hw optimized code |
76 | 77 | %1: |
78 | endbranch | |
77 | 79 | jmp mbin_ptr_sz [%1_dispatched] |
78 | 80 | %endmacro |
79 | 81 |
28 | 28 | |
29 | 29 | %ifndef _REG_SIZES_ASM_ |
30 | 30 | %define _REG_SIZES_ASM_ |
31 | ||
32 | %ifdef __NASM_VER__ | |
33 | %ifidn __OUTPUT_FORMAT__, win64 | |
34 | %error nasm not supported in windows | |
35 | %else | |
36 | %define endproc_frame | |
37 | %endif | |
38 | %endif | |
39 | 31 | |
40 | 32 | %ifndef AS_FEATURE_LEVEL |
41 | 33 | %define AS_FEATURE_LEVEL 4 |
207 | 199 | section .text |
208 | 200 | %endif |
209 | 201 | %ifidn __OUTPUT_FORMAT__,elf64 |
202 | %define __x86_64__ | |
210 | 203 | section .note.GNU-stack noalloc noexec nowrite progbits |
211 | 204 | section .text |
205 | %endif | |
206 | %ifidn __OUTPUT_FORMAT__,win64 | |
207 | %define __x86_64__ | |
208 | %endif | |
209 | %ifidn __OUTPUT_FORMAT__,macho64 | |
210 | %define __x86_64__ | |
211 | %endif | |
212 | ||
213 | %ifdef __x86_64__ | |
214 | %define endbranch db 0xf3, 0x0f, 0x1e, 0xfa | |
215 | %else | |
216 | %define endbranch db 0xf3, 0x0f, 0x1e, 0xfb | |
212 | 217 | %endif |
213 | 218 | |
214 | 219 | %ifdef REL_TEXT |
219 | 224 | %define WRT_OPT |
220 | 225 | %endif |
221 | 226 | |
227 | %macro mk_global 1-3 | |
228 | %ifdef __NASM_VER__ | |
229 | %ifidn __OUTPUT_FORMAT__, macho64 | |
230 | global %1 | |
231 | %elifidn __OUTPUT_FORMAT__, win64 | |
232 | global %1 | |
233 | %else | |
234 | global %1:%2 %3 | |
235 | %endif | |
236 | %else | |
237 | global %1:%2 %3 | |
238 | %endif | |
239 | %endmacro | |
240 | ||
241 | ||
242 | ; Fixes for nasm lack of MS proc helpers | |
243 | %ifdef __NASM_VER__ | |
244 | %ifidn __OUTPUT_FORMAT__, win64 | |
245 | %macro alloc_stack 1 | |
246 | sub rsp, %1 | |
247 | %endmacro | |
248 | ||
249 | %macro proc_frame 1 | |
250 | %1: | |
251 | %endmacro | |
252 | ||
253 | %macro save_xmm128 2 | |
254 | movdqa [rsp + %2], %1 | |
255 | %endmacro | |
256 | ||
257 | %macro save_reg 2 | |
258 | mov [rsp + %2], %1 | |
259 | %endmacro | |
260 | ||
261 | %macro rex_push_reg 1 | |
262 | push %1 | |
263 | %endmacro | |
264 | ||
265 | %macro push_reg 1 | |
266 | push %1 | |
267 | %endmacro | |
268 | ||
269 | %define end_prolog | |
270 | %endif | |
271 | ||
272 | %define endproc_frame | |
273 | %endif | |
274 | ||
222 | 275 | %ifidn __OUTPUT_FORMAT__, macho64 |
223 | 276 | %define elf64 macho64 |
224 | 277 | mac_equ equ 1 |
225 | %ifdef __NASM_VER__ | |
226 | %define ISAL_SYM_TYPE_FUNCTION | |
227 | %define ISAL_SYM_TYPE_DATA_INTERNAL | |
228 | %else | |
229 | %define ISAL_SYM_TYPE_FUNCTION function | |
230 | %define ISAL_SYM_TYPE_DATA_INTERNAL data internal | |
231 | %endif | |
232 | %else | |
233 | %define ISAL_SYM_TYPE_FUNCTION function | |
234 | %define ISAL_SYM_TYPE_DATA_INTERNAL data internal | |
235 | 278 | %endif |
236 | 279 | |
237 | 280 | %macro slversion 4 |
0 | 0 | LIBRARY isa-l |
1 | VERSION 2.29 | |
1 | VERSION 2.30 | |
2 | 2 | EXPORTS |
3 | 3 | |
4 | 4 | ec_encode_data_sse @1 |
112 | 112 | isal_zero_detect @109 |
113 | 113 | isal_gzip_header_init @110 |
114 | 114 | isal_adler32 @111 |
115 | isal_deflate_process_dict @112 | |
116 | isal_deflate_reset_dict @113 |
38 | 38 | # trace - get simulator trace |
39 | 39 | # clean - remove object files |
40 | 40 | |
41 | version ?= 2.29.0 | |
41 | version ?= 2.30.0 | |
42 | 42 | host_cpu ?= $(shell uname -m | sed -e 's/amd/x86_/') |
43 | 43 | arch ?= $(shell uname | grep -v -e Linux -e BSD ) |
44 | 44 |
39 | 39 | %define tmpb r11b |
40 | 40 | %define tmp3 arg4 |
41 | 41 | %define return rax |
42 | %define func(x) x: | |
42 | %define func(x) x: endbranch | |
43 | 43 | %define FUNC_SAVE |
44 | 44 | %define FUNC_RESTORE |
45 | 45 | %endif |
72 | 72 | section .text |
73 | 73 | |
74 | 74 | align 16 |
75 | global mem_zero_detect_avx:ISAL_SYM_TYPE_FUNCTION | |
75 | mk_global mem_zero_detect_avx, function | |
76 | 76 | func(mem_zero_detect_avx) |
77 | 77 | FUNC_SAVE |
78 | 78 | mov pos, 0 |
39 | 39 | %define tmpb r11b |
40 | 40 | %define tmp3 arg4 |
41 | 41 | %define return rax |
42 | %define func(x) x: | |
42 | %define func(x) x: endbranch | |
43 | 43 | %define FUNC_SAVE |
44 | 44 | %define FUNC_RESTORE |
45 | 45 | %endif |
72 | 72 | section .text |
73 | 73 | |
74 | 74 | align 16 |
75 | global mem_zero_detect_sse:ISAL_SYM_TYPE_FUNCTION | |
75 | mk_global mem_zero_detect_sse, function | |
76 | 76 | func(mem_zero_detect_sse) |
77 | 77 | FUNC_SAVE |
78 | 78 | mov pos, 0 |
0 | 0 | .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.11. |
1 | .TH IGZIP "1" "February 2020" "igzip command line interface 2.29.0" "User Commands" | |
1 | .TH IGZIP "1" "November 2020" "igzip command line interface 2.30.0" "User Commands" | |
2 | 2 | .SH NAME |
3 | 3 | igzip \- compress or decompress files similar to gzip |
4 | 4 | .SH SYNOPSIS |
816 | 816 | int suffix_index = 0; |
817 | 817 | uint32_t file_time; |
818 | 818 | |
819 | // Allocate mem and setup to hold gzip header info | |
819 | 820 | if (infile_name_len == stdin_file_name_len && |
820 | 821 | infile_name != NULL && |
821 | 822 | memcmp(infile_name, stdin_file_name, infile_name_len) == 0) { |
883 | 884 | state.next_in = inbuf; |
884 | 885 | state.avail_in = fread_safe(state.next_in, 1, inbuf_size, in, infile_name); |
885 | 886 | |
887 | // Actually read and save the header info | |
886 | 888 | ret = isal_read_gzip_header(&state, &gz_hdr); |
887 | 889 | if (ret != ISAL_DECOMP_OK) { |
888 | 890 | log_print(ERROR, "igzip: Error invalid gzip header found for file %s\n", |
914 | 916 | goto decompress_file_cleanup; |
915 | 917 | } |
916 | 918 | |
919 | // Start reading in compressed data and decompress | |
917 | 920 | do { |
918 | 921 | if (state.avail_in == 0) { |
919 | 922 | state.next_in = inbuf; |
935 | 938 | if (out != NULL) |
936 | 939 | fwrite_safe(outbuf, 1, state.next_out - outbuf, out, outfile_name); |
937 | 940 | |
938 | } while (!feof(in) || state.avail_out == 0); | |
941 | } while (state.block_state != ISAL_BLOCK_FINISH // while not done | |
942 | && (!feof(in) || state.avail_out == 0) // and work to do | |
943 | ); | |
944 | ||
945 | // Add the following to look for and decode additional concatenated files | |
946 | if (!feof(in) && state.avail_in == 0) { | |
947 | state.next_in = inbuf; | |
948 | state.avail_in = fread_safe(state.next_in, 1, inbuf_size, in, infile_name); | |
949 | } | |
950 | ||
951 | while (state.avail_in > 0 && state.next_in[0] == 31) { | |
952 | // Look for magic numbers for gzip header. Follows the gzread() decision | |
953 | // whether to treat as trailing junk | |
954 | if (state.avail_in > 1 && state.next_in[1] != 139) | |
955 | break; | |
956 | ||
957 | isal_inflate_reset(&state); | |
958 | state.crc_flag = ISAL_GZIP; // Let isal_inflate() process extra headers | |
959 | do { | |
960 | if (state.avail_in == 0 && !feof(in)) { | |
961 | state.next_in = inbuf; | |
962 | state.avail_in = | |
963 | fread_safe(state.next_in, 1, inbuf_size, in, infile_name); | |
964 | } | |
965 | ||
966 | state.next_out = outbuf; | |
967 | state.avail_out = outbuf_size; | |
968 | ||
969 | ret = isal_inflate(&state); | |
970 | if (ret != ISAL_DECOMP_OK) { | |
971 | log_print(ERROR, | |
972 | "igzip: Error while decompressing extra concatenated" | |
973 | "gzip files on %s\n", infile_name); | |
974 | goto decompress_file_cleanup; | |
975 | } | |
976 | ||
977 | if (out != NULL) | |
978 | fwrite_safe(outbuf, 1, state.next_out - outbuf, out, | |
979 | outfile_name); | |
980 | ||
981 | } while (state.block_state != ISAL_BLOCK_FINISH | |
982 | && (!feof(in) || state.avail_out == 0)); | |
983 | ||
984 | if (!feof(in) && state.avail_in == 0) { | |
985 | state.next_in = inbuf; | |
986 | state.avail_in = | |
987 | fread_safe(state.next_in, 1, inbuf_size, in, infile_name); | |
988 | } | |
989 | } | |
939 | 990 | |
940 | 991 | if (state.block_state != ISAL_BLOCK_FINISH) |
941 | 992 | log_print(ERROR, "igzip: Error %s does not contain a complete gzip file\n", |
75 | 75 | cat $TEST_FILE | $IGZIP | $IGZIP -d | $DIFF $TEST_FILE - || ret=1 |
76 | 76 | cat $TEST_FILE | $IGZIP - | $IGZIP -d - | $DIFF $TEST_FILE - || ret=1 |
77 | 77 | pass_check $ret "Piping compression and decompression" |
78 | ||
79 | # Test multiple concatenated gzip files | |
80 | ret=0 | |
81 | (for i in `seq 3`; do $IGZIP -c $TEST_FILE ; done) | $IGZIP -t || ret=1 | |
82 | pass_check $ret "Multiple gzip concatenated files" | |
83 | ||
84 | if command -V md5sum >/dev/null 2>&1; then | |
85 | sum1=$((for i in `seq 15`; do $IGZIP -c $TEST_FILE; done) | $IGZIP -cd | md5sum) | |
86 | sum2=$((for i in `seq 15`; do cat $TEST_FILE; done) | md5sum) | |
87 | [[ "$sum1" == "$sum2" ]] && ret=0 || ret=1 | |
88 | pass_check $ret "Multiple large gzip concat test" | |
89 | clear_dir | |
90 | else | |
91 | echo "Skip: Multiple large gzip concat test" | |
92 | fi | |
93 | ||
78 | 94 | |
79 | 95 | #Test outfile options |
80 | 96 | $IGZIP $TEST_FILE -o $file2$ds && $IGZIP $file2$ds -d -o $file1 && \ |
45 | 45 | %define tmp r11 |
46 | 46 | %define tmp3 arg4 |
47 | 47 | %define return rax |
48 | %define func(x) x: | |
48 | %define func(x) x: endbranch | |
49 | 49 | %define FUNC_SAVE |
50 | 50 | %define FUNC_RESTORE |
51 | 51 | %endif |
121 | 121 | section .text |
122 | 122 | |
123 | 123 | align 16 |
124 | global pq_check_sse:ISAL_SYM_TYPE_FUNCTION | |
124 | mk_global pq_check_sse, function | |
125 | 125 | func(pq_check_sse) |
126 | 126 | FUNC_SAVE |
127 | 127 | sub vec, 3 ;Keep as offset to last source |
45 | 45 | %define tmp r11 |
46 | 46 | %define return rax |
47 | 47 | %define PS 8 |
48 | %define func(x) x: | |
48 | %define func(x) x: endbranch | |
49 | 49 | %define FUNC_SAVE |
50 | 50 | %define FUNC_RESTORE |
51 | 51 | |
78 | 78 | %define arg1 ecx |
79 | 79 | %define return eax |
80 | 80 | %define PS 4 |
81 | %define func(x) x: | |
81 | %define func(x) x: endbranch | |
82 | 82 | %define arg(x) [ebp+8+PS*x] |
83 | 83 | %define arg2 edi ; must save/restore |
84 | 84 | %define arg3 esi |
140 | 140 | section .text |
141 | 141 | |
142 | 142 | align 16 |
143 | global pq_check_sse:ISAL_SYM_TYPE_FUNCTION | |
143 | mk_global pq_check_sse, function | |
144 | 144 | func(pq_check_sse) |
145 | 145 | FUNC_SAVE |
146 | 146 | sub vec, 3 ;Keep as offset to last source |
45 | 45 | %define tmp r11 |
46 | 46 | %define tmp3 arg4 |
47 | 47 | %define return rax |
48 | %define func(x) x: | |
48 | %define func(x) x: endbranch | |
49 | 49 | %define FUNC_SAVE |
50 | 50 | %define FUNC_RESTORE |
51 | 51 | %endif |
62 | 62 | %define func(x) proc_frame x |
63 | 63 | %macro FUNC_SAVE 0 |
64 | 64 | alloc_stack stack_size |
65 | save_xmm128 xmm6, 0*16 | |
66 | save_xmm128 xmm7, 1*16 | |
67 | save_xmm128 xmm8, 2*16 | |
68 | save_xmm128 xmm9, 3*16 | |
69 | save_xmm128 xmm10, 4*16 | |
70 | save_xmm128 xmm11, 5*16 | |
71 | save_xmm128 xmm14, 6*16 | |
72 | save_xmm128 xmm15, 7*16 | |
65 | vmovdqa [rsp + 0*16], xmm6 | |
66 | vmovdqa [rsp + 1*16], xmm7 | |
67 | vmovdqa [rsp + 2*16], xmm8 | |
68 | vmovdqa [rsp + 3*16], xmm9 | |
69 | vmovdqa [rsp + 4*16], xmm10 | |
70 | vmovdqa [rsp + 5*16], xmm11 | |
71 | vmovdqa [rsp + 6*16], xmm14 | |
72 | vmovdqa [rsp + 7*16], xmm15 | |
73 | 73 | end_prolog |
74 | 74 | %endmacro |
75 | 75 | |
76 | 76 | %macro FUNC_RESTORE 0 |
77 | movdqa xmm6, [rsp + 0*16] | |
78 | movdqa xmm7, [rsp + 1*16] | |
79 | movdqa xmm8, [rsp + 2*16] | |
80 | movdqa xmm9, [rsp + 3*16] | |
81 | movdqa xmm10, [rsp + 4*16] | |
82 | movdqa xmm11, [rsp + 5*16] | |
83 | movdqa xmm14, [rsp + 6*16] | |
84 | movdqa xmm15, [rsp + 7*16] | |
77 | vmovdqa xmm6, [rsp + 0*16] | |
78 | vmovdqa xmm7, [rsp + 1*16] | |
79 | vmovdqa xmm8, [rsp + 2*16] | |
80 | vmovdqa xmm9, [rsp + 3*16] | |
81 | vmovdqa xmm10, [rsp + 4*16] | |
82 | vmovdqa xmm11, [rsp + 5*16] | |
83 | vmovdqa xmm14, [rsp + 6*16] | |
84 | vmovdqa xmm15, [rsp + 7*16] | |
85 | 85 | add rsp, stack_size |
86 | 86 | %endmacro |
87 | 87 | %endif |
124 | 124 | section .text |
125 | 125 | |
126 | 126 | align 16 |
127 | global pq_gen_avx:ISAL_SYM_TYPE_FUNCTION | |
127 | mk_global pq_gen_avx, function | |
128 | 128 | func(pq_gen_avx) |
129 | 129 | FUNC_SAVE |
130 | 130 | sub vec, 3 ;Keep as offset to last source |
45 | 45 | %define tmp r11 |
46 | 46 | %define tmp3 arg4 |
47 | 47 | %define return rax |
48 | %define func(x) x: | |
48 | %define func(x) x: endbranch | |
49 | 49 | %define FUNC_SAVE |
50 | 50 | %define FUNC_RESTORE |
51 | 51 | %endif |
125 | 125 | section .text |
126 | 126 | |
127 | 127 | align 16 |
128 | global pq_gen_avx2:ISAL_SYM_TYPE_FUNCTION | |
128 | mk_global pq_gen_avx2, function | |
129 | 129 | func(pq_gen_avx2) |
130 | 130 | FUNC_SAVE |
131 | 131 | sub vec, 3 ;Keep as offset to last source |
48 | 48 | %define tmp r11 |
49 | 49 | %define tmp3 arg4 |
50 | 50 | %define return rax |
51 | %define func(x) x: | |
51 | %define func(x) x: endbranch | |
52 | 52 | %define FUNC_SAVE |
53 | 53 | %define FUNC_RESTORE |
54 | 54 | %endif |
122 | 122 | section .text |
123 | 123 | |
124 | 124 | align 16 |
125 | global pq_gen_avx512:ISAL_SYM_TYPE_FUNCTION | |
125 | mk_global pq_gen_avx512, function | |
126 | 126 | func(pq_gen_avx512) |
127 | 127 | FUNC_SAVE |
128 | 128 | sub vec, 3 ;Keep as offset to last source |
45 | 45 | %define tmp r11 |
46 | 46 | %define tmp3 arg4 |
47 | 47 | %define return rax |
48 | %define func(x) x: | |
48 | %define func(x) x: endbranch | |
49 | 49 | %define FUNC_SAVE |
50 | 50 | %define FUNC_RESTORE |
51 | 51 | %endif |
121 | 121 | section .text |
122 | 122 | |
123 | 123 | align 16 |
124 | global pq_gen_sse:ISAL_SYM_TYPE_FUNCTION | |
124 | mk_global pq_gen_sse, function | |
125 | 125 | func(pq_gen_sse) |
126 | 126 | FUNC_SAVE |
127 | 127 | sub vec, 3 ;Keep as offset to last source |
45 | 45 | %define tmp r11 |
46 | 46 | %define return rax |
47 | 47 | %define PS 8 |
48 | %define func(x) x: | |
48 | %define func(x) x: endbranch | |
49 | 49 | %define FUNC_SAVE |
50 | 50 | %define FUNC_RESTORE |
51 | 51 | |
77 | 77 | %define arg1 ecx |
78 | 78 | %define return eax |
79 | 79 | %define PS 4 |
80 | %define func(x) x: | |
80 | %define func(x) x: endbranch | |
81 | 81 | %define arg(x) [ebp+8+PS*x] |
82 | 82 | %define arg2 edi ; must save/restore |
83 | 83 | %define arg3 esi |
139 | 139 | section .text |
140 | 140 | |
141 | 141 | align 16 |
142 | global pq_gen_sse:ISAL_SYM_TYPE_FUNCTION | |
142 | mk_global pq_gen_sse, function | |
143 | 143 | func(pq_gen_sse) |
144 | 144 | FUNC_SAVE |
145 | 145 | sub vec, 3 ;Keep as offset to last source |
71 | 71 | ;;;; |
72 | 72 | ; pq_check multibinary function |
73 | 73 | ;;;; |
74 | global pq_check:ISAL_SYM_TYPE_FUNCTION | |
74 | mk_global pq_check, function | |
75 | 75 | pq_check_mbinit: |
76 | endbranch | |
76 | 77 | call pq_check_dispatch_init |
77 | 78 | pq_check: |
79 | endbranch | |
78 | 80 | jmp qword [pq_check_dispatched] |
79 | 81 | |
80 | 82 | pq_check_dispatch_init: |
103 | 105 | ;;;; |
104 | 106 | ; xor_check multibinary function |
105 | 107 | ;;;; |
106 | global xor_check:ISAL_SYM_TYPE_FUNCTION | |
108 | mk_global xor_check, function | |
107 | 109 | xor_check_mbinit: |
110 | endbranch | |
108 | 111 | call xor_check_dispatch_init |
109 | 112 | xor_check: |
113 | endbranch | |
110 | 114 | jmp qword [xor_check_dispatched] |
111 | 115 | |
112 | 116 | xor_check_dispatch_init: |
48 | 48 | %define tmp3 arg4 |
49 | 49 | %define return rax |
50 | 50 | %define PS 8 |
51 | %define func(x) x: | |
51 | %define func(x) x: endbranch | |
52 | 52 | %define FUNC_SAVE |
53 | 53 | %define FUNC_RESTORE |
54 | 54 | |
87 | 87 | %define tmp3 edx |
88 | 88 | %define return eax |
89 | 89 | %define PS 4 |
90 | %define func(x) x: | |
90 | %define func(x) x: endbranch | |
91 | 91 | %define arg(x) [ebp+8+PS*x] |
92 | 92 | %define arg2 edi ; must save/restore |
93 | 93 | %define arg3 esi |
136 | 136 | section .text |
137 | 137 | |
138 | 138 | align 16 |
139 | global xor_check_sse:ISAL_SYM_TYPE_FUNCTION | |
139 | mk_global xor_check_sse, function | |
140 | 140 | func(xor_check_sse) |
141 | 141 | FUNC_SAVE |
142 | 142 | %ifidn PS,8 ;64-bit code |
44 | 44 | %define arg5 r9 |
45 | 45 | %define tmp r11 |
46 | 46 | %define tmp3 arg4 |
47 | %define func(x) x: | |
47 | %define func(x) x: endbranch | |
48 | 48 | %define return rax |
49 | 49 | %define FUNC_SAVE |
50 | 50 | %define FUNC_RESTORE |
99 | 99 | section .text |
100 | 100 | |
101 | 101 | align 16 |
102 | global xor_gen_avx:ISAL_SYM_TYPE_FUNCTION | |
102 | mk_global xor_gen_avx, function | |
103 | 103 | func(xor_gen_avx) |
104 | 104 | |
105 | 105 | FUNC_SAVE |
46 | 46 | %define arg5 r9 |
47 | 47 | %define tmp r11 |
48 | 48 | %define tmp3 arg4 |
49 | %define func(x) x: | |
49 | %define func(x) x: endbranch | |
50 | 50 | %define return rax |
51 | 51 | %define FUNC_SAVE |
52 | 52 | %define FUNC_RESTORE |
102 | 102 | section .text |
103 | 103 | |
104 | 104 | align 16 |
105 | global xor_gen_avx512:ISAL_SYM_TYPE_FUNCTION | |
105 | mk_global xor_gen_avx512, function | |
106 | 106 | func(xor_gen_avx512) |
107 | 107 | FUNC_SAVE |
108 | 108 | sub vec, 2 ;Keep as offset to last source |
48 | 48 | %define tmp3 arg4 |
49 | 49 | %define return rax |
50 | 50 | %define PS 8 |
51 | %define func(x) x: | |
51 | %define func(x) x: endbranch | |
52 | 52 | %define FUNC_SAVE |
53 | 53 | %define FUNC_RESTORE |
54 | 54 | |
87 | 87 | %define tmp3 edx |
88 | 88 | %define return eax |
89 | 89 | %define PS 4 |
90 | %define func(x) x: | |
90 | %define func(x) x: endbranch | |
91 | 91 | %define arg(x) [ebp+8+PS*x] |
92 | 92 | %define arg2 edi ; must save/restore |
93 | 93 | %define arg3 esi |
136 | 136 | section .text |
137 | 137 | |
138 | 138 | align 16 |
139 | global xor_gen_sse:ISAL_SYM_TYPE_FUNCTION | |
139 | mk_global xor_gen_sse, function | |
140 | 140 | func(xor_gen_sse) |
141 | 141 | FUNC_SAVE |
142 | 142 | %ifidn PS,8 ;64-bit code |
0 | # Regenerate nmake file from makefiles or check its consistency | |
1 | ||
# Consistency check for the nmake build file.
# Depends on tst.nmake, a scratch copy of the generated nmake file, and compares
# it with the existing Makefile.nmake using a unified diff.
#  - on a difference: print a notice, delete the scratch file and fail the target
#  - on a match: print a confirmation and delete the scratch file
2 | test_nmake_file: tst.nmake | |
3 | @diff -u Makefile.nmake tst.nmake || (echo Potential nmake consistency issue; $(RM) tst.nmake; false;) | |
4 | @echo No nmake consistency issues | |
5 | @$(RM) tst.nmake | |
6 | ||
# Generator for the nmake build file.
# FORCE is a target with no prerequisites and no recipe; naming it as a
# prerequisite makes the rule below run every time either output is requested.
# The same recipe serves both Makefile.nmake and tst.nmake and writes only to $@:
# the first redirection uses '>' to start the file afresh, every later one
# appends with '>>'. '$$' in the recipe becomes a literal '$' in the output.
# NOTE(review): the generated text is compared byte-for-byte by test_nmake_file,
# so whitespace inside the quoted strings matters - confirm before reformatting.
7 | FORCE: | |
8 | Makefile.nmake tst.nmake: FORCE | |
9 | @echo Regenerating $@ | |
# Banner: the LICENSE file with '#' prepended to every line, framed by rules of '#'.
10 | @echo '########################################################################' > $@ | |
11 | @cat LICENSE | sed -e 's/^/#/ ' >> $@ | |
12 | @echo '########################################################################' >> $@ | |
13 | @echo '' >> $@ | |
14 | @echo '# This file can be auto-regenerated with $$make -f Makefile.unx Makefile.nmake' >> $@ | |
15 | @echo '' >> $@ | |
# Object list: every entry of $(objs) with .o renamed to .obj and '/' turned
# into '\', each emitted after a backslash line continuation.
16 | @echo -n 'objs =' >> $@ | |
17 | @$(foreach o, $(subst /,\\,$(objs:.o=.obj)), printf " %s\n\t%s" \\ $(o) >> $@; ) | |
18 | @echo '' >> $@ | |
19 | @echo '' >> $@ | |
# Fixed tool and flag settings written into the generated file.
20 | @echo 'INCLUDES = $(INCLUDE)' >> $@ | |
21 | @echo '# Modern asm feature level, consider upgrading nasm/yasm before decreasing feature_level' >> $@ | |
22 | @echo 'FEAT_FLAGS = -DHAVE_AS_KNOWS_AVX512 -DAS_FEATURE_LEVEL=10' >> $@ | |
23 | @echo 'CFLAGS_REL = -O2 -DNDEBUG /Z7 /MD /Gy' >> $@ | |
24 | @echo 'CFLAGS_DBG = -Od -DDEBUG /Z7 /MDd' >> $@ | |
25 | @echo 'LINKFLAGS = -nologo -incremental:no -debug' >> $@ | |
26 | @echo 'CFLAGS = $$(CFLAGS_REL) -nologo -D_USE_MATH_DEFINES $$(FEAT_FLAGS) $$(INCLUDES) $$(D)' >> $@ | |
27 | @echo 'AFLAGS = -f win64 $$(FEAT_FLAGS) $$(INCLUDES) $$(D)' >> $@ | |
28 | @echo 'CC = cl' >> $@ | |
29 | @echo '# or CC = icl -Qstd=c99' >> $@ | |
30 | @echo 'AS = nasm' >> $@ | |
31 | @echo '' >> $@ | |
# Library targets: static library and DLL, both built from $(objs).
32 | @echo 'lib: bin static dll' >> $@ | |
33 | @echo 'static: bin isa-l_static.lib' >> $@ | |
34 | @echo 'dll: bin isa-l.dll' >> $@ | |
35 | @echo '' >> $@ | |
36 | @echo 'bin: ; -mkdir $$@' >> $@ | |
37 | @echo '' >> $@ | |
38 | @echo 'isa-l_static.lib: $$(objs)' >> $@ | |
39 | @echo ' lib -out:$$@ @<<' >> $@ | |
40 | @echo '$$?' >> $@ | |
41 | @echo '<<' >> $@ | |
42 | @echo '' >> $@ | |
43 | @echo 'isa-l.dll: $$(objs)' >> $@ | |
44 | @echo ' link -out:$$@ -dll -def:isa-l.def $$(LINKFLAGS) @<<' >> $@ | |
45 | @echo '$$?' >> $@ | |
46 | @echo '<<' >> $@ | |
47 | @echo '' >> $@ | |
# For each directory in $(units): one '{dir}.c.obj' rule using $(CC) and one
# '{dir}.asm.obj' rule using $(AS).
48 | @$(foreach b, $(units), \ | |
49 | printf "{%s}.c.obj:\n\t\$$(CC) \$$(CFLAGS) /c -Fo\$$@ \$$?\n{%s}.asm.obj:\n\t\$$(AS) \$$(AFLAGS) -o \$$@ \$$?\n\n" $(b) $(b) >> $@; ) | |
50 | @echo '' >> $@ | |
# The examples section is emitted only when $(examples) is non-empty.
51 | ifneq (,$(examples)) | |
52 | @echo "# Examples" >> $@ | |
53 | @echo -n 'ex =' >> $@ | |
54 | @$(foreach ex, $(notdir $(examples)), printf " %s\n\t%s.exe" \\ $(ex) >> $@; ) | |
55 | @echo '' >> $@ | |
56 | @echo '' >> $@ | |
57 | @echo 'ex: lib $$(ex)' >> $@ | |
58 | @echo '' >> $@ | |
59 | @echo '$$(ex): $$(@B).obj' >> $@ | |
60 | endif | |
61 | @echo '' >> $@ | |
62 | @echo '.obj.exe:' >> $@ | |
63 | @echo ' link /out:$$@ $$(LINKFLAGS) isa-l.lib $$?' >> $@ | |
64 | @echo '' >> $@ | |
# Program lists (checks, tests, perfs, progs): directory parts are stripped with
# $(notdir ...) and '.exe' is appended to every name.
65 | @echo '# Check tests' >> $@ | |
66 | @echo -n 'checks =' >> $@ | |
67 | @$(foreach check, $(notdir $(check_tests)), printf " %s\n\t%s.exe" \\ $(check) >> $@; ) | |
68 | @echo '' >> $@ | |
69 | @echo '' >> $@ | |
70 | @echo 'checks: lib $$(checks)' >> $@ | |
71 | @echo '$$(checks): $$(@B).obj' >> $@ | |
72 | @echo 'check: $$(checks)' >> $@ | |
73 | @echo ' !$$?' >> $@ | |
74 | @echo '' >> $@ | |
75 | @echo '# Unit tests' >> $@ | |
76 | @echo -n 'tests =' >> $@ | |
77 | @$(foreach test, $(notdir $(unit_tests)), printf " %s\n\t%s.exe" \\ $(test) >> $@; ) | |
78 | @echo '' >> $@ | |
79 | @echo '' >> $@ | |
80 | @echo 'tests: lib $$(tests)' >> $@ | |
81 | @echo '$$(tests): $$(@B).obj' >> $@ | |
82 | @echo '' >> $@ | |
83 | @echo '# Performance tests' >> $@ | |
84 | @echo -n 'perfs =' >> $@ | |
85 | @$(foreach perf, $(notdir $(perf_tests)), printf " %s\n\t%s.exe" \\ $(perf) >> $@; ) | |
86 | @echo '' >> $@ | |
87 | @echo '' >> $@ | |
88 | @echo 'perfs: lib $$(perfs)' >> $@ | |
89 | @echo '$$(perfs): $$(@B).obj' >> $@ | |
90 | @echo '' >> $@ | |
91 | @echo -n 'progs =' >> $@ | |
92 | @$(foreach prog, $(notdir $(bin_PROGRAMS)), printf " %s\n\t%s.exe" \\ $(prog) >> $@; ) | |
93 | @echo '' >> $@ | |
94 | @echo '' >> $@ | |
95 | @echo 'progs: lib $$(progs)' >> $@ | |
# One explicit link rule per program, listing the objects derived from that
# program's programs_<name>_SOURCES variable (.c renamed to .obj, '/' to '\').
96 | @$(foreach p, $(notdir $(bin_PROGRAMS)), \ | |
97 | printf "%s.exe: %s\n\tlink /out:\$$@ \$$(LINKFLAGS) isa-l.lib \$$?\n" $(p) $(subst /,\\,$(programs_$(p)_SOURCES:.c=.obj)) >> $@; ) | |
98 | @echo '' >> $@ | |
# Generated 'clean' target: guarded deletes of build outputs.
99 | @echo 'clean:' >> $@ | |
100 | @echo ' -if exist *.obj del *.obj' >> $@ | |
101 | @echo ' -if exist bin\*.obj del bin\*.obj' >> $@ | |
102 | @echo ' -if exist isa-l_static.lib del isa-l_static.lib' >> $@ | |
103 | @echo ' -if exist *.exe del *.exe' >> $@ | |
104 | @echo ' -if exist *.pdb del *.pdb' >> $@ | |
105 | @echo ' -if exist isa-l.lib del isa-l.lib' >> $@ | |
106 | @echo ' -if exist isa-l.dll del isa-l.dll' >> $@ | |
107 | @echo ' -if exist isa-l.exp del isa-l.exp' >> $@ | |
108 | @echo '' >> $@ | |
# An empty 'zlib.lib:' target is written only when $(units) contains igzip.
109 | $(if $(findstring igzip,$(units)),@echo 'zlib.lib:' >> $@ ) | |
# Last step: feed every unit's Makefile.am through sed and append the result.
# Lines without ': ' are deleted, as are CFLAGS_...+= lines and lines matching
# ':...%...:'; names are rewritten with .exe/.obj suffixes, 'LDLIBS +=' is
# removed and -lz becomes zlib.lib.
# NOTE(review): presumably this carries per-program dependency lines over to
# the nmake file - verify against a generated Makefile.nmake.
110 | @cat $(foreach unit,$(units), $(unit)/Makefile.am) | sed \ | |
111 | -e '/: /!d' \ | |
112 | -e 's/\([^ :]*\)[ ]*/\1.exe /g' \ | |
113 | -e :c -e 's/:\(.*\).exe/:\1/;tc' \ | |
114 | -e 's/\.o[ $$]/.obj /g' \ | |
115 | -e 's/\.o\.exe[ ]:/.obj:/g' \ | |
116 | -e '/CFLAGS_.*+=/d' \ | |
117 | -e '/:.*\%.*:/d' \ | |
118 | -e 's/ :/:/' \ | |
119 | -e 's/LDLIBS *+=//' \ | |
120 | -e 's/-lz/zlib.lib/' \ | |
121 | -e 's/ $$//' \ | |
122 | >> $@ |
#!/bin/sh
#
# nasm wrapper: filter out unnecessary options added by automake, assemble,
# then relink the produced object with the linker named in $CET_LD using
# "-r -z ibt -z shstk".
#
# Recognised arguments:
#   -o FILE            forwarded; FILE is remembered as the object to relink
#   -f ARG, -D ARG     forwarded together with their argument
#   -I DIR, -i DIR     forwarded with "/" appended to DIR
#   -IDIR, -iDIR       forwarded with "/" appended
#   --prefix*          forwarded unchanged
#   -DNAME[=VALUE]     forwarded, unless the word contains a space (then dropped)
#   any other -option  dropped
#   anything else      treated as an input file and forwarded
#
# Exit status: that of the first failing step (nasm, $CET_LD, mv), else 0.

# Start from a clean slate so same-named environment variables cannot leak in.
options=
args=
object=

# Test the argument count, not the joined text, so empty arguments cannot end
# the scan early.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -o )
            # Supported option with arg; keep the object name for the CET step
            options="$options $1 $2"
            shift
            object="$1"
            shift
            ;;
        -f | -D )
            # Supported options with arg
            options="$options $1 $2"
            shift
            shift
            ;;
        -I | -i )
            # Include directory given as a separate word: add a trailing slash
            options="$options $1 $2/"
            shift
            shift
            ;;
        --prefix* )
            # Supported options without arg
            options="$options $1"
            shift
            ;;
        -I* | -i* )
            # Include directory attached to the flag: add a trailing slash
            options="$options $1/"
            shift
            ;;
        -D* )
            # Defines containing a space are dropped; the option string is
            # word-split again below, so they could not be passed intact.
            case "$1" in
                *' '* ) ;;
                * ) options="$options $1" ;;
            esac
            shift
            ;;
        # Unsupported options with args - none known
        -* )
            # Unsupported options with no args
            shift
            ;;
        * )
            args="$args $1"
            shift
            ;;
    esac
done

# $options and $args are deliberately unquoted: they are space-joined lists.
# Stop on assembler failure so a stale object is never relinked and reported
# as a successful build.
nasm $options $args || exit $?

# $CET_LD is left unquoted so it may carry extra words.
$CET_LD -r -z ibt -z shstk -o "$object.tmp" "$object" || exit $?
mv "$object.tmp" "$object"
157 | 157 | # Test custom hufftables |
158 | 158 | test_start "generate_custom_hufftables" |
159 | 159 | ./generate_custom_hufftables $in_file |
160 | $MAKE -f Makefile.unx clean | |
160 | 161 | $MAKE -f Makefile.unx -j $cpus D="NO_STATIC_INFLATE_H" checks |
161 | 162 | ./igzip_rand_test $in_file |
162 | ./generate_static_inflate | |
163 | diff -q static_inflate.h igzip/static_inflate.h | |
164 | rm -rf static_inflate.h | |
165 | 163 | rm -rf hufftables_c.c |
166 | 164 | test_end "generate_custom_hufftables" $? |
167 | 165 | |
168 | 166 | msg+=$'Custom hufftable build: Pass\n' |
169 | 167 | |
170 | 168 | $MAKE -f Makefile.unx clean |
169 | ||
170 | test_start "nmake_file_consistency" | |
171 | $MAKE -f Makefile.unx host_cpu="x86_64" test_nmake_file | |
172 | test_end "nmake_file_consistency" $? | |
173 | msg+=$'Nmake file consistency: Pass\n' | |
171 | 174 | |
172 | 175 | # noarch build |
173 | 176 | test_start "noarch_build" |
#!/bin/sh
#
# yasm wrapper: filter out unnecessary options added by automake, assemble,
# then relink the produced object with the linker named in $CET_LD using
# "-r -z ibt -z shstk".
#
# Recognised arguments:
#   -o FILE                    forwarded; FILE is remembered as the object to relink
#   -f ARG, -I ARG, -i ARG,
#   -D ARG                     forwarded together with their argument
#   -IDIR, -iDIR, --prefix*    forwarded unchanged
#   -DNAME[=VALUE]             forwarded, unless the word contains a space (then dropped)
#   any other -option          dropped
#   anything else              treated as an input file and forwarded
#
# Exit status: that of the first failing step (yasm, $CET_LD, mv), else 0.

# Start from a clean slate so same-named environment variables cannot leak in.
options=
args=
object=

# Test the argument count, not the joined text, so empty arguments cannot end
# the scan early.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -o )
            # Supported option with arg; keep the object name for the CET step
            options="$options $1 $2"
            shift
            object="$1"
            shift
            ;;
        -f | -I | -i | -D )
            # Supported options with arg
            options="$options $1 $2"
            shift
            shift
            ;;
        -I* | -i* | --prefix* )
            # Supported options without arg
            options="$options $1"
            shift
            ;;
        -D* )
            # Defines containing a space are dropped; the option string is
            # word-split again below, so they could not be passed intact.
            case "$1" in
                *' '* ) ;;
                * ) options="$options $1" ;;
            esac
            shift
            ;;
        # Unsupported options with args - none known
        -* )
            # Unsupported options with no args
            shift
            ;;
        * )
            args="$args $1"
            shift
            ;;
    esac
done

# $options and $args are deliberately unquoted: they are space-joined lists.
# Stop on assembler failure so a stale object is never relinked and reported
# as a successful build.
yasm $options $args || exit $?

# $CET_LD is left unquoted so it may carry extra words.
$CET_LD -r -z ibt -z shstk -o "$object.tmp" "$object" || exit $?
mv "$object.tmp" "$object"