Codebase list libisal / 5c82c6b
Merge tag '2.30.0' into debian/victoria ISA-L 2.30 release Ondřej Nový 3 years ago
159 changed file(s) with 5767 addition(s) and 4398 deletion(s). Raw diff Collapse all Expand all
+0
-89
.drone.yml less more
0 kind: pipeline
1 name: arm64-linux-gcc-5.4
2
3 platform:
4 os: linux
5 arch: arm64
6
7 steps:
8 - name: arm64-linux-gcc-5.4
9 image: ubuntu:xenial
10 environment:
11 C_COMPILER: gcc
12 commands:
13 - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi
14 - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi
15 - apt-get -qq update
16 - apt-get install -qq -y build-essential git indent libtool libz-dev yasm autoconf
17 - if [ -n "$CC" ]; then $CC --version; fi
18 - if [ -n "$AS" ]; then $AS --version; fi
19 - ./tools/test_autorun.sh "$TEST_TYPE"
20
21 ---
22 kind: pipeline
23 name: arm64-linux-gcc-4.7
24
25 platform:
26 os: linux
27 arch: arm64
28
29 steps:
30 - name: arm64-linux-gcc-4.7
31 image: ubuntu:xenial
32 environment:
33 C_COMPILER: gcc-4.7
34 commands:
35 - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi
36 - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi
37 - apt-get -qq update
38 - apt-get install -qq -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf
39 - add-apt-repository -y ppa:ubuntu-toolchain-r/test
40 - apt-get -qq update
41 - apt-get install -qq -y g++-4.7
42 - if [ -n "$CC" ]; then $CC --version; fi
43 - if [ -n "$AS" ]; then $AS --version; fi
44 - ./tools/test_autorun.sh "$TEST_TYPE"
45
46 ---
47 kind: pipeline
48 name: arm64-linux-gcc-6
49
50 platform:
51 os: linux
52 arch: arm64
53
54 steps:
55 - name: arm64-linux-gcc-6
56 image: debian:9
57 environment:
58 C_COMPILER: gcc
59 commands:
60 - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi
61 - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi
62 - apt-get -q update
63 - apt-get install -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf
64 - if [ -n "$CC" ]; then $CC --version; fi
65 - if [ -n "$AS" ]; then $AS --version; fi
66 - ./tools/test_autorun.sh "$TEST_TYPE"
67
68 ---
69 kind: pipeline
70 name: arm64-linux-extended-tests
71
72 platform:
73 os: linux
74 arch: arm64
75
76 steps:
77 - name: arm64-linux-extended-tests
78 image: ubuntu:xenial
79 environment:
80 TEST_TYPE: ext
81 commands:
82 - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi
83 - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi
84 - apt-get -qq update
85 - apt-get install -qq -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf
86 - if [ -n "$CC" ]; then $CC --version; fi
87 - if [ -n "$AS" ]; then $AS --version; fi
88 - ./tools/test_autorun.sh "$TEST_TYPE"
1212 include \
1313 README.md \
1414 CONTRIBUTING.md \
15 Release_notes.txt
15 Release_notes.txt \
16 doc/test.md \
17 doc/build.md
1618
1719 EXCLUDE = include/test.h include/types.h include/unaligned.h
1820 EXCLUDE_PATTERNS = */include/*_multibinary.h
5050
5151 # LIB version info not necessarily the same as package version
5252 LIBISAL_CURRENT=2
53 LIBISAL_REVISION=29
53 LIBISAL_REVISION=30
5454 LIBISAL_AGE=0
5555
5656 lib_LTLIBRARIES = libisal.la
116116 @echo Completed run: $<
117117
118118 # Support for yasm/nasm/gas
119 if INTEL_CET_ENABLED
120 export CET_LD=$(LD)
121 endif
119122 if USE_YASM
123 if INTEL_CET_ENABLED
124 as_filter = ${srcdir}/tools/yasm-cet-filter.sh
125 else
120126 as_filter = ${srcdir}/tools/yasm-filter.sh
121127 endif
128 endif
122129 if USE_NASM
130 if INTEL_CET_ENABLED
131 as_filter = ${srcdir}/tools/nasm-cet-filter.sh
132 else
123133 as_filter = ${srcdir}/tools/nasm-filter.sh
134 endif
124135 endif
125136 if CPU_AARCH64
126137 as_filter = $(CC) -D__ASSEMBLY__
128139
129140 CCAS = $(as_filter)
130141 EXTRA_DIST += tools/yasm-filter.sh tools/nasm-filter.sh
142 EXTRA_DIST += tools/yasm-cet-filter.sh tools/nasm-cet-filter.sh
131143
132144 AM_CFLAGS = ${my_CFLAGS} ${INCLUDE} $(src_include) ${D}
133145 if CPU_AARCH64
00 ########################################################################
1 # Copyright(c) 2011-2016 Intel Corporation All rights reserved.
1 # Copyright(c) 2011-2017 Intel Corporation All rights reserved.
22 #
33 # Redistribution and use in source and binary forms, with or without
44 # modification, are permitted provided that the following conditions
2626 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2727 ########################################################################
2828
29 objs = \
29 # This file can be regenerated automatically by running: make -f Makefile.unx Makefile.nmake
30
31 objs = \
3032 bin\ec_base.obj \
33 bin\raid_base.obj \
34 bin\crc_base.obj \
35 bin\crc64_base.obj \
36 bin\igzip.obj \
37 bin\hufftables_c.obj \
38 bin\igzip_base.obj \
39 bin\igzip_icf_base.obj \
40 bin\adler32_base.obj \
41 bin\flatten_ll.obj \
42 bin\encode_df.obj \
43 bin\igzip_icf_body.obj \
44 bin\huff_codes.obj \
45 bin\igzip_inflate.obj \
46 bin\mem_zero_detect_base.obj \
3147 bin\ec_highlevel_func.obj \
32 bin\ec_multibinary.obj \
33 bin\gf_2vect_dot_prod_avx.obj \
34 bin\gf_2vect_dot_prod_avx2.obj \
35 bin\gf_2vect_dot_prod_avx512.obj \
36 bin\gf_2vect_dot_prod_sse.obj \
37 bin\gf_2vect_mad_avx.obj \
38 bin\gf_2vect_mad_avx2.obj \
39 bin\gf_2vect_mad_avx512.obj \
40 bin\gf_2vect_mad_sse.obj \
41 bin\gf_3vect_dot_prod_avx.obj \
42 bin\gf_3vect_dot_prod_avx2.obj \
43 bin\gf_3vect_dot_prod_avx512.obj \
44 bin\gf_3vect_dot_prod_sse.obj \
45 bin\gf_3vect_mad_avx.obj \
46 bin\gf_3vect_mad_avx2.obj \
47 bin\gf_3vect_mad_avx512.obj \
48 bin\gf_3vect_mad_sse.obj \
49 bin\gf_4vect_dot_prod_avx.obj \
50 bin\gf_4vect_dot_prod_avx2.obj \
51 bin\gf_4vect_dot_prod_avx512.obj \
52 bin\gf_4vect_dot_prod_sse.obj \
53 bin\gf_4vect_mad_avx.obj \
54 bin\gf_4vect_mad_avx2.obj \
55 bin\gf_4vect_mad_avx512.obj \
56 bin\gf_4vect_mad_sse.obj \
57 bin\gf_5vect_dot_prod_avx.obj \
58 bin\gf_5vect_dot_prod_avx2.obj \
59 bin\gf_5vect_dot_prod_sse.obj \
60 bin\gf_5vect_mad_avx.obj \
61 bin\gf_5vect_mad_avx2.obj \
62 bin\gf_5vect_mad_sse.obj \
63 bin\gf_6vect_dot_prod_avx.obj \
64 bin\gf_6vect_dot_prod_avx2.obj \
65 bin\gf_6vect_dot_prod_sse.obj \
66 bin\gf_6vect_mad_avx.obj \
67 bin\gf_6vect_mad_avx2.obj \
68 bin\gf_6vect_mad_sse.obj \
48 bin\gf_vect_mul_sse.obj \
49 bin\gf_vect_mul_avx.obj \
50 bin\gf_vect_dot_prod_sse.obj \
6951 bin\gf_vect_dot_prod_avx.obj \
7052 bin\gf_vect_dot_prod_avx2.obj \
53 bin\gf_2vect_dot_prod_sse.obj \
54 bin\gf_3vect_dot_prod_sse.obj \
55 bin\gf_4vect_dot_prod_sse.obj \
56 bin\gf_5vect_dot_prod_sse.obj \
57 bin\gf_6vect_dot_prod_sse.obj \
58 bin\gf_2vect_dot_prod_avx.obj \
59 bin\gf_3vect_dot_prod_avx.obj \
60 bin\gf_4vect_dot_prod_avx.obj \
61 bin\gf_5vect_dot_prod_avx.obj \
62 bin\gf_6vect_dot_prod_avx.obj \
63 bin\gf_2vect_dot_prod_avx2.obj \
64 bin\gf_3vect_dot_prod_avx2.obj \
65 bin\gf_4vect_dot_prod_avx2.obj \
66 bin\gf_5vect_dot_prod_avx2.obj \
67 bin\gf_6vect_dot_prod_avx2.obj \
68 bin\gf_vect_mad_sse.obj \
69 bin\gf_2vect_mad_sse.obj \
70 bin\gf_3vect_mad_sse.obj \
71 bin\gf_4vect_mad_sse.obj \
72 bin\gf_5vect_mad_sse.obj \
73 bin\gf_6vect_mad_sse.obj \
74 bin\gf_vect_mad_avx.obj \
75 bin\gf_2vect_mad_avx.obj \
76 bin\gf_3vect_mad_avx.obj \
77 bin\gf_4vect_mad_avx.obj \
78 bin\gf_5vect_mad_avx.obj \
79 bin\gf_6vect_mad_avx.obj \
80 bin\gf_vect_mad_avx2.obj \
81 bin\gf_2vect_mad_avx2.obj \
82 bin\gf_3vect_mad_avx2.obj \
83 bin\gf_4vect_mad_avx2.obj \
84 bin\gf_5vect_mad_avx2.obj \
85 bin\gf_6vect_mad_avx2.obj \
86 bin\ec_multibinary.obj \
7187 bin\gf_vect_dot_prod_avx512.obj \
72 bin\gf_vect_dot_prod_sse.obj \
73 bin\gf_vect_mad_avx.obj \
74 bin\gf_vect_mad_avx2.obj \
88 bin\gf_2vect_dot_prod_avx512.obj \
89 bin\gf_3vect_dot_prod_avx512.obj \
90 bin\gf_4vect_dot_prod_avx512.obj \
91 bin\gf_5vect_dot_prod_avx512.obj \
92 bin\gf_6vect_dot_prod_avx512.obj \
7593 bin\gf_vect_mad_avx512.obj \
76 bin\gf_vect_mad_sse.obj \
77 bin\gf_vect_mul_avx.obj \
78 bin\gf_vect_mul_sse.obj \
94 bin\gf_2vect_mad_avx512.obj \
95 bin\gf_3vect_mad_avx512.obj \
96 bin\gf_4vect_mad_avx512.obj \
97 bin\gf_5vect_mad_avx512.obj \
98 bin\gf_6vect_mad_avx512.obj \
99 bin\xor_gen_sse.obj \
100 bin\pq_gen_sse.obj \
101 bin\xor_check_sse.obj \
79102 bin\pq_check_sse.obj \
80103 bin\pq_gen_avx.obj \
104 bin\xor_gen_avx.obj \
81105 bin\pq_gen_avx2.obj \
106 bin\xor_gen_avx512.obj \
82107 bin\pq_gen_avx512.obj \
83 bin\pq_gen_sse.obj \
84 bin\raid_base.obj \
85108 bin\raid_multibinary.obj \
86 bin\xor_check_sse.obj \
87 bin\xor_gen_avx.obj \
88 bin\xor_gen_avx512.obj \
89 bin\xor_gen_sse.obj \
90109 bin\crc16_t10dif_01.obj \
91110 bin\crc16_t10dif_by4.obj \
92111 bin\crc16_t10dif_02.obj \
112 bin\crc16_t10dif_by16_10.obj \
93113 bin\crc16_t10dif_copy_by4.obj \
94114 bin\crc16_t10dif_copy_by4_02.obj \
95115 bin\crc32_ieee_01.obj \
96116 bin\crc32_ieee_02.obj \
97117 bin\crc32_ieee_by4.obj \
118 bin\crc32_ieee_by16_10.obj \
119 bin\crc32_iscsi_01.obj \
98120 bin\crc32_iscsi_00.obj \
99 bin\crc32_iscsi_01.obj \
100 bin\crc64_base.obj \
121 bin\crc32_iscsi_by16_10.obj \
122 bin\crc_multibinary.obj \
123 bin\crc64_multibinary.obj \
124 bin\crc64_ecma_refl_by8.obj \
125 bin\crc64_ecma_refl_by16_10.obj \
101126 bin\crc64_ecma_norm_by8.obj \
102 bin\crc64_ecma_refl_by8.obj \
127 bin\crc64_ecma_norm_by16_10.obj \
128 bin\crc64_iso_refl_by8.obj \
129 bin\crc64_iso_refl_by16_10.obj \
103130 bin\crc64_iso_norm_by8.obj \
104 bin\crc64_iso_refl_by8.obj \
131 bin\crc64_iso_norm_by16_10.obj \
132 bin\crc64_jones_refl_by8.obj \
133 bin\crc64_jones_refl_by16_10.obj \
105134 bin\crc64_jones_norm_by8.obj \
106 bin\crc64_jones_refl_by8.obj \
107 bin\crc64_multibinary.obj \
108 bin\crc_base.obj \
109 bin\adler32_base.obj \
110 bin\crc_multibinary.obj \
111 bin\huff_codes.obj \
112 bin\hufftables_c.obj \
113 bin\igzip.obj \
114 bin\igzip_base.obj \
135 bin\crc64_jones_norm_by16_10.obj \
136 bin\crc32_gzip_refl_by8.obj \
137 bin\crc32_gzip_refl_by8_02.obj \
138 bin\crc32_gzip_refl_by16_10.obj \
115139 bin\igzip_body.obj \
116 bin\igzip_decode_block_stateless_01.obj \
117 bin\igzip_decode_block_stateless_04.obj \
118140 bin\igzip_finish.obj \
119 bin\flatten_ll.obj \
120 bin\encode_df.obj \
121 bin\encode_df_04.obj \
122 bin\proc_heap.obj \
123141 bin\igzip_icf_body_h1_gr_bt.obj \
124142 bin\igzip_icf_finish.obj \
125 bin\igzip_icf_base.obj \
126 bin\igzip_inflate.obj \
127 bin\igzip_inflate_multibinary.obj \
143 bin\rfc1951_lookup.obj \
144 bin\adler32_sse.obj \
145 bin\adler32_avx2_4.obj \
128146 bin\igzip_multibinary.obj \
129147 bin\igzip_update_histogram_01.obj \
130148 bin\igzip_update_histogram_04.obj \
131 bin\rfc1951_lookup.obj \
132 bin\crc32_gzip_refl_by8.obj \
133 bin\crc32_gzip_refl_by8_02.obj \
134 bin\crc32_gzip_refl_by16_10.obj \
135 bin\adler32_sse.obj \
136 bin\adler32_avx2_4.obj \
149 bin\igzip_decode_block_stateless_01.obj \
150 bin\igzip_decode_block_stateless_04.obj \
151 bin\igzip_inflate_multibinary.obj \
152 bin\encode_df_04.obj \
153 bin\encode_df_06.obj \
154 bin\proc_heap.obj \
137155 bin\igzip_deflate_hash.obj \
156 bin\igzip_gen_icf_map_lh1_06.obj \
138157 bin\igzip_gen_icf_map_lh1_04.obj \
139 bin\igzip_gen_icf_map_lh1_06.obj \
140158 bin\igzip_set_long_icf_fg_04.obj \
141159 bin\igzip_set_long_icf_fg_06.obj \
142 bin\igzip_icf_body.obj \
143160 bin\mem_zero_detect_avx.obj \
144 bin\mem_zero_detect_base.obj \
145 bin\mem_multibinary.obj \
146 bin\mem_zero_detect_sse.obj
147
148 INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ -Imem/
149 LINKFLAGS = /nologo
150 CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D)
151 AFLAGS = -f win64 $(INCLUDES) $(D)
152 CC = icl
153 AS = yasm
161 bin\mem_zero_detect_sse.obj \
162 bin\mem_multibinary.obj
163
164 INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iprograms/ -Imem/ -Iinclude/ -Itests/fuzz/ -Iexamples/ec/
165 # Modern asm feature level, consider upgrading nasm/yasm before decreasing feature_level
166 FEAT_FLAGS = -DHAVE_AS_KNOWS_AVX512 -DAS_FEATURE_LEVEL=10
167 CFLAGS_REL = -O2 -DNDEBUG /Z7 /MD /Gy
168 CFLAGS_DBG = -Od -DDEBUG /Z7 /MDd
169 LINKFLAGS = -nologo -incremental:no -debug
170 CFLAGS = $(CFLAGS_REL) -nologo -D_USE_MATH_DEFINES $(FEAT_FLAGS) $(INCLUDES) $(D)
171 AFLAGS = -f win64 $(FEAT_FLAGS) $(INCLUDES) $(D)
172 CC = cl
173 # or CC = icl -Qstd=c99
174 AS = nasm
154175
155176 lib: bin static dll
156177 static: bin isa-l_static.lib
164185 <<
165186
166187 isa-l.dll: $(objs)
167 link -out:$@ -dll -def:isa-l.def @<<
188 link -out:$@ -dll -def:isa-l.def $(LINKFLAGS) @<<
168189 $?
169190 <<
170191
188209 {igzip}.asm.obj:
189210 $(AS) $(AFLAGS) -o $@ $?
190211
212 {programs}.c.obj:
213 $(CC) $(CFLAGS) /c -Fo$@ $?
214 {programs}.asm.obj:
215 $(AS) $(AFLAGS) -o $@ $?
216
191217 {mem}.c.obj:
192218 $(CC) $(CFLAGS) /c -Fo$@ $?
193219 {mem}.asm.obj:
194220 $(AS) $(AFLAGS) -o $@ $?
195221
222
196223 # Examples
197 ex = xor_example.exe crc_simple_test.exe crc64_example.exe igzip_example.exe igzip_sync_flush_example.exe
224 ex = \
225 xor_example.exe \
226 crc_simple_test.exe \
227 crc64_example.exe \
228 igzip_example.exe \
229 igzip_sync_flush_example.exe \
230 ec_simple_example.exe \
231 ec_piggyback_example.exe
232
198233 ex: lib $(ex)
199234
200235 $(ex): $(@B).obj
214249 pq_check_test.exe \
215250 crc16_t10dif_test.exe \
216251 crc16_t10dif_copy_test.exe \
252 crc64_funcs_test.exe \
217253 crc32_funcs_test.exe \
218 crc64_funcs_test.exe \
254 igzip_rand_test.exe \
219255 igzip_wrapper_hdr_test.exe \
220 igzip_rand_test.exe \
256 checksum32_funcs_test.exe \
221257 mem_zero_detect_test.exe
222258
223259 checks: lib $(checks)
243279 gf_vect_dot_prod_1tbl.exe \
244280 erasure_code_perf.exe \
245281 erasure_code_base_perf.exe \
246 erasure_code_sse_perf.exe \
247282 erasure_code_update_perf.exe \
248283 xor_gen_perf.exe \
249284 pq_gen_perf.exe \
250285 crc16_t10dif_perf.exe \
286 crc16_t10dif_copy_perf.exe \
287 crc16_t10dif_op_perf.exe \
251288 crc32_ieee_perf.exe \
252289 crc32_iscsi_perf.exe \
253 igzip_perf.exe \
254 igzip_sync_flush_perf.exe \
290 crc64_funcs_perf.exe \
255291 crc32_gzip_refl_perf.exe \
292 adler32_perf.exe \
256293 mem_zero_detect_perf.exe
257294
258295 perfs: lib $(perfs)
259296 $(perfs): $(@B).obj
297
298 progs = \
299 igzip.exe
300
301 progs: lib $(progs)
302 igzip.exe: programs\igzip_cli.obj
303 link /out:$@ $(LINKFLAGS) isa-l.lib $?
260304
261305 clean:
262306 -if exist *.obj del *.obj
263307 -if exist bin\*.obj del bin\*.obj
264308 -if exist isa-l_static.lib del isa-l_static.lib
265309 -if exist *.exe del *.exe
310 -if exist *.pdb del *.pdb
266311 -if exist isa-l.lib del isa-l.lib
267312 -if exist isa-l.dll del isa-l.dll
313 -if exist isa-l.exp del isa-l.exp
268314
269315 zlib.lib:
316 igzip_perf.exe: zlib.lib
270317 igzip_inflate_test.exe: zlib.lib
5050 lib_name := bin/isa-l.a
5151
5252 include make.inc
53 include tools/gen_nmake.mk
5354
5455 VPATH = . $(units) include tests/fuzz examples/ec
11 =================================================
22
33 [![Build Status](https://travis-ci.org/intel/isa-l.svg?branch=master)](https://travis-ci.org/intel/isa-l)
4 [![Package on conda-forge](https://img.shields.io/conda/v/conda-forge/isa-l.svg)](https://anaconda.org/conda-forge/isa-l)
45
56 ISA-L is a collection of optimized low-level functions targeting storage
67 applications. ISA-L includes:
6061
6162 nmake -f Makefile.nmake
6263
64 or see [details on setting up environment here](doc/build.md).
65
6366 ### Other make targets
6467 Other targets include:
6568 * `make check` : create and run tests
0 v2.29 Intel Intelligent Storage Acceleration Library Release Notes
0 v2.30 Intel Intelligent Storage Acceleration Library Release Notes
11 ==================================================================
22
33 RELEASE NOTE CONTENTS
1414
1515 2. FIXED ISSUES
1616 ---------------
17 v2.30
18
19 * Intel CET support.
20 * Windows nasm support fix.
21
1722 v2.28
1823
1924 * Fix documentation on gf_vect_mad(). Min length listed as 32 instead of
108113
109114 3. CHANGE LOG & FEATURES ADDED
110115 ------------------------------
116 v2.30
117
118 * Igzip compression enhancements.
119 - New functions for dictionary acceleration. Split dictionary processing and
120 resetting can greatly accelerate the performance of compressing many small
121 files with a dictionary.
122 - New static level 0 header decode tables. Accelerates decompressing small
123 files that are level 0 compressed by skipping the known header parsing.
124 - New feature for igzip cli tool: support for concatenated .gz files. On
125 decompression, igzip will process a series of independent, concatenated .gz
126 files into one output stream.
127
128 * CRC Improvements
129 - New vclmul version of crc32_iscsi().
130 - Updates for aarch64.
131
111132 v2.29
112133
113134 * CRC Improvements
22
33 AC_PREREQ(2.69)
44 AC_INIT([libisal],
5 [2.29.0],
5 [2.30.0],
66 [sg.support.isal@intel.com],
77 [isa-l],
88 [http://01.org/storage-acceleration-library])
5050
5151 # Check for programs
5252 AC_PROG_CC_STDC
53 AC_PROG_LD
5354 AC_USE_SYSTEM_EXTENSIONS
5455 AM_SILENT_RULES([yes])
5556 LT_INIT
6768
6869 # If this build is for x86, look for yasm and nasm
6970 if test x"$is_x86" = x"yes"; then
71 AC_MSG_CHECKING([whether Intel CET is enabled])
72 AC_TRY_COMPILE([],[
73 #ifndef __CET__
74 # error CET is not enabled
75 #endif],
76 [AC_MSG_RESULT([yes])
77 intel_cet_enabled=yes],
78 [AC_MSG_RESULT([no])
79 intel_cet_enabled=no])
80
81
7082 # Pick an assembler yasm or nasm
7183 if test x"$AS" = x""; then
7284 # Check for yasm and yasm features
248260 AM_CONDITIONAL(DARWIN, test "x" = "y")
249261 fi
250262
263 AM_CONDITIONAL(INTEL_CET_ENABLED, [test x"$intel_cet_enabled" = x"yes"])
251264
252265 # Check for header files
253266 AC_CHECK_HEADERS([limits.h stdint.h stdlib.h string.h])
4949 crc/crc32_ieee_by16_10.asm \
5050 crc/crc32_iscsi_01.asm \
5151 crc/crc32_iscsi_00.asm \
52 crc/crc32_iscsi_by16_10.asm \
5253 crc/crc_multibinary.asm \
5354 crc/crc64_multibinary.asm \
5455 crc/crc64_ecma_refl_by8.asm \
00 ########################################################################
1 # Copyright(c) 2019 Arm Corporation All rights reserved.
1 # Copyright(c) 2020 Arm Corporation All rights reserved.
22 #
33 # Redistribution and use in source and binary forms, with or without
44 # modification, are permitted provided that the following conditions
3333 lsrc_aarch64 += \
3434 crc/aarch64/crc16_t10dif_pmull.S \
3535 crc/aarch64/crc16_t10dif_copy_pmull.S \
36 crc/aarch64/crc32_iscsi_refl_pmull.S \
37 crc/aarch64/crc32_iscsi_refl_hw_fold.S \
3836 crc/aarch64/crc32_ieee_norm_pmull.S \
39 crc/aarch64/crc32_gzip_refl_pmull.S \
40 crc/aarch64/crc32_gzip_refl_hw_fold.S \
4137 crc/aarch64/crc64_ecma_refl_pmull.S \
4238 crc/aarch64/crc64_ecma_norm_pmull.S \
4339 crc/aarch64/crc64_iso_refl_pmull.S \
4440 crc/aarch64/crc64_iso_norm_pmull.S \
4541 crc/aarch64/crc64_jones_refl_pmull.S \
4642 crc/aarch64/crc64_jones_norm_pmull.S
43
44 #CRC32/CRC32C for micro-architecture
45 lsrc_aarch64 += \
46 crc/aarch64/crc32_iscsi_refl_pmull.S \
47 crc/aarch64/crc32_gzip_refl_pmull.S \
48 crc/aarch64/crc32_iscsi_3crc_fold.S \
49 crc/aarch64/crc32_gzip_refl_3crc_fold.S \
50 crc/aarch64/crc32_iscsi_crc_ext.S \
51 crc/aarch64/crc32_gzip_refl_crc_ext.S \
52 crc/aarch64/crc32_mix_default.S \
53 crc/aarch64/crc32c_mix_default.S \
54 crc/aarch64/crc32_mix_neoverse_n1.S \
55 crc/aarch64/crc32c_mix_neoverse_n1.S
56
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29
30
31
// crc32_hw_common poly_type
//
// Body of a CRC routine that walks the buffer with the crc32_u8/u16/u32/u64
// helpers. The register aliases used here (wCRC, LEN, BUF, wdata,
// data0..data3) and the crc32_u* helpers are not defined in this macro;
// presumably the including file provides them -- verify there.
//
//   poly_type  when it is the literal token "crc32" the running CRC is
//              bit-inverted on entry and again on exit; for any other
//              value the CRC is used unmodified.
//
// The result is returned in w0 and the macro ends the routine with ret.
// NOTE(review): the labels are plain symbols (no \@ suffix), so this macro
// can presumably be expanded only once per assembly unit -- confirm.
32 .macro crc32_hw_common poly_type
33
34 .ifc \poly_type,crc32
35 mvn wCRC,wCRC
36 .endif
// Zero length: the exit path undoes the inversion above, so the incoming
// CRC is returned unchanged.
37 cbz LEN, .zero_length_ret
// Alignment prologue: if bit 0 of the address is set, consume one byte.
38 tbz BUF, 0, .align_short
39 ldrb wdata,[BUF],1
40 sub LEN,LEN,1
41 crc32_u8 wCRC,wCRC,wdata
42 .align_short:
// If address bit 1 is set AND LEN > 1, consume a halfword. When the tst
// result is "eq", ccmp forces NZCV=0, which makes the bhi fall through.
43 tst BUF,2
44 ccmp LEN,1,0,ne
45 bhi .align_short_2
// If address bit 2 is set AND LEN > 3, consume a word.
46 tst BUF,4
47 ccmp LEN,3,0,ne
48 bhi .align_word
49
50 .align_finish:
51
// Skip the 64-byte loop when 63 bytes or fewer remain.
52 cmp LEN, 63
53 bls .loop_16B
54 .loop_64B:
// Main loop: 64 bytes per iteration, loads interleaved with CRC steps.
// Prefetches target 2048 bytes ahead (L2) and 256 bytes ahead (L1).
55 ldp data0, data1, [BUF],#16
56 prfm pldl2keep,[BUF,2048]
57 sub LEN,LEN,#64
58 ldp data2, data3, [BUF],#16
59 prfm pldl1keep,[BUF,256]
// Flags set here are consumed by the bge at the bottom of the loop;
// assumes crc32_u64 leaves NZCV untouched -- TODO confirm.
60 cmp LEN,#64
61 crc32_u64 wCRC, wCRC, data0
62 crc32_u64 wCRC, wCRC, data1
63 ldp data0, data1, [BUF],#16
64 crc32_u64 wCRC, wCRC, data2
65 crc32_u64 wCRC, wCRC, data3
66 ldp data2, data3, [BUF],#16
67 crc32_u64 wCRC, wCRC, data0
68 crc32_u64 wCRC, wCRC, data1
69 crc32_u64 wCRC, wCRC, data2
70 crc32_u64 wCRC, wCRC, data3
71 bge .loop_64B
72
// Despite the label name this is not a loop: fewer than 64 bytes remain
// here, so at most three unrolled 16-byte steps are needed.
73 .loop_16B:
74 cmp LEN, 15
75 bls .less_16B
76 ldp data0, data1, [BUF],#16
77 sub LEN,LEN,#16
78 cmp LEN,15
79 crc32_u64 wCRC, wCRC, data0
80 crc32_u64 wCRC, wCRC, data1
81 bls .less_16B
82 ldp data0, data1, [BUF],#16
83 sub LEN,LEN,#16
84 cmp LEN,15
85 crc32_u64 wCRC, wCRC, data0
86 crc32_u64 wCRC, wCRC, data1
87 bls .less_16B
88 ldp data0, data1, [BUF],#16
89 sub LEN,LEN,#16 // LEN must now be less than 16 bytes
90 crc32_u64 wCRC, wCRC, data0
91 crc32_u64 wCRC, wCRC, data1
// Tail: consume 8, 4, 2 and finally 1 byte as the remaining length allows.
92 .less_16B:
93 cmp LEN, 7
94 bls .less_8B
95 ldr data0, [BUF], 8
96 sub LEN, LEN, #8
97 crc32_u64 wCRC, wCRC, data0
98 .less_8B:
99 cmp LEN, 3
100 bls .less_4B
101 ldr wdata, [BUF], 4
102 sub LEN, LEN, #4
103 crc32_u32 wCRC, wCRC, wdata
104 .less_4B:
105 cmp LEN, 1
106 bls .less_2B
107 ldrh wdata, [BUF], 2
108 sub LEN, LEN, #2
109 crc32_u16 wCRC, wCRC, wdata
110 .less_2B:
111 cbz LEN, .zero_length_ret
112 ldrb wdata, [BUF]
113 crc32_u8 wCRC, wCRC, wdata
// Common exit: apply the final inversion for the crc32 variant and return
// the CRC in w0.
114 .zero_length_ret:
115 .ifc \poly_type,crc32
116 mvn w0, wCRC
117 .else
118 mov w0, wCRC
119 .endif
120 ret
// Out-of-line alignment steps, reached from the prologue above.
121 .align_short_2:
122 ldrh wdata, [BUF], 2
123 sub LEN, LEN, 2
124 tst BUF, 4
125 crc32_u16 wCRC, wCRC, wdata
// Fall through to .align_word only if address bit 2 is set AND LEN > 3;
// otherwise alignment handling is finished.
126 ccmp LEN, 3, 0, ne
127 bls .align_finish
128 .align_word:
129 ldr wdata, [BUF], 4
130 sub LEN, LEN, #4
131 crc32_u32 wCRC, wCRC, wdata
132 b .align_finish
133 .endm
134
// crc32_3crc_fold poly_type
//
// Body of a CRC routine that, while at least 1024 bytes remain, processes
// the buffer in 1024-byte chunks using three CRC accumulators (crc0, crc1,
// crc2) fed from three pointers, then merges them with pmull and constants
// from .Lconstants. The remaining bytes are handled by the same 64/16/8/4/
// 2/1-byte tail as the plain hardware version.
//
// Register aliases (wCRC, LEN, BUF, wdata, data0..3, crc0..2, xcrc0/1,
// ptr_crc0..2, crc*_data*, const_adr, dconst*/vconst*, dtmp*/vtmp*) and the
// crc32_u* helpers are not defined here; presumably the including file
// supplies them. NOTE(review): the chunk loop uses ptr_crc0/crc0 while the
// tail uses BUF/wCRC, so these are presumably aliases of the same
// registers -- confirm.
//
//   poly_type  "crc32" selects entry/exit bit inversion of the CRC and the
//              first pair of fold constants; any other value selects no
//              inversion and the second pair.
//
// The result is returned in w0 and the macro ends the routine with ret.
135 .macro crc32_3crc_fold poly_type
136 .ifc \poly_type,crc32
137 mvn wCRC,wCRC
138 .endif
// Zero length: the exit path undoes the inversion above.
139 cbz LEN, .zero_length_ret
// Alignment prologue: consume 1 byte if address bit 0 is set.
140 tbz BUF, 0, .align_short
141 ldrb wdata,[BUF],1
142 sub LEN,LEN,1
143 crc32_u8 wCRC,wCRC,wdata
144 .align_short:
// Consume a halfword if address bit 1 is set AND LEN > 1 (ccmp forces
// NZCV=0 when the tst result is "eq", so bhi is then not taken).
145 tst BUF,2
146 ccmp LEN,1,0,ne
147 bhi .align_short_2
// Consume a word if address bit 2 is set AND LEN > 3.
148 tst BUF,4
149 ccmp LEN,3,0,ne
150 bhi .align_word
151
152 .align_finish:
// With 1023 bytes or fewer left, skip the chunk loop and go to the tail.
153 cmp LEN,1023
154 adr const_adr, .Lconstants
155 bls 1f
156 ldp dconst0,dconst1,[const_adr]
// One pass of this loop consumes 1024 bytes:
//   8 bytes into crc0, then 21 x 16 = 336 bytes on each of three streams
//   whose pointers are 336 bytes apart, then a final 8 bytes on stream 2
//   (8 + 3*336 + 8 = 1024).
157 2:
158 ldr crc0_data0,[ptr_crc0],8
159 prfm pldl2keep,[ptr_crc0,3*1024-8]
// Streams 1 and 2 start from a zero CRC; they are merged into crc0 below.
160 mov crc1,0
161 mov crc2,0
162 add ptr_crc1,ptr_crc0,336
163 add ptr_crc2,ptr_crc0,336*2
164 crc32_u64 crc0,crc0,crc0_data0
// offset / ptr_offset / l1_offset are assembly-time symbols used only to
// compute the prefetch displacements inside the .rept blocks.
165 .set offset,0
166 .set ptr_offset,8
// First 5 steps: L2 prefetch only.
167 .rept 5
168 ldp crc0_data0,crc0_data1,[ptr_crc0],16
169 ldp crc1_data0,crc1_data1,[ptr_crc1],16
170 .set offset,offset+64
171 .set ptr_offset,ptr_offset+16
172 prfm pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset]
173 crc32_u64 crc0,crc0,crc0_data0
174 crc32_u64 crc0,crc0,crc0_data1
175 ldp crc2_data0,crc2_data1,[ptr_crc2],16
176 crc32_u64 crc1,crc1,crc1_data0
177 crc32_u64 crc1,crc1,crc1_data1
178 crc32_u64 crc2,crc2,crc2_data0
179 crc32_u64 crc2,crc2,crc2_data1
180 .endr
181 .set l1_offset,0
// Next 10 steps: both L2 and L1 prefetches.
182 .rept 10
183 ldp crc0_data0,crc0_data1,[ptr_crc0],16
184 ldp crc1_data0,crc1_data1,[ptr_crc1],16
185 .set offset,offset+64
186 .set ptr_offset,ptr_offset+16
187 prfm pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset]
188 prfm pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset]
189 .set l1_offset,l1_offset+64
190 crc32_u64 crc0,crc0,crc0_data0
191 crc32_u64 crc0,crc0,crc0_data1
192 ldp crc2_data0,crc2_data1,[ptr_crc2],16
193 crc32_u64 crc1,crc1,crc1_data0
194 crc32_u64 crc1,crc1,crc1_data1
195 crc32_u64 crc2,crc2,crc2_data0
196 crc32_u64 crc2,crc2,crc2_data1
197 .endr
198
// Last 6 steps: L1 prefetch only.
199 .rept 6
200 ldp crc0_data0,crc0_data1,[ptr_crc0],16
201 ldp crc1_data0,crc1_data1,[ptr_crc1],16
202 .set ptr_offset,ptr_offset+16
203 prfm pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset]
204 .set l1_offset,l1_offset+64
205 crc32_u64 crc0,crc0,crc0_data0
206 crc32_u64 crc0,crc0,crc0_data1
207 ldp crc2_data0,crc2_data1,[ptr_crc2],16
208 crc32_u64 crc1,crc1,crc1_data0
209 crc32_u64 crc1,crc1,crc1_data1
210 crc32_u64 crc2,crc2,crc2_data0
211 crc32_u64 crc2,crc2,crc2_data1
212 .endr
// Final 8 bytes of the chunk go to stream 2.
213 ldr crc2_data0,[ptr_crc2]
// Merge step: crc0 and crc1 are moved to vector registers, polynomial-
// multiplied by the two constants, moved back (low 64 bits) and reduced
// with crc32_u64 against a zero seed, then XORed together with crc2.
// NOTE(review): presumably dtmpN/vtmpN are views of the same vector
// register and xcrcN is the 64-bit view of crcN -- confirm.
214 fmov dtmp0,xcrc0
215 fmov dtmp1,xcrc1
216 crc32_u64 crc2,crc2,crc2_data0
// ptr_crc0 has advanced 8 + 336 bytes; this moves it to the next chunk.
217 add ptr_crc0,ptr_crc0,1024-(336+8)
218 pmull vtmp0.1q,vtmp0.1d,vconst0.1d
219 sub LEN,LEN,1024
220 pmull vtmp1.1q,vtmp1.1d,vconst1.1d
// Flags set here are consumed by the bhs below; assumes crc32_u64 leaves
// NZCV untouched -- TODO confirm.
221 cmp LEN,1024
222 fmov xcrc0,dtmp0
223 fmov xcrc1,dtmp1
224 crc32_u64 crc0,wzr,xcrc0
225 crc32_u64 crc1,wzr,xcrc1
226
227 eor crc0,crc0,crc2
228 eor crc0,crc0,crc1
229
// Repeat while at least 1024 bytes remain.
230 bhs 2b
231 1:
// Tail: fewer than 1024 bytes remain. Skip the 64-byte loop when 63 bytes
// or fewer are left.
232 cmp LEN, 63
233 bls .loop_16B
234 .loop_64B:
235 ldp data0, data1, [BUF],#16
236 sub LEN,LEN,#64
237 ldp data2, data3, [BUF],#16
238 cmp LEN,#64
239 crc32_u64 wCRC, wCRC, data0
240 crc32_u64 wCRC, wCRC, data1
241 ldp data0, data1, [BUF],#16
242 crc32_u64 wCRC, wCRC, data2
243 crc32_u64 wCRC, wCRC, data3
244 ldp data2, data3, [BUF],#16
245 crc32_u64 wCRC, wCRC, data0
246 crc32_u64 wCRC, wCRC, data1
247 crc32_u64 wCRC, wCRC, data2
248 crc32_u64 wCRC, wCRC, data3
249 bge .loop_64B
250
// Not a loop despite the name: fewer than 64 bytes remain, so at most
// three unrolled 16-byte steps are needed.
251 .loop_16B:
252 cmp LEN, 15
253 bls .less_16B
254 ldp data0, data1, [BUF],#16
255 sub LEN,LEN,#16
256 cmp LEN,15
257 crc32_u64 wCRC, wCRC, data0
258 crc32_u64 wCRC, wCRC, data1
259 bls .less_16B
260 ldp data0, data1, [BUF],#16
261 sub LEN,LEN,#16
262 cmp LEN,15
263 crc32_u64 wCRC, wCRC, data0
264 crc32_u64 wCRC, wCRC, data1
265 bls .less_16B
266 ldp data0, data1, [BUF],#16
267 sub LEN,LEN,#16 // LEN must now be less than 16 bytes
268 crc32_u64 wCRC, wCRC, data0
269 crc32_u64 wCRC, wCRC, data1
// Consume 8, 4, 2 and finally 1 byte as the remaining length allows.
270 .less_16B:
271 cmp LEN, 7
272 bls .less_8B
273 ldr data0, [BUF], 8
274 sub LEN, LEN, #8
275 crc32_u64 wCRC, wCRC, data0
276 .less_8B:
277 cmp LEN, 3
278 bls .less_4B
279 ldr wdata, [BUF], 4
280 sub LEN, LEN, #4
281 crc32_u32 wCRC, wCRC, wdata
282 .less_4B:
283 cmp LEN, 1
284 bls .less_2B
285 ldrh wdata, [BUF], 2
286 sub LEN, LEN, #2
287 crc32_u16 wCRC, wCRC, wdata
288 .less_2B:
289 cbz LEN, .zero_length_ret
290 ldrb wdata, [BUF]
291 crc32_u8 wCRC, wCRC, wdata
// Common exit: final inversion for the crc32 variant, result in w0.
292 .zero_length_ret:
293 .ifc \poly_type,crc32
294 mvn w0, wCRC
295 .else
296 mov w0, wCRC
297 .endif
298 ret
// Out-of-line alignment steps, reached from the prologue above.
299 .align_short_2:
300 ldrh wdata, [BUF], 2
301 sub LEN, LEN, 2
302 tst BUF, 4
303 crc32_u16 wCRC, wCRC, wdata
// Fall through to .align_word only if address bit 2 is set AND LEN > 3.
304 ccmp LEN, 3, 0, ne
305 bls .align_finish
306 .align_word:
307 ldr wdata, [BUF], 4
308 sub LEN, LEN, #4
309 crc32_u32 wCRC, wCRC, wdata
310 b .align_finish
// Two 64-bit constants loaded into dconst0/dconst1 for the pmull merge
// step; the pair depends on poly_type. They are emitted in-line after the
// unconditional branch above, so execution never falls into them.
// NOTE(review): presumably polynomial shift factors matching the stream
// lengths -- confirm against the generator of these values.
311 .Lconstants:
312 .ifc \poly_type,crc32
313 .quad 0xb486819b
314 .quad 0x76278617
315 .else
316 .quad 0xe417f38a
317 .quad 0x8f158014
318 .endif
319
320 .endm
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29
30
31
// crc32_hw_common poly_type
//
// Body of a CRC routine that walks the buffer with the crc32_u8/u16/u32/u64
// helpers. The register aliases used here (wCRC, LEN, BUF, wdata,
// data0..data3) and the crc32_u* helpers are not defined in this macro;
// presumably the including file provides them -- verify there.
//
//   poly_type  when it is the literal token "crc32" the running CRC is
//              bit-inverted before processing and again at .finish_exit;
//              for any other value the CRC is used unmodified.
//
// The result is returned in w0 and the macro ends the routine with ret.
32 .macro crc32_hw_common poly_type
// Zero length is checked before any inversion, so .zero_length_ret can
// return the incoming CRC as is.
33 cbz LEN, .zero_length_ret
34 .ifc \poly_type,crc32
35 mvn wCRC,wCRC
36 .endif
// Alignment prologue: consume 1 byte if address bit 0 is set.
37 tbz BUF, 0, .align_short
38 ldrb wdata,[BUF],1
39 sub LEN,LEN,1
40 crc32_u8 wCRC,wCRC,wdata
41 .align_short:
// Consume a halfword if address bit 1 is set AND LEN > 1 (ccmp forces
// NZCV=0 when the tst result is "eq", so bhi is then not taken).
42 tst BUF,2
43 ccmp LEN,1,0,ne
44 bhi .align_short_2
// Consume a word if address bit 2 is set AND LEN > 3.
45 tst BUF,4
46 ccmp LEN,3,0,ne
47 bhi .align_word
48
49 .align_finish:
50
// Skip the 64-byte loop when 63 bytes or fewer remain.
51 cmp LEN, 63
52 bls .loop_16B
53 .loop_64B:
// Main loop: 64 bytes per iteration, loads interleaved with CRC steps.
54 ldp data0, data1, [BUF],#16
55 sub LEN,LEN,#64
56 ldp data2, data3, [BUF],#16
// Flags set here are consumed by the bge at the bottom of the loop;
// assumes crc32_u64 leaves NZCV untouched -- TODO confirm.
57 cmp LEN,#64
58 crc32_u64 wCRC, wCRC, data0
59 crc32_u64 wCRC, wCRC, data1
60 ldp data0, data1, [BUF],#16
61 crc32_u64 wCRC, wCRC, data2
62 crc32_u64 wCRC, wCRC, data3
63 ldp data2, data3, [BUF],#16
64 crc32_u64 wCRC, wCRC, data0
65 crc32_u64 wCRC, wCRC, data1
66 crc32_u64 wCRC, wCRC, data2
67 crc32_u64 wCRC, wCRC, data3
68 bge .loop_64B
69
// Not a loop despite the name: fewer than 64 bytes remain, so at most
// three unrolled 16-byte steps are needed.
70 .loop_16B:
71 cmp LEN, 15
72 bls .less_16B
73 ldp data0, data1, [BUF],#16
74 sub LEN,LEN,#16
75 cmp LEN,15
76 crc32_u64 wCRC, wCRC, data0
77 crc32_u64 wCRC, wCRC, data1
78 bls .less_16B
79 ldp data0, data1, [BUF],#16
80 sub LEN,LEN,#16
81 cmp LEN,15
82 crc32_u64 wCRC, wCRC, data0
83 crc32_u64 wCRC, wCRC, data1
84 bls .less_16B
85 ldp data0, data1, [BUF],#16
86 sub LEN,LEN,#16 // LEN must now be less than 16 bytes
87 crc32_u64 wCRC, wCRC, data0
88 crc32_u64 wCRC, wCRC, data1
// Tail: consume 8, 4, 2 and finally 1 byte as the remaining length allows.
89 .less_16B:
90 cmp LEN, 7
91 bls .less_8B
92 ldr data0, [BUF], 8
93 sub LEN, LEN, #8
94 crc32_u64 wCRC, wCRC, data0
95 .less_8B:
96 cmp LEN, 3
97 bls .less_4B
98 ldr wdata, [BUF], 4
99 sub LEN, LEN, #4
100 crc32_u32 wCRC, wCRC, wdata
101 .less_4B:
102 cmp LEN, 1
103 bls .less_2B
104 ldrh wdata, [BUF], 2
105 sub LEN, LEN, #2
106 crc32_u16 wCRC, wCRC, wdata
107 .less_2B:
108 cbz LEN, .finish_exit
109 ldrb wdata, [BUF]
110 crc32_u8 wCRC, wCRC, wdata
// Normal exit: apply the final inversion for the crc32 variant.
111 .finish_exit:
112 .ifc \poly_type,crc32
113 mvn w0, wCRC
114 .else
115 mov w0, wCRC
116 .endif
117 ret
// Zero-length exit: wCRC was never inverted, so it is returned unchanged.
118 .zero_length_ret:
119 mov w0, wCRC
120 ret
// Out-of-line alignment steps, reached from the prologue above.
121 .align_short_2:
122 ldrh wdata, [BUF], 2
123 sub LEN, LEN, 2
124 tst BUF, 4
125 crc32_u16 wCRC, wCRC, wdata
// Fall through to .align_word only if address bit 2 is set AND LEN > 3.
126 ccmp LEN, 3, 0, ne
127 bls .align_finish
128 .align_word:
129 ldr wdata, [BUF], 4
130 sub LEN, LEN, #4
131 crc32_u32 wCRC, wCRC, wdata
132 b .align_finish
133
134 .endm
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29
/*
 * declare_var_vector_reg name, reg
 *
 * Creates four register aliases for SIMD register number "reg":
 * name_q, name_v, name_s and name_d, bound to the q, v, s and d views
 * of that register. Both arguments are required.
 */
30 .macro declare_var_vector_reg name:req,reg:req
31 \name\()_q .req q\reg
32 \name\()_v .req v\reg
33 \name\()_s .req s\reg
34 \name\()_d .req d\reg
35 .endm
/* v20-v25: constant vectors that crc32_common_mix loads from .Lconstants. */
36 declare_var_vector_reg k1k2,20
37 declare_var_vector_reg k3k4,21
38 declare_var_vector_reg poly,22
39 declare_var_vector_reg k5k0,23
40 declare_var_vector_reg mask,24
41 declare_var_vector_reg fold_poly,25
42
/* v0-v3 hold the running fold state, v4-v7 are scratch for partial products. */
43 declare_var_vector_reg tmp0,0
44 declare_var_vector_reg tmp1,1
45 declare_var_vector_reg tmp2,2
46 declare_var_vector_reg tmp3,3
47 declare_var_vector_reg tmp4,4
48 declare_var_vector_reg tmp5,5
49 declare_var_vector_reg tmp6,6
50 declare_var_vector_reg tmp7,7
/* v16-v19 receive the next 64 input bytes inside pmull_fold. */
51 declare_var_vector_reg pmull_data0,16
52 declare_var_vector_reg pmull_data1,17
53 declare_var_vector_reg pmull_data2,18
54 declare_var_vector_reg pmull_data3,19
55
/* All-zero vector; set with movi in crc32_common_mix before first use. */
56 vzr .req v26
57
/* General purpose registers used by the 2048-byte mixed loop. */
58 const_addr .req x3
59 crc_blk_ptr .req x4
60 pmull_blk_ptr .req x5
61 crc_data0 .req x6
62 crc_data1 .req x7
63 crc_data2 .req x9
64 crc_data3 .req x10
65 wPmull .req w11
66 xPmull .req x11
67
/*
 * Registers used by the tail code after the mixed loop. They overlap
 * x4-x7 (crc_blk_ptr, pmull_blk_ptr, crc_data0, crc_data1), so the two
 * groups of names must not be live at the same time.
 */
68 data0 .req x4
69 data1 .req x5
70 data2 .req x6
71 data3 .req x7
72 wdata .req w4
73
/*
 * pmull_fold
 *
 * One interleaved step of the mixed CRC loop:
 *  - PMULL stream: multiplies the four 128-bit accumulators tmp0..tmp3 by
 *    k1k2 (high and low 64-bit halves separately), loads the next 64 bytes
 *    from pmull_blk_ptr (post-incremented by 64) and XORs products and new
 *    data back into tmp0..tmp3.
 *  - CRC stream: feeds twelve 64-bit words (crc_data0..crc_data3, three
 *    rounds) into wCRC with crc32_u64 and issues six 16-byte loads from
 *    crc_blk_ptr (post-incremented), leaving crc_data0..crc_data3 refilled
 *    for the next step.
 * Clobbers tmp4..tmp7 and v16..v19. crc32_u64, wCRC and the other aliases
 * must be defined by the including file.
 */
74 .macro pmull_fold
75
76 pmull2 tmp4_v.1q, tmp0_v.2d, k1k2_v.2d // high halves times high half of k1k2
77 pmull2 tmp5_v.1q, tmp1_v.2d, k1k2_v.2d
78 pmull2 tmp6_v.1q, tmp2_v.2d, k1k2_v.2d
79 pmull2 tmp7_v.1q, tmp3_v.2d, k1k2_v.2d
80
81 pmull tmp0_v.1q, tmp0_v.1d, k1k2_v.1d // low halves times low half of k1k2
82 pmull tmp1_v.1q, tmp1_v.1d, k1k2_v.1d
83 pmull tmp2_v.1q, tmp2_v.1d, k1k2_v.1d
84 pmull tmp3_v.1q, tmp3_v.1d, k1k2_v.1d
85 ld1 {pmull_data0_v.16b-pmull_data3_v.16b},[pmull_blk_ptr],#64 // next 64 bytes of the PMULL stream
86 crc32_u64 wCRC,wCRC,crc_data0
87 crc32_u64 wCRC,wCRC,crc_data1
88 ldp crc_data0,crc_data1,[crc_blk_ptr],16
89 crc32_u64 wCRC,wCRC,crc_data2
90 crc32_u64 wCRC,wCRC,crc_data3
91 ldp crc_data2,crc_data3,[crc_blk_ptr],16
92
93 eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b // combine low and high partial products
94 eor tmp1_v.16b, tmp1_v.16b, tmp5_v.16b
95 eor tmp2_v.16b, tmp2_v.16b, tmp6_v.16b
96 eor tmp3_v.16b, tmp3_v.16b, tmp7_v.16b
97
98 crc32_u64 wCRC,wCRC,crc_data0
99 crc32_u64 wCRC,wCRC,crc_data1
100 ldp crc_data0,crc_data1,[crc_blk_ptr],16
101 crc32_u64 wCRC,wCRC,crc_data2
102 crc32_u64 wCRC,wCRC,crc_data3
103 ldp crc_data2,crc_data3,[crc_blk_ptr],16
104 eor tmp0_v.16b, tmp0_v.16b, v16.16b // v16..v19 are pmull_data0..pmull_data3: XOR in the new data
105 eor tmp1_v.16b, tmp1_v.16b, v17.16b
106 eor tmp2_v.16b, tmp2_v.16b, v18.16b
107 eor tmp3_v.16b, tmp3_v.16b, v19.16b
108 crc32_u64 wCRC,wCRC,crc_data0
109 crc32_u64 wCRC,wCRC,crc_data1
110 ldp crc_data0,crc_data1,[crc_blk_ptr],16
111 crc32_u64 wCRC,wCRC,crc_data2
112 crc32_u64 wCRC,wCRC,crc_data3
113 ldp crc_data2,crc_data3,[crc_blk_ptr],16
114 .endm
115
116
117
/*
 * crc32_common_mix poly_type
 *
 * Function body for a CRC32 routine that mixes two techniques. Input is
 * consumed in blocks of MIX_BLK_SIZE (2048) bytes; inside a block the first
 * 512 bytes are folded with PMULL while the remaining 1536 bytes go through
 * the crc32_u64 instruction wrapper, with both instruction streams
 * interleaved. The two partial results are merged at the end of each block.
 * Whatever is left (fewer than 2048 bytes) is handled by the 64/16/8/4/2/1
 * byte tail that starts at start_final.
 *
 * poly_type: when it is the string crc32 the CRC is bit-inverted on entry
 *            and again on exit; any other value leaves the CRC as is.
 *
 * Expects BUF, LEN, wCRC, the crc32_u64/u32/u16/u8 macros and the
 * .Lconstants table to be provided by the including file. The result is
 * returned in w0.
 *
 * NOTE(review): the fold constants are not visible here; the comments on
 * what each constant is for follow the existing in-line comments and the
 * register names and should be confirmed against the .Lconstants table.
 * NOTE(review): the labels are not made unique per expansion, so this macro
 * can only be expanded once per assembly file.
 */
118 .macro crc32_common_mix poly_type
119 .set MIX_BLK_SIZE,2048
120
121 .ifc \poly_type,crc32
122 mvn wCRC,wCRC // crc32 variant only: invert the incoming CRC
123 .endif
124 cmp LEN,MIX_BLK_SIZE-1
125 adr const_addr, .Lconstants // adr does not alter the flags set by cmp
126 bls start_final // LEN <= 2047 (unsigned): tail code only
127 ld1 {k1k2_v.16b,k3k4_v.16b,poly_v.16b},[const_addr],#48
128 movi vzr.16b, #0
129 ld1 {k5k0_v.8b,mask_v.8b,fold_poly_v.8b},[const_addr] // 8 bytes each, low half of each vector
130
131 loop_2048:
132 ld1 {tmp0_v.16b-tmp3_v.16b}, [BUF] // first 64 bytes seed the PMULL accumulators
133 add pmull_blk_ptr,BUF,0x40 // PMULL stream continues at BUF+64
134 add crc_blk_ptr, BUF,512 // CRC instruction stream starts at BUF+512
135 mov tmp4_v.16b,vzr.16b
136 fmov tmp4_s, wCRC // tmp4 = running CRC in the lowest 32 bits, rest zero
137 ldp crc_data0,crc_data1,[crc_blk_ptr],16
138 eor tmp0_v.16b,tmp0_v.16b,tmp4_v.16b // inject the running CRC into the PMULL stream
139 mov wCRC, 0 // CRC instruction stream restarts from zero
140 sub LEN,LEN,MIX_BLK_SIZE
141 cmp LEN,MIX_BLK_SIZE // flags are consumed by the bge at the end of the loop
142 ldp crc_data2,crc_data3,[crc_blk_ptr],16
143 crc32_u64 wCRC,wCRC,crc_data0
144 crc32_u64 wCRC,wCRC,crc_data1
145 ldp crc_data0,crc_data1,[crc_blk_ptr],16
146 crc32_u64 wCRC,wCRC,crc_data2
147 crc32_u64 wCRC,wCRC,crc_data3
148 ldp crc_data2,crc_data3,[crc_blk_ptr],16
149
/* Seven steps of 64 bytes: with the initial 64 bytes the PMULL stream covers BUF..BUF+511. */
150 pmull_fold
151 pmull_fold
152 pmull_fold
153 pmull_fold
154 pmull_fold
155 pmull_fold
156 pmull_fold
157
158 /* Folding cache line into 128bit */
159 pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
160 crc32_u64 wCRC,wCRC,crc_data0
161 crc32_u64 wCRC,wCRC,crc_data1
162 ldp crc_data0,crc_data1,[crc_blk_ptr],16
163 crc32_u64 wCRC,wCRC,crc_data2
164 crc32_u64 wCRC,wCRC,crc_data3
165 ldp crc_data2,crc_data3,[crc_blk_ptr],16
166 crc32_u64 wCRC,wCRC,crc_data0
167 crc32_u64 wCRC,wCRC,crc_data1
168 ldp crc_data0,crc_data1,[crc_blk_ptr],16
169 pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
170 crc32_u64 wCRC,wCRC,crc_data2
171 crc32_u64 wCRC,wCRC,crc_data3
172 ldp crc_data2,crc_data3,[crc_blk_ptr],16
173 crc32_u64 wCRC,wCRC,crc_data0
174 crc32_u64 wCRC,wCRC,crc_data1
175 ldp crc_data0,crc_data1,[crc_blk_ptr],16
176 crc32_u64 wCRC,wCRC,crc_data2
177 crc32_u64 wCRC,wCRC,crc_data3
178 ldp crc_data2,crc_data3,[crc_blk_ptr],16
179 eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
180 crc32_u64 wCRC,wCRC,crc_data0
181 crc32_u64 wCRC,wCRC,crc_data1
182 ldp crc_data0,crc_data1,[crc_blk_ptr],16
183 eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b // tmp0 now covers tmp0 and tmp1
184 crc32_u64 wCRC,wCRC,crc_data2
185 crc32_u64 wCRC,wCRC,crc_data3
186 ldp crc_data2,crc_data3,[crc_blk_ptr],16
187 crc32_u64 wCRC,wCRC,crc_data0
188 crc32_u64 wCRC,wCRC,crc_data1
189 ldp crc_data0,crc_data1,[crc_blk_ptr],16
190 pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
191 crc32_u64 wCRC,wCRC,crc_data2
192 crc32_u64 wCRC,wCRC,crc_data3
193 ldp crc_data2,crc_data3,[crc_blk_ptr],16
194 pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
195 crc32_u64 wCRC,wCRC,crc_data0
196 crc32_u64 wCRC,wCRC,crc_data1
197 ldp crc_data0,crc_data1,[crc_blk_ptr],16
198 eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
199 crc32_u64 wCRC,wCRC,crc_data2
200 crc32_u64 wCRC,wCRC,crc_data3
201 ldp crc_data2,crc_data3,[crc_blk_ptr],16
202 crc32_u64 wCRC,wCRC,crc_data0
203 crc32_u64 wCRC,wCRC,crc_data1
204 ldp crc_data0,crc_data1,[crc_blk_ptr],16
205 crc32_u64 wCRC,wCRC,crc_data2
206 crc32_u64 wCRC,wCRC,crc_data3
207 ldp crc_data2,crc_data3,[crc_blk_ptr],16
208 eor tmp0_v.16b, tmp0_v.16b, tmp2_v.16b // tmp0 now also covers tmp2
209 crc32_u64 wCRC,wCRC,crc_data0
210 crc32_u64 wCRC,wCRC,crc_data1
211 ldp crc_data0,crc_data1,[crc_blk_ptr],16
212 pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
213 crc32_u64 wCRC,wCRC,crc_data2
214 crc32_u64 wCRC,wCRC,crc_data3
215 ldp crc_data2,crc_data3,[crc_blk_ptr],16
216 crc32_u64 wCRC,wCRC,crc_data0
217 crc32_u64 wCRC,wCRC,crc_data1
218 ldp crc_data0,crc_data1,[crc_blk_ptr],16
219 crc32_u64 wCRC,wCRC,crc_data2
220 crc32_u64 wCRC,wCRC,crc_data3
221 ldp crc_data2,crc_data3,[crc_blk_ptr],16
222 pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
223 crc32_u64 wCRC,wCRC,crc_data0
224 crc32_u64 wCRC,wCRC,crc_data1
225 ldp crc_data0,crc_data1,[crc_blk_ptr],16
226 crc32_u64 wCRC,wCRC,crc_data2
227 crc32_u64 wCRC,wCRC,crc_data3
228 ldp crc_data2,crc_data3,[crc_blk_ptr],16
229 eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
230 crc32_u64 wCRC,wCRC,crc_data0
231 crc32_u64 wCRC,wCRC,crc_data1
232 ldp crc_data0,crc_data1,[crc_blk_ptr],16
233 crc32_u64 wCRC,wCRC,crc_data2
234 crc32_u64 wCRC,wCRC,crc_data3
235 ldp crc_data2,crc_data3,[crc_blk_ptr],16
236 crc32_u64 wCRC,wCRC,crc_data0
237 crc32_u64 wCRC,wCRC,crc_data1
238 ldp crc_data0,crc_data1,[crc_blk_ptr],16
239 eor tmp0_v.16b, tmp0_v.16b, tmp3_v.16b // tmp0 now covers all four accumulators
240 crc32_u64 wCRC,wCRC,crc_data2
241 crc32_u64 wCRC,wCRC,crc_data3
242 ldp crc_data2,crc_data3,[crc_blk_ptr],16
243 crc32_u64 wCRC,wCRC,crc_data0
244 crc32_u64 wCRC,wCRC,crc_data1
245 ldp crc_data0,crc_data1,[crc_blk_ptr],16
246
247
248 /**
249 * perform the last 64 bit fold, also
250 * adds 32 zeroes to the input stream
251 */
252 ext tmp1_v.16b, tmp0_v.16b, tmp0_v.16b, #8 // tmp1 = tmp0 with its 64-bit halves swapped
253 crc32_u64 wCRC,wCRC,crc_data2
254 crc32_u64 wCRC,wCRC,crc_data3
255 ldp crc_data2,crc_data3,[crc_blk_ptr],16
256 crc32_u64 wCRC,wCRC,crc_data0
257 crc32_u64 wCRC,wCRC,crc_data1
258 ldp crc_data0,crc_data1,[crc_blk_ptr],16
259 pmull2 tmp1_v.1q, tmp1_v.2d, k3k4_v.2d
260 crc32_u64 wCRC,wCRC,crc_data2
261 crc32_u64 wCRC,wCRC,crc_data3
262 ldp crc_data2,crc_data3,[crc_blk_ptr],16
263 crc32_u64 wCRC,wCRC,crc_data0
264 crc32_u64 wCRC,wCRC,crc_data1
265 ldp crc_data0,crc_data1,[crc_blk_ptr],16
266 crc32_u64 wCRC,wCRC,crc_data2
267 crc32_u64 wCRC,wCRC,crc_data3
268 ldp crc_data2,crc_data3,[crc_blk_ptr],16
269 ext tmp0_v.16b, tmp0_v.16b, vzr.16b, #8 // high half of tmp0 moves down, zeros shift in
270 crc32_u64 wCRC,wCRC,crc_data0
271 crc32_u64 wCRC,wCRC,crc_data1
272 ldp crc_data0,crc_data1,[crc_blk_ptr],16
273 crc32_u64 wCRC,wCRC,crc_data2
274 crc32_u64 wCRC,wCRC,crc_data3
275 ldp crc_data2,crc_data3,[crc_blk_ptr],16
276 eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
277 crc32_u64 wCRC,wCRC,crc_data0
278 crc32_u64 wCRC,wCRC,crc_data1
279 ldp crc_data0,crc_data1,[crc_blk_ptr],16
280 crc32_u64 wCRC,wCRC,crc_data2
281 crc32_u64 wCRC,wCRC,crc_data3
282 ldp crc_data2,crc_data3,[crc_blk_ptr],16
283
284 /* final 32-bit fold */
285 ext tmp1_v.16b, tmp0_v.16b, vzr.16b, #4 // tmp1 = tmp0 shifted down by 4 bytes
286 and tmp0_v.16b, tmp0_v.16b, mask_v.16b
287 crc32_u64 wCRC,wCRC,crc_data0
288 crc32_u64 wCRC,wCRC,crc_data1
289 ldp crc_data0,crc_data1,[crc_blk_ptr],16
290 pmull tmp0_v.1q, tmp0_v.1d, k5k0_v.1d
291 crc32_u64 wCRC,wCRC,crc_data2
292 crc32_u64 wCRC,wCRC,crc_data3
293 ldp crc_data2,crc_data3,[crc_blk_ptr],16
294 crc32_u64 wCRC,wCRC,crc_data0
295 crc32_u64 wCRC,wCRC,crc_data1
296 ldp crc_data0,crc_data1,[crc_blk_ptr],16
297 crc32_u64 wCRC,wCRC,crc_data2
298 crc32_u64 wCRC,wCRC,crc_data3
299 ldp crc_data2,crc_data3,[crc_blk_ptr],16
300 eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
301
302 /**
303 * Finish up with the bit-reversed barrett
304 * reduction 64 ==> 32 bits
305 */
306 crc32_u64 wCRC,wCRC,crc_data0
307 crc32_u64 wCRC,wCRC,crc_data1
308 and tmp1_v.16b, tmp0_v.16b, mask_v.16b
309 ldp crc_data0,crc_data1,[crc_blk_ptr],16
310 ext tmp1_v.16b, vzr.16b, tmp1_v.16b, #8 // move the masked low half into the high half
311 crc32_u64 wCRC,wCRC,crc_data2
312 crc32_u64 wCRC,wCRC,crc_data3
313 pmull2 tmp1_v.1q, tmp1_v.2d, poly_v.2d
314 ldp crc_data2,crc_data3,[crc_blk_ptr],16
315 crc32_u64 wCRC,wCRC,crc_data0
316 crc32_u64 wCRC,wCRC,crc_data1
317 ldp crc_data0,crc_data1,[crc_blk_ptr],16
318 crc32_u64 wCRC,wCRC,crc_data2
319 crc32_u64 wCRC,wCRC,crc_data3
320 and tmp1_v.16b, tmp1_v.16b, mask_v.16b
321 ldp crc_data2,crc_data3,[crc_blk_ptr],16
322 pmull tmp1_v.1q, tmp1_v.1d, poly_v.1d
323 crc32_u64 wCRC,wCRC,crc_data0
324 crc32_u64 wCRC,wCRC,crc_data1
325 ldp crc_data0,crc_data1,[crc_blk_ptr],16
326 eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
327 crc32_u64 wCRC,wCRC,crc_data2
328 crc32_u64 wCRC,wCRC,crc_data3
329 mov tmp4_v.16b,vzr.16b
330 mov tmp4_v.s[0], tmp0_v.s[1] // 32-bit result of the PMULL stream, zero-extended in tmp4
331 ldp crc_data2,crc_data3,[crc_blk_ptr],16
332 crc32_u64 wCRC,wCRC,crc_data0
333 crc32_u64 wCRC,wCRC,crc_data1
334 ldp crc_data0,crc_data1,[crc_blk_ptr],16
335 crc32_u64 wCRC,wCRC,crc_data2
336 crc32_u64 wCRC,wCRC,crc_data3
337 ldp crc_data2,crc_data3,[crc_blk_ptr],16
338 crc32_u64 wCRC,wCRC,crc_data0
339 crc32_u64 wCRC,wCRC,crc_data1
340 ldp crc_data0,crc_data1,[crc_blk_ptr],16
341 crc32_u64 wCRC,wCRC,crc_data2
342 crc32_u64 wCRC,wCRC,crc_data3
343 ldp crc_data2,crc_data3,[crc_blk_ptr],16
344 crc32_u64 wCRC,wCRC,crc_data0
345 crc32_u64 wCRC,wCRC,crc_data1
346 ldp crc_data0,crc_data1,[crc_blk_ptr],16
347 crc32_u64 wCRC,wCRC,crc_data2
348 crc32_u64 wCRC,wCRC,crc_data3
349 ldp crc_data2,crc_data3,[crc_blk_ptr],16
350
/* Last four words of the block; no further loads follow. */
351 crc32_u64 wCRC,wCRC,crc_data0
352 crc32_u64 wCRC,wCRC,crc_data1
353 crc32_u64 wCRC,wCRC,crc_data2
354 crc32_u64 wCRC,wCRC,crc_data3
355
356 pmull tmp4_v.1q, tmp4_v.1d, fold_poly_v.1d // presumably advances the PMULL result past the CRC-stream bytes; confirm against .Lconstants
357 add BUF,BUF,MIX_BLK_SIZE
358 fmov xPmull, tmp4_d
359 crc32_u64 wPmull, wzr, xPmull // reduce the 64-bit product to 32 bits, starting from zero
360 eor wCRC, wPmull, wCRC // merge PMULL-stream and CRC-stream results
361 bge loop_2048 // uses the flags of cmp LEN,MIX_BLK_SIZE above (signed compare)
362 start_final:
363 cmp LEN, 63
364 bls .loop_16B // fewer than 64 bytes left
365 .loop_64B:
366 ldp data0, data1, [BUF],#16
367 sub LEN,LEN,#64
368 ldp data2, data3, [BUF],#16
369 cmp LEN,#64 // flags are consumed by the bge at the end of this loop
370 crc32_u64 wCRC, wCRC, data0
371 crc32_u64 wCRC, wCRC, data1
372 ldp data0, data1, [BUF],#16
373 crc32_u64 wCRC, wCRC, data2
374 crc32_u64 wCRC, wCRC, data3
375 ldp data2, data3, [BUF],#16
376 crc32_u64 wCRC, wCRC, data0
377 crc32_u64 wCRC, wCRC, data1
378 crc32_u64 wCRC, wCRC, data2
379 crc32_u64 wCRC, wCRC, data3
380 bge .loop_64B
381
/* At most 63 bytes remain: up to three unrolled 16-byte steps, no back edge. */
382 .loop_16B:
383 cmp LEN, 15
384 bls .less_16B
385 ldp data0, data1, [BUF],#16
386 sub LEN,LEN,#16
387 cmp LEN,15
388 crc32_u64 wCRC, wCRC, data0
389 crc32_u64 wCRC, wCRC, data1
390 bls .less_16B
391 ldp data0, data1, [BUF],#16
392 sub LEN,LEN,#16
393 cmp LEN,15
394 crc32_u64 wCRC, wCRC, data0
395 crc32_u64 wCRC, wCRC, data1
396 bls .less_16B
397 ldp data0, data1, [BUF],#16
398 sub LEN,LEN,#16 //MUST less than 16B
399 crc32_u64 wCRC, wCRC, data0
400 crc32_u64 wCRC, wCRC, data1
401 .less_16B:
402 cmp LEN, 7
403 bls .less_8B
404 ldr data0, [BUF], 8
405 sub LEN, LEN, #8
406 crc32_u64 wCRC, wCRC, data0
407 .less_8B:
408 cmp LEN, 3
409 bls .less_4B
410 ldr wdata, [BUF], 4
411 sub LEN, LEN, #4
412 crc32_u32 wCRC, wCRC, wdata
413 .less_4B:
414 cmp LEN, 1
415 bls .less_2B
416 ldrh wdata, [BUF], 2
417 sub LEN, LEN, #2
418 crc32_u16 wCRC, wCRC, wdata
419 .less_2B:
420 cbz LEN, .finish_exit
421 ldrb wdata, [BUF]
422 crc32_u8 wCRC, wCRC, wdata
423 .finish_exit:
424 .ifc \poly_type,crc32
425 mvn w0, wCRC // crc32 variant only: undo the inversion done on entry
426 .else
427 mov w0, wCRC
428 .endif
429 ret
430 .endm
431
0 ########################################################################
1 # Copyright(c) 2020 Arm Corporation All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions
5 # are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in
10 # the documentation and/or other materials provided with the
11 # distribution.
12 # * Neither the name of Arm Corporation nor the names of its
13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission.
15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #########################################################################
28 #include "crc32_aarch64_common.h"
/*
 * Section setup and instruction wrappers for the gzip (CRC32) flavour.
 * crc32_u64/u32/u16/u8 dst,src,data expand to the CRC32 instruction for an
 * 8, 4, 2 or 1 byte operand (crc32x, crc32w, crc32h, crc32b), so shared
 * macros can be written without naming the polynomial.
 */
29 .text
30 .align 6
31 .arch armv8-a+crc+crypto
32 .macro crc32_u64 dst,src,data
33 crc32x \dst,\src,\data
34 .endm
35 .macro crc32_u32 dst,src,data
36 crc32w \dst,\src,\data
37 .endm
38 .macro crc32_u16 dst,src,data
39 crc32h \dst,\src,\data
40 .endm
41 .macro crc32_u8 dst,src,data
42 crc32b \dst,\src,\data
43 .endm
/*
 * declare_var_vector_reg name, reg
 *
 * Creates four aliases for SIMD register number "reg" by prefixing the
 * name with the view letter: qname, vname, sname and dname.
 * Both arguments are required.
 */
44 .macro declare_var_vector_reg name:req,reg:req
45 q\name .req q\reg
46 v\name .req v\reg
47 s\name .req s\reg
48 d\name .req d\reg
49 .endm
50
/*
 * Register aliases for crc32_gzip_refl_3crc_fold. Arguments arrive as
 * w0 = CRC, x1 = buffer, x2 = length (see the prototype comment below).
 * Several names share one register and must not be live together:
 *   x0/w0: wCRC, crc0, xcrc0
 *   x1:    BUF, ptr_crc0
 *   x3/w3: crc1, xcrc1, const_adr, wdata, data0
 *   x4/w4: crc2, data1
 *   x6:    ptr_crc1, data3
 */
51 BUF .req x1
52 ptr_crc0 .req x1
53 LEN .req x2
54 wCRC .req w0
55 crc0 .req w0
56 xcrc0 .req x0
57
58 crc1 .req w3
59 crc2 .req w4
60 xcrc1 .req x3
61 const_adr .req x3
62 ptr_crc1 .req x6
63 ptr_crc2 .req x7
64 crc0_data0 .req x9
65 crc0_data1 .req x10
66 crc1_data0 .req x11
67 crc1_data1 .req x12
68 crc2_data0 .req x13
69 crc2_data1 .req x14
70
71 wdata .req w3
72 data0 .req x3
73 data1 .req x4
74 data2 .req x5
75 data3 .req x6
76
/* Vector aliases (qtmp0/vtmp0/stmp0/dtmp0 and so on) on v0-v3. */
77 declare_var_vector_reg tmp0,0
78 declare_var_vector_reg tmp1,1
79 declare_var_vector_reg const0,2
80 declare_var_vector_reg const1,3
81
82 /**
83 uint32_t crc32_gzip_refl(
84 uint32_t wCRC,
85 const unsigned char *BUF,
86 uint64_t LEN
87 );
88 */
89
/*
 * Exported entry point. The whole body comes from the crc32_3crc_fold
 * macro, which is not defined in this file (presumably provided by
 * crc32_aarch64_common.h); it is expanded with poly_type crc32.
 */
90 .global crc32_gzip_refl_3crc_fold
91 .type crc32_gzip_refl_3crc_fold, %function
92 crc32_gzip_refl_3crc_fold:
93 crc32_3crc_fold crc32
94 .size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
/*
 * Definitions for crc32_gzip_refl_crc_ext: register aliases plus wrappers
 * that map the generic crc32_u64/u32/u16/u8 names onto the CRC32
 * instructions (crc32x, crc32w, crc32h, crc32b).
 */
29 .text
30 .align 6
31 .arch armv8-a+crc
32
33
34 #include "crc32_aarch64_common.h"
35
/* Arguments: w0 = CRC, x1 = buffer, x2 = length. x3-x7 are scratch. */
36 BUF .req x1
37 LEN .req x2
38 wCRC .req w0
39 data0 .req x4
40 data1 .req x5
41 data2 .req x6
42 data3 .req x7
43 wdata .req w3
44 .macro crc32_u64 dst,src,data
45 crc32x \dst,\src,\data
46 .endm
47 .macro crc32_u32 dst,src,data
48 crc32w \dst,\src,\data
49 .endm
50 .macro crc32_u16 dst,src,data
51 crc32h \dst,\src,\data
52 .endm
53 .macro crc32_u8 dst,src,data
54 crc32b \dst,\src,\data
55 .endm
56
57 /**
58 * uint32_t crc32_gzip_refl_crc_ext(uint32_t wCRC,
59 * const unsigned char *BUF, uint64_t LEN);
60 */
/*
 * Argument order follows the aliases of this file (wCRC = w0, BUF = x1,
 * LEN = x2). The body is the crc32_hw_common macro expanded with
 * poly_type crc32.
 */
61 .global crc32_gzip_refl_crc_ext
62 .type crc32_gzip_refl_crc_ext, %function
63 crc32_gzip_refl_crc_ext:
64 crc32_hw_common crc32
65 .size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext
+0
-176
crc/aarch64/crc32_gzip_refl_hw_fold.S less more
0 ########################################################################
1 # Copyright(c) 2019 Arm Corporation All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions
5 # are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in
10 # the documentation and/or other materials provided with the
11 # distribution.
12 # * Neither the name of Arm Corporation nor the names of its
13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission.
15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #########################################################################
28
29 .arch armv8-a+crc+crypto
30 .text
31 .align 3
32 .global crc32_gzip_refl_hw_fold
33 .type crc32_gzip_refl_hw_fold, %function
34
35 /* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */
/*
 * The seed is bit-inverted on entry and the result is inverted again
 * before returning. Input is processed in blocks of 1024 bytes: each block
 * is split into three 336-byte streams at offsets 0, 336 and 672 that are
 * run through crc32x side by side, then merged with two PMULL
 * multiplications, and the last 16 bytes of the block (offsets 1008 and
 * 1016) are fed in around the merge. The remainder (fewer than 1024 bytes)
 * goes through an 8-byte loop followed by 4, 2 and 1 byte steps.
 */
36
37 w_seed .req w0
38 w_crc .req w0
39 x_buf .req x1
40 x_len .req x2
41
/* Both names map to x10; see the notes at the mov and at .loop_fold_end. */
42 x_buf_loop_end .req x10
43 x_buf_iter .req x10
44
45 x_tmp .req x15
46 w_tmp .req w15
47
48 d_c0 .req d3
49 d_c1 .req d1
50 v_c0 .req v3
51 v_c1 .req v1
52 crc32_gzip_refl_hw_fold:
53 mvn w_seed, w_seed
54 cmp x_len, 1023
55 mov x_buf_iter, x_buf // short inputs: the tail starts at buf
56 bls .loop_fold_end // len <= 1023 (unsigned): no 1024-byte block
57
/* x_buf_loop_end = buf + (len rounded down to a multiple of 1024). */
58 sub x_buf_loop_end, x_len, #1024
59 and x_buf_loop_end, x_buf_loop_end, -1024
60 add x_buf_loop_end, x_buf_loop_end, 1024
61 add x_buf_loop_end, x_buf, x_buf_loop_end
62
/* d_c0 = 0xb486819b, multiplier applied to stream 0 (presumably its fold constant; derivation not shown here). */
63 mov x_tmp, 0x819b
64 movk x_tmp, 0xb486, lsl 16
65 fmov d_c0, x_tmp
66
/* d_c1 = 0x76278617, multiplier applied to stream 1 (same caveat). */
67 mov x_tmp, 0x8617
68 movk x_tmp, 0x7627, lsl 16
69 fmov d_c1, x_tmp
70
71 x_in64 .req x3
72 w_crc0 .req w0
73 w_crc1 .req w4
74 w_crc2 .req w5
75
76 d_crc0 .req d4
77 d_crc1 .req d5
78 v_crc0 .req v4
79 v_crc1 .req v5
80 .align 3
81 .loop_fold:
82 add x9, x_buf, 336 // end of stream 0, loop bound for .loop_for
83 mov x_in64, x_buf
84 mov w_crc1, 0 // streams 1 and 2 start from zero
85 mov w_crc2, 0
86
87 .align 3
88 .loop_for:
89 ldr x8, [x_in64] // stream 0
90 ldr x7, [x_in64, 336] // stream 1
91 ldr x6, [x_in64, 672] // stream 2
92
93 add x_in64, x_in64, 8
94 cmp x_in64, x9
95
96 crc32x w_crc0, w_crc0, x8
97 crc32x w_crc1, w_crc1, x7
98 crc32x w_crc2, w_crc2, x6
99 bne .loop_for // crc32x leaves the flags of the cmp intact
100
101 uxtw x_tmp, w_crc0
102 fmov d_crc0, x_tmp
103 pmull v_crc0.1q, v_crc0.1d, v_c0.1d // crc0 times c0
104
105 uxtw x_tmp, w_crc1
106 fmov d_crc1, x_tmp
107 pmull v_crc1.1q, v_crc1.1d, v_c1.1d // crc1 times c1
108
109 ldr x_tmp, [x_buf, 1008] // bytes 1008..1015 extend stream 2
110 crc32x w_crc2, w_crc2, x_tmp
111
112 fmov x_tmp, d_crc0
113 crc32x w_crc0, wzr, x_tmp // reduce the low 64 bits of the product, starting from zero
114
115 fmov x_tmp, d_crc1
116 crc32x w_crc1, wzr, x_tmp
117
118 eor w_crc0, w_crc0, w_crc1
119 eor w_crc0, w_crc0, w_crc2 // merged CRC of the three streams
120
121 ldr x_tmp, [x_buf, 1016] // last 8 bytes of the block go into the merged CRC
122 crc32x w_crc0, w_crc0, x_tmp
123
124 add x_buf, x_buf, 1024
125 cmp x_buf_loop_end, x_buf
126 bne .loop_fold
127
128 and x_len, x_len, 1023 // bytes left after the whole blocks
129
130 x_buf_loop_size8_end .req x3
/*
 * x_buf_iter (x10) points at the first unprocessed byte on both paths:
 * it equals buf when the block loop was skipped, and it equals
 * x_buf_loop_end (same register) after the block loop.
 */
131 .loop_fold_end:
132 cmp x_len, 7
133 bls .size_4
134
/* x_buf_loop_size8_end = x_buf_iter + (len rounded down to a multiple of 8). */
135 sub x_buf_loop_size8_end, x_len, #8
136 and x_buf_loop_size8_end, x_buf_loop_size8_end, -8
137 add x_buf_loop_size8_end, x_buf_loop_size8_end, 8
138 add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end
139
140 .align 3
141 .loop_size_8:
142 ldr x_tmp, [x_buf_iter], 8
143 crc32x w_crc, w_crc, x_tmp
144
145 cmp x_buf_iter, x_buf_loop_size8_end
146 bne .loop_size_8
147
148 and x_len, x_len, 7
149 .size_4:
150 cmp x_len, 3
151 bls .size_2
152
153 ldr w_tmp, [x_buf_iter], 4
154 crc32w w_crc, w_crc, w_tmp
155
156 sub x_len, x_len, #4
157 .size_2:
158 cmp x_len, 1
159 bls .size_1
160
161 ldrh w_tmp, [x_buf_iter], 2
162 crc32h w_crc, w_crc, w_tmp
163
164 sub x_len, x_len, #2
165 .size_1:
166 cbz x_len, .done
167
168 ldrb w_tmp, [x_buf_iter]
169 crc32b w_crc, w_crc, w_tmp
170
171 .done:
172 mvn w_crc, w_crc // undo the inversion done on entry; result is returned in w0
173 ret
174
175 .size crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold
0 ########################################################################
1 # Copyright(c) 2020 Arm Corporation All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions
5 # are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in
10 # the documentation and/or other materials provided with the
11 # distribution.
12 # * Neither the name of Arm Corporation nor the names of its
13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission.
15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #########################################################################
28
29
/*
 * Section setup and instruction wrappers for the iSCSI (CRC32C) flavour.
 * crc32_u64/u32/u16/u8 dst,src,data expand to the CRC32C instruction for
 * an 8, 4, 2 or 1 byte operand (crc32cx, crc32cw, crc32ch, crc32cb).
 */
30 .text
31 .align 6
32 .arch armv8-a+crc+crypto
33 #include "crc32_aarch64_common.h"
34 .macro crc32_u64 dst,src,data
35 crc32cx \dst,\src,\data
36 .endm
37 .macro crc32_u32 dst,src,data
38 crc32cw \dst,\src,\data
39 .endm
40 .macro crc32_u16 dst,src,data
41 crc32ch \dst,\src,\data
42 .endm
43 .macro crc32_u8 dst,src,data
44 crc32cb \dst,\src,\data
45 .endm
/*
 * declare_var_vector_reg name, reg
 *
 * Creates four aliases for SIMD register number "reg" by prefixing the
 * name with the view letter: qname, vname, sname and dname.
 * Both arguments are required.
 */
46 .macro declare_var_vector_reg name:req,reg:req
47 q\name .req q\reg
48 v\name .req v\reg
49 s\name .req s\reg
50 d\name .req d\reg
51 .endm
52
/*
 * Register aliases for crc32_iscsi_3crc_fold. Arguments arrive as
 * x0 = buffer, x1 = length, w2 = CRC (see the prototype comment below).
 * Several names share one register and must not be live together:
 *   x0:    BUF, ptr_crc0
 *   x2/w2: wCRC, crc0, xcrc0
 *   x3/w3: crc1, xcrc1, const_adr, wdata, data0
 *   x4/w4: crc2, data1
 *   x6:    ptr_crc1, data3
 */
53 BUF .req x0
54 LEN .req x1
55 wCRC .req w2
56 crc0 .req w2
57 crc1 .req w3
58 crc2 .req w4
59 xcrc0 .req x2
60 xcrc1 .req x3
61 const_adr .req x3
62 ptr_crc0 .req x0
63 ptr_crc1 .req x6
64 ptr_crc2 .req x7
65 crc0_data0 .req x9
66 crc0_data1 .req x10
67 crc1_data0 .req x11
68 crc1_data1 .req x12
69 crc2_data0 .req x13
70 crc2_data1 .req x14
71
72 wdata .req w3
73 data0 .req x3
74 data1 .req x4
75 data2 .req x5
76 data3 .req x6
77
/* Vector aliases (qtmp0/vtmp0/stmp0/dtmp0 and so on) on v0-v3. */
78 declare_var_vector_reg tmp0,0
79 declare_var_vector_reg tmp1,1
80 declare_var_vector_reg const0,2
81 declare_var_vector_reg const1,3
82
83 /**
84 unsigned int crc32_iscsi(
85 unsigned char *BUF,
86 int LEN,
87 unsigned int wCRC
88 );
89
90 */
91
/*
 * Exported entry point. The whole body comes from the crc32_3crc_fold
 * macro, which is not defined in this file (presumably provided by
 * crc32_aarch64_common.h); it is expanded with poly_type crc32c.
 * NOTE(review): the prototype above declares LEN as int, while LEN is
 * aliased to the 64-bit register x1. Whether the macro relies on the
 * upper 32 bits of x1 cannot be seen from this file; confirm.
 */
92 .global crc32_iscsi_3crc_fold
93 .type crc32_iscsi_3crc_fold, %function
94 crc32_iscsi_3crc_fold:
95 crc32_3crc_fold crc32c
96 .size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
/*
 * Definitions for crc32_iscsi_crc_ext: register aliases plus wrappers
 * that map the generic crc32_u64/u32/u16/u8 names onto the CRC32C
 * instructions (crc32cx, crc32cw, crc32ch, crc32cb).
 */
29 .text
30 .align 6
31 .arch armv8-a+crc
32
33
34 #include "crc32_aarch64_common.h"
/* Arguments: x0 = buffer, x1 = length, w2 = CRC. x3-x7 are scratch. */
35 BUF .req x0
36 LEN .req x1
37 wCRC .req w2
38 data0 .req x4
39 data1 .req x5
40 data2 .req x6
41 data3 .req x7
42 wdata .req w3
43 .macro crc32_u64 dst,src,data
44 crc32cx \dst,\src,\data
45 .endm
46 .macro crc32_u32 dst,src,data
47 crc32cw \dst,\src,\data
48 .endm
49 .macro crc32_u16 dst,src,data
50 crc32ch \dst,\src,\data
51 .endm
52 .macro crc32_u8 dst,src,data
53 crc32cb \dst,\src,\data
54 .endm
55
56 /**
57 * uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF,
58 * uint64_t LEN,uint32_t wCRC);
59 */
/*
 * Exported entry point. The body is the crc32_hw_common macro expanded
 * with poly_type crc32c; with that value the macro moves wCRC to w0
 * without inverting it before returning.
 */
60 .global crc32_iscsi_crc_ext
61 .type crc32_iscsi_crc_ext, %function
62 crc32_iscsi_crc_ext:
63 crc32_hw_common crc32c
64 .size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext
+0
-172
crc/aarch64/crc32_iscsi_refl_hw_fold.S less more
0 ########################################################################
1 # Copyright(c) 2019 Arm Corporation All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions
5 # are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in
10 # the documentation and/or other materials provided with the
11 # distribution.
12 # * Neither the name of Arm Corporation nor the names of its
13 # contributors may be used to endorse or promote products derived
14 # from this software without specific prior written permission.
15 #
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #########################################################################
28
29 .arch armv8-a+crc+crypto
30 .text
31 .align 3
32 .global crc32_iscsi_refl_hw_fold
33 .type crc32_iscsi_refl_hw_fold, %function
34
35 /* unsigned int crc32_iscsi_refl_hw_fold(unsigned char *buffer, int len, unsigned int crc_init) */
/*
 * Runs the crc32c* instructions over `len` bytes at `buffer`, starting from
 * crc_init (w2), and returns the result in w0.
 *  - For every full 1024 bytes, the chunk is processed as three independent
 *    336-byte streams whose partial CRCs are merged with pmull, followed by
 *    the two trailing 8-byte words of the chunk.
 *  - Whatever is left is consumed in 8-, 4-, 2- and 1-byte steps.
 * len is compared as a signed value, so a negative len returns crc_init
 * unchanged.
 */
36
37 x_buffer .req x0
38 w_len .req w1
39 w_crc_init .req w2
40 w_crc .req w2
41
42 w_len_loop_end .req w9
43 x_buf_loop_end .req x9
44 x_buf_iter .req x9 /* same register as x_buf_loop_end: once the chunk loop reaches its end pointer, x9 already addresses the tail */
45
46 x_tmp .req x15
47 w_tmp .req w15
48
49 w_crc_ret .req w0
50 crc32_iscsi_refl_hw_fold:
51 cmp w_len, 1023
52 mov x_buf_iter, x_buffer
53 ble .loop_fold_end /* signed len <= 1023: no full 1024-byte chunk, go straight to the tail code */
54
55 sub w10, w_len, #1024
56 lsr w12, w10, 10 /* w12 = number of 1024-byte chunks minus one */
57 lsl w_len_loop_end, w12, 10
58
59 add x_buf_loop_end, x_buf_loop_end, 1024 /* x9 = len rounded down to a multiple of 1024 */
60 add x_buf_loop_end, x_buffer, x_buf_loop_end /* x9 = address where the chunked region ends */
61
62 mov x_tmp, 0xf38a
63 movk x_tmp, 0xe417, lsl 16
64 fmov d3, x_tmp /* d3 = 0xe417f38a, pmull multiplier applied to stream 0's CRC */
65
66 mov x_tmp, 0x8014
67 movk x_tmp, 0x8f15, lsl 16
68 fmov d1, x_tmp /* d1 = 0x8f158014, pmull multiplier applied to stream 1's CRC */
69
70 x_in64 .req x1 /* reuses x1, so w_len is clobbered inside the loop and recomputed from w10/w12 afterwards */
71 w_crc0 .req w2
72 w_crc1 .req w3
73 w_crc2 .req w4
74 .align 3
75 .loop_fold:
76 add x8, x_buffer, 336 /* x8 = end of stream 0, used as the exit bound of .loop_for */
77 mov x_in64, x_buffer
78 mov w_crc1, 0
79 mov w_crc2, 0
80
81 .align 3
82 .loop_for: /* 42 iterations, each taking one 8-byte word from offsets 0, 336 and 672 */
83 ldr x7, [x_in64]
84 ldr x6, [x_in64, 336]
85 ldr x5, [x_in64, 672]
86
87 add x_in64, x_in64, 8
88 cmp x_in64, x8
89
90 crc32cx w_crc0, w_crc0, x7
91 crc32cx w_crc1, w_crc1, x6
92 crc32cx w_crc2, w_crc2, x5
93 bne .loop_for /* flags come from the cmp above; the crc32cx instructions in between do not set them */
94
95 uxtw x_tmp, w_crc0
96 fmov d4, x_tmp
97 pmull v2.1q, v4.1d, v3.1d /* carry-less multiply of stream 0's CRC by the d3 constant */
98
99 uxtw x_tmp, w_crc1
100 fmov d5, x_tmp
101 pmull v5.1q, v5.1d, v1.1d /* carry-less multiply of stream 1's CRC by the d1 constant */
102
103 fmov x_tmp, d2
104 crc32cx w_crc0, wzr, x_tmp /* low 64 bits of the product run through crc32cx with a zero seed */
105
106 fmov x_tmp, d5
107 crc32cx w_crc1, wzr, x_tmp
108
109 ldr x_tmp, [x_buffer, 1008]
110 crc32cx w_crc2, w_crc2, x_tmp /* stream 2 additionally absorbs the word at offset 1008 */
111
112 eor w_crc1, w_crc1, w_crc0
113 eor w_crc1, w_crc1, w_crc2 /* w_crc1 = XOR of the three adjusted partial CRCs */
114
115 ldr x_tmp, [x_buffer, 1016]
116 crc32cx w_crc0, w_crc1, x_tmp /* last word of the chunk is applied to the merged value; result becomes the running CRC */
117
118 add x_buffer, x_buffer, 1024
119 cmp x_buf_loop_end, x_buffer
120 bne .loop_fold
121
122 sub w_len, w10, w12, lsl 10 /* w_len = (len - 1024) - 1024 * w12, i.e. bytes left after the chunks */
123
124 x_buf_loop_size8_end .req x3
125 .loop_fold_end:
126 cmp w_len, 7
127 ble .size_4
128
129 sub w_len, w_len, #8
130 lsr w4, w_len, 3
131 lsl w3, w4, 3
132 add x_buf_loop_size8_end, x_buf_loop_size8_end, 8 /* x3 = remaining length rounded down to a multiple of 8 */
133 add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end /* x3 = end address for the 8-byte loop */
134
135 .align 3
136 .loop_size_8:
137 ldr x_tmp, [x_buf_iter], 8 /* post-indexed load: x_buf_iter advances by 8 */
138 crc32cx w_crc, w_crc, x_tmp
139
140 cmp x_buf_iter, x_buf_loop_size8_end
141 bne .loop_size_8
142
143 sub w_len, w_len, w4, lsl 3 /* w_len = bytes still unprocessed (0..7) */
144 .size_4:
145 cmp w_len, 3
146 ble .size_2
147
148 ldr w_tmp, [x_buf_iter], 4
149 crc32cw w_crc, w_crc, w_tmp
150 sub w_len, w_len, #4
151
152 .size_2:
153 cmp w_len, 1
154 ble .size_1
155
156 ldrh w_tmp, [x_buf_iter], 2
157 crc32ch w_crc, w_crc, w_tmp
158 sub w_len, w_len, #2
159
160 .size_1:
161 mov w_crc_ret, w_crc /* provisional return value; overwritten below only if one byte is left */
162 cmp w_len, 1
163 bne .done
164
165 ldrb w_tmp, [x_buf_iter]
166 crc32cb w_crc_ret, w_crc, w_tmp
167
168 .done:
169 ret
170
171 .size crc32_iscsi_refl_hw_fold, .-crc32_iscsi_refl_hw_fold
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 .arch armv8-a+crypto+crc
30 .text
31 .align 6
32
33 #define CRC32
34
// Width-specific CRC step wrappers. This file binds them to the plain crc32*
// instructions (not the crc32c* forms), so code written against
// crc32_u64/u32/u16/u8 -- presumably the file included right after these
// macros -- never names an instruction directly.
// Each macro takes: dst = destination W register, src = running CRC,
// data = value to absorb (64, 32, 16 or 8 bits wide respectively).
35 .macro crc32_u64 dst,src,data
36 crc32x \dst,\src,\data
37 .endm
38
39 .macro crc32_u32 dst,src,data
40 crc32w \dst,\src,\data
41 .endm
42
43 .macro crc32_u16 dst,src,data
44 crc32h \dst,\src,\data
45 .endm
46
47 .macro crc32_u8 dst,src,data
48 crc32b \dst,\src,\data
49 .endm
50
51 #include "crc32_mix_default_common.S"
52
// Exported entry point crc32_mix_default. The body is the expansion of the
// crc32_mix_main_default macro, which is not defined in this file and is
// presumably supplied by the #include above; CRC32 is #defined before that
// include, so any #ifdef CRC32 sections in the macro are enabled here.
53 .global crc32_mix_default
54 .type crc32_mix_default, %function
55 crc32_mix_default:
56 crc32_mix_main_default
57 .size crc32_mix_default, .-crc32_mix_default
58
// Read-only constant table. lanchor_crc32 marks the 16-byte-aligned start;
// every entry up to crc32_const is two .xword values (16 bytes), so the
// entries sit at anchor +0 (k1k2), +16 (k3k4), +32 (k5k0), +48 (poly) and
// +64 (crc32_const, 48 bytes). .lanchor_mask marks a second aligned anchor
// for the 16-byte mask that follows.
59 .section .rodata
60 .align 4
61 .set lanchor_crc32,. + 0
62
63 .type k1k2, %object
64 .size k1k2, 16
65 k1k2:
66 .xword 0x0154442bd4
67 .xword 0x01c6e41596
68
69 .type k3k4, %object
70 .size k3k4, 16
71 k3k4:
72 .xword 0x01751997d0
73 .xword 0x00ccaa009e
74
75 .type k5k0, %object
76 .size k5k0, 16
77 k5k0:
78 .xword 0x0163cd6124
79 .xword 0
80
81 .type poly, %object
82 .size poly, 16
83 poly:
84 .xword 0x01db710641
85 .xword 0x01f7011641
86
87 .type crc32_const, %object
88 .size crc32_const, 48
89 crc32_const:
90 .xword 0x1753ab84
91 .xword 0
92 .xword 0xbbf2f6d6
93 .xword 0
94 .xword 0x0c30f51d
95 .xword 0
96
97 .align 4
98 .set .lanchor_mask,. + 0
99
100 .type mask, %object
101 .size mask, 16
102 mask: // four 32-bit words -1, 0, -1, 0: selects the low 32 bits of each 64-bit half
103 .word -1
104 .word 0
105 .word -1
106 .word 0
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
// declare_generic_reg name, reg, default
//   Creates three .req aliases for general-purpose register number \reg:
//     \name    -> \default\reg  (default is the width prefix, e.g. w or x)
//     w_\name  -> w\reg
//     x_\name  -> x\reg
29 .macro declare_generic_reg name:req, reg:req, default:req
30 \name .req \default\reg
31 w_\name .req w\reg
32 x_\name .req x\reg
33 .endm
34
// declare_neon_reg name, reg, default
//   Creates five .req aliases for SIMD register number \reg:
//     \name -> \default\reg, plus v_\name, q_\name, d_\name and s_\name for
//     the v/q/d/s views of the same register.
35 .macro declare_neon_reg name:req, reg:req, default:req
36 \name .req \default\reg
37 v_\name .req v\reg
38 q_\name .req q\reg
39 d_\name .req d\reg
40 s_\name .req s\reg
41 .endm
42
43 /**********************************************************************
44 variables
45 **********************************************************************/
// General-purpose register aliases. Several names deliberately share a
// register number: size/crc2 (9), crc_tmp/pconst (10),
// size_tmp/data_tmp1/data_crc0 (11), data_tmp2/data_crc1 (12),
// data_tmp3/data_crc2 (13) and ret_crc/crc (0). Code using two names from the
// same group must not need both values at once.
46 declare_generic_reg crc, 0,w
47 declare_generic_reg buf, 1,x
48 declare_generic_reg len, 2,x
49 declare_generic_reg buf_saved, 3,x
50 declare_generic_reg buf_iter, 4,x
51 declare_generic_reg len_saved, 5,x
52 declare_generic_reg buf_tmp, 6,x
53
54 declare_generic_reg crc0, 7,x
55 declare_generic_reg crc1, 8,x
56 declare_generic_reg crc2, 9,x
57 declare_generic_reg pconst, 10,x
58 declare_generic_reg data_crc0, 11,x
59 declare_generic_reg data_crc1, 12,x
60 declare_generic_reg data_crc2, 13,x
61
62 declare_generic_reg size, 9,x
63 declare_generic_reg crc_tmp, 10,w
64 declare_generic_reg size_tmp, 11,x
65 declare_generic_reg data_tmp1, 11,x
66 declare_generic_reg data_tmp2, 12,x
67 declare_generic_reg data_tmp3, 13,x
68
69 declare_generic_reg tmp, 14,x
70 declare_generic_reg tmp1, 15,x
71
72 // return
73 declare_generic_reg ret_crc, 0,w
74
75 /**********************************************************************
76 simd variables
77 **********************************************************************/
78 declare_neon_reg a0, 0,v
79 declare_neon_reg a1, 1,v
80 declare_neon_reg a2, 2,v
81 declare_neon_reg a3, 3,v
82 declare_neon_reg a4, 4,v
83
84 declare_neon_reg a5, 16,v
85 declare_neon_reg a6, 17,v
86 declare_neon_reg a7, 18,v
87 declare_neon_reg a8, 19,v
88
89 declare_neon_reg y5, 20,v
90 declare_neon_reg y6, 21,v
91 declare_neon_reg y7, 22,v
92 declare_neon_reg y8, 23,v
93
94 declare_neon_reg neon_zero, 24,v // neon_zero and neon_tmp are the same register (v24)
95 declare_neon_reg neon_tmp, 24,v
96
97 declare_neon_reg k5k0, 25,v
98 declare_neon_reg neon_tmp1, 26,v
99 declare_neon_reg neon_tmp2, 27,v
100 declare_neon_reg neon_tmp3, 28,v
101
102 declare_neon_reg crc_pmull, 29,v
103 declare_neon_reg neon_crc0, 30,v
104 declare_neon_reg neon_crc1, 31,v
105
106 declare_neon_reg neon_const0, 5,v
107 declare_neon_reg neon_const1, 6,v
108 declare_neon_reg neon_const2, 7,v
109
110 // constants
// Byte offsets added to x_pconst when loading 16-byte table entries.
111 .equ offset_k3k4, 16
112 .equ offset_k5k0, 32
113 .equ offset_poly, 48
114 .equ offset_crc32_const, 64
115
116 // pmull fold
// One 64-byte folding step, with two kinds of work interleaved:
//  * SIMD lane: the four 128-bit accumulators a1..a4 are each multiplied by
//    the constant pair in a0 (pmull for the low halves into a5..a8, pmull2
//    for the high halves in place), then both products are XORed with the 64
//    bytes loaded from [x_buf_tmp] into y5..y8.
//  * Scalar lanes: w_crc0/w_crc1/w_crc2 each absorb eight 8-byte words, read
//    at x_buf_tmp + 464..520, + 976..1032 and + 1488..1544 respectively.
// The macro does not advance x_buf_tmp; the caller adds 64 between uses.
117 .macro pmull_fold
118 ldr x_data_crc0, [x_buf_tmp, 464]
119 ldr x_data_crc1, [x_buf_tmp, 976]
120 ldr x_data_crc2, [x_buf_tmp, 1488]
121
122 pmull v_a5.1q, v_a1.1d, v_a0.1d
123 crc32_u64 w_crc0, w_crc0, x_data_crc0
124 crc32_u64 w_crc1, w_crc1, x_data_crc1
125 crc32_u64 w_crc2, w_crc2, x_data_crc2
126
127 ldr x_data_crc0, [x_buf_tmp, 472]
128 ldr x_data_crc1, [x_buf_tmp, 984]
129 ldr x_data_crc2, [x_buf_tmp, 1496]
130
131 pmull v_a6.1q, v_a2.1d, v_a0.1d
132 crc32_u64 w_crc0, w_crc0, x_data_crc0
133 crc32_u64 w_crc1, w_crc1, x_data_crc1
134 crc32_u64 w_crc2, w_crc2, x_data_crc2
135
136 ldr x_data_crc0, [x_buf_tmp, 480]
137 ldr x_data_crc1, [x_buf_tmp, 992]
138 ldr x_data_crc2, [x_buf_tmp, 1504]
139
140 pmull v_a7.1q, v_a3.1d, v_a0.1d
141 crc32_u64 w_crc0, w_crc0, x_data_crc0
142 crc32_u64 w_crc1, w_crc1, x_data_crc1
143 crc32_u64 w_crc2, w_crc2, x_data_crc2
144
145 ldr x_data_crc0, [x_buf_tmp, 488]
146 ldr x_data_crc1, [x_buf_tmp, 1000]
147 ldr x_data_crc2, [x_buf_tmp, 1512]
148
149 pmull v_a8.1q, v_a4.1d, v_a0.1d
150 crc32_u64 w_crc0, w_crc0, x_data_crc0
151 crc32_u64 w_crc1, w_crc1, x_data_crc1
152 crc32_u64 w_crc2, w_crc2, x_data_crc2
153
154 ldr x_data_crc0, [x_buf_tmp, 496]
155 ldr x_data_crc1, [x_buf_tmp, 1008]
156 ldr x_data_crc2, [x_buf_tmp, 1520]
157
158 pmull2 v_a1.1q, v_a1.2d, v_a0.2d // high halves: a1 is overwritten, its low-half product is already in a5
159 crc32_u64 w_crc0, w_crc0, x_data_crc0
160 crc32_u64 w_crc1, w_crc1, x_data_crc1
161 crc32_u64 w_crc2, w_crc2, x_data_crc2
162
163 ld1 {v_y5.4s, v_y6.4s, v_y7.4s, v_y8.4s}, [x_buf_tmp] // next 64 input bytes for the SIMD lane
164
165 ldr x_data_crc0, [x_buf_tmp, 504]
166 ldr x_data_crc1, [x_buf_tmp, 1016]
167 ldr x_data_crc2, [x_buf_tmp, 1528]
168
169 pmull2 v_a2.1q, v_a2.2d, v_a0.2d
170 crc32_u64 w_crc0, w_crc0, x_data_crc0
171 crc32_u64 w_crc1, w_crc1, x_data_crc1
172 crc32_u64 w_crc2, w_crc2, x_data_crc2
173
174 pmull2 v_a3.1q, v_a3.2d, v_a0.2d
175 pmull2 v_a4.1q, v_a4.2d, v_a0.2d
176
177 eor v_y5.16b, v_y5.16b, v_a5.16b // data ^ low-half product
178 eor v_y6.16b, v_y6.16b, v_a6.16b
179 eor v_y7.16b, v_y7.16b, v_a7.16b
180 eor v_y8.16b, v_y8.16b, v_a8.16b
181
182 ldr x_data_crc0, [x_buf_tmp, 512]
183 ldr x_data_crc1, [x_buf_tmp, 1024]
184 ldr x_data_crc2, [x_buf_tmp, 1536]
185
186 eor v_a1.16b, v_y5.16b, v_a1.16b // ... ^ high-half product = new accumulator
187 eor v_a2.16b, v_y6.16b, v_a2.16b
188 eor v_a3.16b, v_y7.16b, v_a3.16b
189 eor v_a4.16b, v_y8.16b, v_a4.16b
190
191 crc32_u64 w_crc0, w_crc0, x_data_crc0
192 crc32_u64 w_crc1, w_crc1, x_data_crc1
193 crc32_u64 w_crc2, w_crc2, x_data_crc2
194
195 ldr x_data_crc0, [x_buf_tmp, 520]
196 ldr x_data_crc1, [x_buf_tmp, 1032]
197 ldr x_data_crc2, [x_buf_tmp, 1544]
198
199 crc32_u64 w_crc0, w_crc0, x_data_crc0
200 crc32_u64 w_crc1, w_crc1, x_data_crc1
201 crc32_u64 w_crc2, w_crc2, x_data_crc2
202 .endm
203
204 // crc32 mix for 2048 byte input data
// Processes exactly 2048 bytes at x_buf and leaves the result in w_ret_crc.
//  * Bytes [0, 512) go through the pmull lane: the first 64 bytes are loaded
//    into a1..a4 and seven pmull_fold steps consume the following 448 bytes.
//  * Bytes [512, 1024), [1024, 1536) and [1536, 2048) are run through
//    crc32_u64 as three independent streams (w_crc0, w_crc1, w_crc2), each
//    seeded with zero.
//  * The incoming CRC in w_crc (bit-inverted first when CRC32 is defined) is
//    XORed into the low 32 bits of the first 16 input bytes.
//  * At the end the pmull lane is reduced to 32 bits and the four partial
//    values are merged with pmull + crc32_u64 + XOR.
// x_buf itself is not modified; x_buf_tmp, x_pconst, x_tmp, x_tmp1 and the
// SIMD aliases used below are clobbered.
205 .macro crc32_mix2048
206 fmov s_a1, w_crc
207 movi v_neon_tmp.4s, 0
208
209 adrp x_pconst, lanchor_crc32
210 add x_buf_tmp, x_buf, 64
211
212 ldr x_data_crc0, [x_buf, 512]
213 ldr x_data_crc1, [x_buf, 1024]
214 ldr x_data_crc2, [x_buf, 1536]
215
216 crc32_u64 w_crc0, wzr, x_data_crc0 // zero seed: the real seed enters only through the pmull lane
217 crc32_u64 w_crc1, wzr, x_data_crc1
218 crc32_u64 w_crc2, wzr, x_data_crc2
219
220 #ifdef CRC32
221 mvn v_a1.8b, v_a1.8b
222 #endif
223
224 ins v_neon_tmp.s[0], v_a1.s[0] // only the low 32 bits (the seed) are kept; the rest of neon_tmp stays zero
225
226 ld1 {v_a1.4s, v_a2.4s, v_a3.4s, v_a4.4s}, [x_buf]
227
228 ldr x_data_crc0, [x_buf, 520]
229 ldr x_data_crc1, [x_buf, 1032]
230 ldr x_data_crc2, [x_buf, 1544]
231
232 eor v_a1.16b, v_a1.16b, v_neon_tmp.16b
233 ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2
234
235 crc32_u64 w_crc0, w_crc0, x_data_crc0
236 crc32_u64 w_crc1, w_crc1, x_data_crc1
237 crc32_u64 w_crc2, w_crc2, x_data_crc2
238
239 // loop start, unroll the loop
240 .align 4
241 pmull_fold
242
243 add x_buf_tmp, x_buf_tmp, 64
244 pmull_fold
245
246 add x_buf_tmp, x_buf_tmp, 64
247 pmull_fold
248
249 add x_buf_tmp, x_buf_tmp, 64
250 pmull_fold
251
252 add x_buf_tmp, x_buf_tmp, 64
253 pmull_fold
254
255 add x_buf_tmp, x_buf_tmp, 64
256 pmull_fold
257
258 add x_buf_tmp, x_buf_tmp, 64
259 pmull_fold
260 // loop end
261
262 // PMULL: fold into 128-bits
263 add x_pconst, x_pconst, :lo12:lanchor_crc32 // x_pconst now holds the full table address, so plain offsets work below
264
265 ldr x_data_crc0, [x_buf, 976]
266 ldr x_data_crc1, [x_buf, 1488]
267 ldr x_data_crc2, [x_buf, 2000]
268
269 ldr q_a0, [x_pconst, offset_k3k4] // k3k4
270
271 crc32_u64 w_crc0, w_crc0, x_data_crc0
272 crc32_u64 w_crc1, w_crc1, x_data_crc1
273 crc32_u64 w_crc2, w_crc2, x_data_crc2
274
275 pmull v_a5.1q, v_a1.1d, v_a0.1d
276 pmull2 v_a1.1q, v_a1.2d, v_a0.2d
277
278 eor v_a1.16b, v_a5.16b, v_a1.16b
279 eor v_a1.16b, v_a1.16b, v_a2.16b // a1 folded into a2
280
281 ldr x_data_crc0, [x_buf, 984]
282 ldr x_data_crc1, [x_buf, 1496]
283 ldr x_data_crc2, [x_buf, 2008]
284
285 crc32_u64 w_crc0, w_crc0, x_data_crc0
286 crc32_u64 w_crc1, w_crc1, x_data_crc1
287 crc32_u64 w_crc2, w_crc2, x_data_crc2
288
289 pmull v_a5.1q, v_a1.1d, v_a0.1d
290 pmull2 v_a1.1q, v_a1.2d, v_a0.2d
291
292 ldr x_data_crc0, [x_buf, 992]
293 ldr x_data_crc1, [x_buf, 1504]
294 ldr x_data_crc2, [x_buf, 2016]
295
296 eor v_a1.16b, v_a5.16b, v_a1.16b
297 eor v_a1.16b, v_a1.16b, v_a3.16b // ... then into a3
298
299 crc32_u64 w_crc0, w_crc0, x_data_crc0
300 crc32_u64 w_crc1, w_crc1, x_data_crc1
301 crc32_u64 w_crc2, w_crc2, x_data_crc2
302
303 pmull v_a5.1q, v_a1.1d, v_a0.1d
304 pmull2 v_a1.1q, v_a1.2d, v_a0.2d
305
306 ldr x_data_crc0, [x_buf, 1000]
307 ldr x_data_crc1, [x_buf, 1512]
308 ldr x_data_crc2, [x_buf, 2024]
309
310 eor v_a1.16b, v_a5.16b, v_a1.16b
311 eor v_a1.16b, v_a1.16b, v_a4.16b // ... then into a4: a1 is the single 128-bit accumulator
312
313 // PMULL: fold 128-bits to 64-bits
314 crc32_u64 w_crc0, w_crc0, x_data_crc0
315 crc32_u64 w_crc1, w_crc1, x_data_crc1
316 crc32_u64 w_crc2, w_crc2, x_data_crc2
317
318 dup d_a0, v_a0.d[1]
319 pmull v_a2.1q, v_a1.1d, v_a0.1d
320
321 movi v_neon_zero.4s, 0
322 ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0
323 adrp x_tmp, .lanchor_mask
324
325 ldr x_data_crc0, [x_buf, 1008]
326 ldr x_data_crc1, [x_buf, 1520]
327 ldr x_data_crc2, [x_buf, 2032]
328
329 ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8
330 eor v_a1.16b, v_a2.16b, v_a1.16b
331 ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask]
332
333 crc32_u64 w_crc0, w_crc0, x_data_crc0
334 crc32_u64 w_crc1, w_crc1, x_data_crc1
335 crc32_u64 w_crc2, w_crc2, x_data_crc2
336
337 dup d_a0, v_k5k0.d[1]
338 pmull v_a3.1q, v_a2.1d, v_a0.1d
339
340 ext v_a2.16b, v_a1.16b, v_neon_zero.16b, #4
341 and v_a1.16b, v_a1.16b, v_neon_tmp3.16b
342 pmull v_a1.1q, v_a1.1d, v_k5k0.1d
343 eor v_a1.16b, v_a2.16b, v_a1.16b
344
345 // PMULL: Barrett reduce to 32-bits
346 ldr q_neon_tmp1, [x_pconst, offset_poly] // poly
347
348 ldr x_data_crc0, [x_buf, 1016]
349 ldr x_data_crc1, [x_buf, 1528]
350 ldr x_data_crc2, [x_buf, 2040]
351
352 dup d_neon_tmp2, v_neon_tmp1.d[1]
353
354 crc32_u64 w_crc0, w_crc0, x_data_crc0 // last word of each 512-byte stream
355 crc32_u64 w_crc1, w_crc1, x_data_crc1
356 crc32_u64 w_crc2, w_crc2, x_data_crc2
357
358 and v_a2.16b, v_a1.16b, v_neon_tmp3.16b
359 pmull v_a2.1q, v_a2.1d, v_neon_tmp2.1d
360 and v_a2.16b, v_neon_tmp3.16b, v_a2.16b
361 pmull v_a2.1q, v_a2.1d, v_neon_tmp1.1d
362
363 // crc_pmull result
364 eor v_a1.16b, v_a1.16b, v_a2.16b
365 dup s_crc_pmull, v_a1.s[1]
366
367 // merge crc_pmull, crc0, crc1, crc2 using pmull instruction
368 fmov s_neon_crc0, w_crc0
369 fmov s_neon_crc1, w_crc1
370
371 ldr q_neon_const0, [x_pconst, offset_crc32_const]
372 ldr q_neon_const1, [x_pconst, offset_crc32_const+16]
373 ldr q_neon_const2, [x_pconst, offset_crc32_const+32]
374
375 pmull v_crc_pmull.1q, v_crc_pmull.1d, v_neon_const0.1d
376 pmull v_neon_crc0.1q, v_neon_crc0.1d, v_neon_const1.1d
377 pmull v_neon_crc1.1q, v_neon_crc1.1d, v_neon_const2.1d
378
379 fmov x_tmp1, d_neon_crc0
380 crc32_u64 w_crc0, wzr, x_tmp1 // each 64-bit product is run through crc32_u64 with a zero seed
381
382 fmov x_tmp1, d_neon_crc1
383 crc32_u64 w_crc1, wzr, x_tmp1
384
385 eor w_ret_crc, w_crc1, w_crc0
386
387 fmov x_tmp1, d_crc_pmull
388 crc32_u64 w_tmp, wzr, x_tmp1
389
390 eor w_crc2, w_tmp, w_crc2 // w_crc2 (last stream) is used as-is, without a multiplier
391
392 // handle crc32/crc32c
393 #ifdef CRC32
394 eon w_ret_crc, w_crc2, w_ret_crc // XOR combined with a bitwise NOT: undoes the seed inversion done at the top
395 #else
396 eor w_ret_crc, w_crc2, w_ret_crc
397 #endif
398 .endm
399
400 // crc32 mix main default
// Complete function body (it contains its own ret instructions).
// Inputs: w0 = starting CRC (crc), x1 = data pointer (buf), x2 = byte count
// (len). Output: w0.
//  * len == 0: returns w0 unchanged.
//  * Each full 2048 bytes is handled by crc32_mix2048.
//  * The remaining (len mod 2048) bytes go through .crc32_hw_handle, which
//    steps by 64, 8, 4, 2 and 1 bytes using the crc32_u* macros. When CRC32
//    is defined the CRC is bit-inverted on entry to and exit from that path.
// Length comparisons are unsigned (bls).
401 .macro crc32_mix_main_default
402 cmp x_len, 2047
403 mov x_len_saved, x_len
404 mov x_buf_saved, x_buf
405 bls .less_than_2048
406
407 sub x_buf_iter, x_len, #2048
408 stp x29, x30, [sp, -16]! // push frame pointer and link register; popped on both exits below
409
410 mov x29, sp
411 and x_buf_iter, x_buf_iter, -2048
412 add x_buf_iter, x_buf_iter, 2048 // = len rounded down to a multiple of 2048
413 add x_buf_iter, x_buf, x_buf_iter // x_buf_iter = end of the region handled in 2048-byte chunks
414
415 .align 4
416 .loop_mix:
417 mov x_buf, x_buf_saved
418 crc32_mix2048 // consumes w0 as seed and leaves the updated CRC in w0
419
420 add x_buf_saved, x_buf_saved, 2048
421 cmp x_buf_saved, x_buf_iter
422 bne .loop_mix
423
424 and x_len_saved, x_len_saved, 2047 // bytes left after the 2048-byte chunks
425 cbnz x_len_saved, .remain_ldp
426
427 ldp x29, x30, [sp], 16
428 ret
429
430 .align 4
431 .remain_ldp: // tail after at least one 2048-byte chunk: pop the frame, then fall into the common tail code
432 mov w_crc_tmp, crc
433 ldp x29, x30, [sp], 16
434 mov size, x_len_saved
435 mov buf, x_buf_iter
436 b .crc32_hw_handle
437
438 .remain: // whole input is shorter than 2048 bytes; nothing was pushed on this path
439 mov w_crc_tmp, crc
440 mov size, x_len_saved
441 mov buf, x_buf_saved
442 b .crc32_hw_handle
443
444 .align 4
445 .less_than_2048:
446 cbnz x_len, .remain
447 ret
448
449 .crc32_hw_handle:
450 cmp size, 63
451
452 #ifdef CRC32
453 mvn crc_tmp, crc_tmp // mvn does not alter the flags set by the cmp above
454 #endif
455
456 bls .less_than_64
457 sub buf_saved, size, #64 // buf_saved is reused here as the end pointer of the 64-byte loop
458 and buf_saved, buf_saved, -64
459 add buf_saved, buf_saved, 64
460 add buf_saved, buf, buf_saved
461
462 .align 4
463 .loop_64: // eight 8-byte words per iteration; buf is advanced mid-way, so later loads use negative offsets
464 ldp data_tmp1, data_tmp2, [buf]
465 ldr data_tmp3, [buf, 16]
466 crc32_u64 crc_tmp, crc_tmp, data_tmp1
467 crc32_u64 crc_tmp, crc_tmp, data_tmp2
468
469 ldp data_tmp1, data_tmp2, [buf, 24]
470 add buf, buf, 64
471
472 crc32_u64 crc_tmp, crc_tmp, data_tmp3
473 ldr data_tmp3, [buf, -24]
474
475 crc32_u64 crc_tmp, crc_tmp, data_tmp1
476 crc32_u64 crc_tmp, crc_tmp, data_tmp2
477
478 ldp data_tmp1, data_tmp2, [buf, -16]
479 cmp buf_saved, buf
480 crc32_u64 crc_tmp, crc_tmp, data_tmp3
481
482 crc32_u64 crc_tmp, crc_tmp, data_tmp1
483 crc32_u64 crc_tmp, crc_tmp, data_tmp2
484 bne .loop_64
485
486 and size, size, 63
487 .less_than_64: // unrolled: up to seven 8-byte words, leaving the ladder as soon as fewer than 8 bytes remain
488 cmp size, 7
489 bls .crc32_hw_w
490
491 ldr data_tmp2, [buf]
492 sub size_tmp, size, #8 // size_tmp (= size - 8) must survive until .crc32_hw_w_pre, so later steps use data_tmp3 as scratch
493 cmp size_tmp, 7
494 crc32_u64 crc_tmp, crc_tmp, data_tmp2
495 bls .crc32_hw_w_pre
496
497 ldr data_tmp2, [buf, 8]
498 sub data_tmp3, size, #16
499 cmp data_tmp3, 7
500 crc32_u64 crc_tmp, crc_tmp, data_tmp2
501 bls .crc32_hw_w_pre
502
503 ldr data_tmp2, [buf, 16]
504 sub data_tmp3, size, #24
505 cmp data_tmp3, 7
506 crc32_u64 crc_tmp, crc_tmp, data_tmp2
507 bls .crc32_hw_w_pre
508
509 ldr data_tmp2, [buf, 24]
510 sub data_tmp3, size, #32
511 cmp data_tmp3, 7
512 crc32_u64 crc_tmp, crc_tmp, data_tmp2
513 bls .crc32_hw_w_pre
514
515 ldr data_tmp2, [buf, 32]
516 sub data_tmp3, size, #40
517 cmp data_tmp3, 7
518 crc32_u64 crc_tmp, crc_tmp, data_tmp2
519 bls .crc32_hw_w_pre
520
521 ldr data_tmp2, [buf, 40]
522 sub data_tmp3, size, #48
523 cmp data_tmp3, 7
524 crc32_u64 crc_tmp, crc_tmp, data_tmp2
525 bls .crc32_hw_w_pre
526
527 ldr data_tmp2, [buf, 48]
528 crc32_u64 crc_tmp, crc_tmp, data_tmp2
529
530 .crc32_hw_w_pre: // advance buf past the 8-byte words just consumed
531 and size_tmp, size_tmp, -8
532 and size, size, 7
533 add size_tmp, size_tmp, 8 // = original size rounded down to a multiple of 8
534 add buf, buf, size_tmp
535
536 .crc32_hw_w:
537 cmp size, 3
538 bls .crc32_hw_h
539 ldr w_data_tmp2, [buf], 4
540 sub size, size, #4
541 crc32_u32 crc_tmp, crc_tmp, w_data_tmp2
542
543 .crc32_hw_h:
544 cmp size, 1
545 bls .crc32_hw_b
546 ldrh w_data_tmp2, [buf], 2
547 sub size, size, #2
548 crc32_u16 crc_tmp, crc_tmp, w_data_tmp2
549
550 .crc32_hw_b:
551 cbz size, .crc32_hw_done
552 ldrb w_data_tmp2, [buf]
553 crc32_u8 crc_tmp, crc_tmp, w_data_tmp2
554
555 .crc32_hw_done:
556 #ifdef CRC32
557 mvn ret_crc, crc_tmp
558 #else
559 mov ret_crc, crc_tmp
560 #endif
561 ret
562 .endm
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 .text
30 .align 6
31 .arch armv8-a+crypto+crc
32
33 #include "crc32_common_mix_neoverse_n1.S"
// Constant pool for this variant, emitted in .text right after the included
// file: three 16-byte .octa values followed by three 8-byte .quad values.
// NOTE(review): presumably read by the crc32_common_mix macro from the
// include above; that macro is not visible here -- confirm.
34 .Lconstants:
35 .octa 0x00000001c6e415960000000154442bd4
36 .octa 0x00000000ccaa009e00000001751997d0
37 .octa 0x00000001F701164100000001DB710641
38 .quad 0x0000000163cd6124
39 .quad 0x00000000FFFFFFFF
40 .quad 0x000000001753ab84
// Width-specific CRC step wrappers bound to the plain crc32* instructions
// (dst = destination, src = running CRC, data = value to absorb).
41 .macro crc32_u64 dst,src,data
42 crc32x \dst,\src,\data
43 .endm
44 .macro crc32_u32 dst,src,data
45 crc32w \dst,\src,\data
46 .endm
47 .macro crc32_u16 dst,src,data
48 crc32h \dst,\src,\data
49 .endm
50 .macro crc32_u8 dst,src,data
51 crc32b \dst,\src,\data
52 .endm
53
54
55 /**
56 * uint32_t crc32_mix_neoverse_n1(uint CRC ,uint8_t * BUF,
57 * size_t LEN)
58 */
// Argument registers for this entry point, matching the prototype order above:
// CRC in x0/w0, BUF in x1, LEN in x2.
59 BUF .req x1
60 LEN .req x2
61 CRC .req x0
62 wCRC .req w0
63 .align 6
64 .global crc32_mix_neoverse_n1
65 .type crc32_mix_neoverse_n1, %function
66 crc32_mix_neoverse_n1:
67 crc32_common_mix crc32 // whole function body comes from this macro, invoked with argument crc32
68 .size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1
69
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 .text
30 .arch armv8-a+crypto+crc
31 .align 6
32
// Width-specific CRC step wrappers. This file binds them to the crc32c*
// instruction forms, so code written against crc32_u64/u32/u16/u8 --
// presumably the file included right after these macros -- never names an
// instruction directly.
// Each macro takes: dst = destination W register, src = running CRC,
// data = value to absorb (64, 32, 16 or 8 bits wide respectively).
33 .macro crc32_u64 dst,src,data
34 crc32cx \dst,\src,\data
35 .endm
36
37 .macro crc32_u32 dst,src,data
38 crc32cw \dst,\src,\data
39 .endm
40
41 .macro crc32_u16 dst,src,data
42 crc32ch \dst,\src,\data
43 .endm
44
45 .macro crc32_u8 dst,src,data
46 crc32cb \dst,\src,\data
47 .endm
48
49 #include "crc32_mix_default_common.S"
50
// Exported entry point crc32c_mix_default. It rotates the incoming arguments
// from (x0, w1, w2) into (x1, x2, w0) and then expands crc32_mix_main_default,
// which is not defined in this file (presumably supplied by the #include
// above). The second argument is treated as a signed 32-bit value and
// sign-extended to 64 bits.
// NOTE(review): the incoming order is presumably (buffer, int len, crc_init)
// -- confirm against the C prototype.
51 .global crc32c_mix_default
52 .type crc32c_mix_default, %function
53 crc32c_mix_default:
54 mov w3, w2 // park the third argument while x0..x2 are rearranged
55 sxtw x2, w1 // second argument, sign-extended, becomes x2
56 mov x1, x0 // first argument becomes x1
57 mov w0, w3 // third argument becomes w0
58 crc32_mix_main_default
59 .size crc32c_mix_default, .-crc32c_mix_default
60
// Read-only constant table for the crc32c variant. lanchor_crc32 marks the
// 16-byte-aligned start; every entry up to crc32_const is two .xword values
// (16 bytes), so the entries sit at anchor +0 (k1k2), +16 (k3k4), +32 (k5k0),
// +48 (poly) and +64 (crc32_const, 48 bytes). .lanchor_mask marks a second
// aligned anchor for the 16-byte mask that follows.
61 .section .rodata
62 .align 4
63 .set lanchor_crc32,. + 0
64
65 .type k1k2, %object
66 .size k1k2, 16
67 k1k2:
68 .xword 0x00740eef02
69 .xword 0x009e4addf8
70
71 .type k3k4, %object
72 .size k3k4, 16
73 k3k4:
74 .xword 0x00f20c0dfe
75 .xword 0x014cd00bd6
76
77 .type k5k0, %object
78 .size k5k0, 16
79 k5k0:
80 .xword 0x00dd45aab8
81 .xword 0
82
83 .type poly, %object
84 .size poly, 16
85 poly:
86 .xword 0x0105ec76f0
87 .xword 0x00dea713f1
88
89 .type crc32_const, %object
90 .size crc32_const, 48
91 crc32_const:
92 .xword 0x9ef68d35
93 .xword 0
94 .xword 0x170076fa
95 .xword 0
96 .xword 0xdd7e3b0c
97 .xword 0
98
99 .align 4
100 .set .lanchor_mask,. + 0
101
102 .type mask, %object
103 .size mask, 16
104 mask: // four 32-bit words -1, 0, -1, 0: selects the low 32 bits of each 64-bit half
105 .word -1
106 .word 0
107 .word -1
108 .word 0
0 /**********************************************************************
1 Copyright(c) 2020 Arm Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Arm Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 .text
30 .align 6
31 .arch armv8-a+crypto+crc
32
33 #include "crc32_common_mix_neoverse_n1.S"
// Constant pool for the crc32c variant, emitted in .text right after the
// included file: three 16-byte .octa values followed by three 8-byte .quad
// values.
// NOTE(review): presumably read by the crc32_common_mix macro from the
// include above; that macro is not visible here -- confirm.
34 .Lconstants:
35 .octa 0x000000009e4addf800000000740eef02
36 .octa 0x000000014cd00bd600000000f20c0dfe
37 .octa 0x00000000dea713f10000000105ec76f0
38 .quad 0x00000000dd45aab8
39 .quad 0x00000000FFFFFFFF
40 .quad 0x000000009ef68d35
41
// Width-specific CRC step wrappers bound to the crc32c* instruction forms
// (dst = destination, src = running CRC, data = value to absorb).
42 .macro crc32_u64 dst,src,data
43 crc32cx \dst,\src,\data
44 .endm
45 .macro crc32_u32 dst,src,data
46 crc32cw \dst,\src,\data
47 .endm
48 .macro crc32_u16 dst,src,data
49 crc32ch \dst,\src,\data
50 .endm
51 .macro crc32_u8 dst,src,data
52 crc32cb \dst,\src,\data
53 .endm
54 /**
55 * uint32_t crc32c_mix_neoverse_n1(uint8_t * BUF,
56 * size_t LEN, uint CRC)
57 */
// Argument registers for this entry point, matching the prototype order above:
// BUF in x0, LEN in x1, CRC in x2/w2.
58 BUF .req x0
59 LEN .req x1
60 CRC .req x2
61 wCRC .req w2
62 .align 6
63 .global crc32c_mix_neoverse_n1
64 .type crc32c_mix_neoverse_n1, %function
65 crc32c_mix_neoverse_n1:
66 crc32_common_mix crc32c // whole function body comes from this macro, invoked with argument crc32c
67 .size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1
00 /**********************************************************************
1 Copyright(c) 2019 Arm Corporation All rights reserved.
1 Copyright(c) 2019-2020 Arm Corporation All rights reserved.
22
33 Redistribution and use in source and binary forms, with or without
44 modification, are permitted provided that the following conditions
6161 DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
6262 {
6363 unsigned long auxval = getauxval(AT_HWCAP);
64 if (auxval & HWCAP_CRC32)
65 return PROVIDER_INFO(crc32_iscsi_refl_hw_fold);
64 if (auxval & HWCAP_CRC32) {
65 switch (get_micro_arch_id()) {
66 case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
67 case MICRO_ARCH_ID(ARM, CORTEX_A57):
68 case MICRO_ARCH_ID(ARM, CORTEX_A72):
69 return PROVIDER_INFO(crc32_iscsi_crc_ext);
70 }
71 }
72 if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
73 return PROVIDER_INFO(crc32_iscsi_3crc_fold);
74 }
75
6676 if (auxval & HWCAP_PMULL) {
6777 return PROVIDER_INFO(crc32_iscsi_refl_pmull);
6878 }
7383 DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
7484 {
7585 unsigned long auxval = getauxval(AT_HWCAP);
76 if (auxval & HWCAP_CRC32)
77 return PROVIDER_INFO(crc32_gzip_refl_hw_fold);
86
87 if (auxval & HWCAP_CRC32) {
88 switch (get_micro_arch_id()) {
89 case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
90 case MICRO_ARCH_ID(ARM, CORTEX_A57):
91 case MICRO_ARCH_ID(ARM, CORTEX_A72):
92 return PROVIDER_INFO(crc32_gzip_refl_crc_ext);
93 }
94 }
95 if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
96 return PROVIDER_INFO(crc32_gzip_refl_3crc_fold);
97 }
98
7899 if (auxval & HWCAP_PMULL)
79100 return PROVIDER_INFO(crc32_gzip_refl_pmull);
80101
7272 %endif
7373
7474 align 16
75 global crc16_t10dif_01:ISAL_SYM_TYPE_FUNCTION
75 mk_global crc16_t10dif_01, function
7676 crc16_t10dif_01:
77 endbranch
7778
7879 ; adjust the 16-bit initial_crc value, scale it to 32 bits
7980 shl arg1_low32, 16
7272 %endif
7373
7474 align 16
75 global crc16_t10dif_02:ISAL_SYM_TYPE_FUNCTION
75 mk_global crc16_t10dif_02, function
7676 crc16_t10dif_02:
77 endbranch
7778
7879 ; adjust the 16-bit initial_crc value, scale it to 32 bits
7980 shl arg1_low32, 16
8181 %endif
8282
8383 align 16
84 global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION
84 mk_global FUNCTION_NAME, function
8585 FUNCTION_NAME:
86 endbranch
8687
8788 ; adjust the 16-bit initial_crc value, scale it to 32 bits
8889 shl arg1_low32, 16
6565 %endif
6666
6767 align 16
68 global crc16_t10dif_by4:ISAL_SYM_TYPE_FUNCTION
68 mk_global crc16_t10dif_by4, function
6969 crc16_t10dif_by4:
70 endbranch
7071
7172 ; adjust the 16-bit initial_crc value, scale it to 32 bits
7273 shl arg1_low32, 16
6868 %endif
6969
7070 align 16
71 global crc16_t10dif_copy_by4:ISAL_SYM_TYPE_FUNCTION
71 mk_global crc16_t10dif_copy_by4, function
7272 crc16_t10dif_copy_by4:
73 endbranch
7374
7475 ; adjust the 16-bit initial_crc value, scale it to 32 bits
7576 shl arg1_low32, 16
6868 %endif
6969
7070 align 16
71 global crc16_t10dif_copy_by4_02:ISAL_SYM_TYPE_FUNCTION
71 mk_global crc16_t10dif_copy_by4_02, function
7272 crc16_t10dif_copy_by4_02:
73 endbranch
7374
7475 ; adjust the 16-bit initial_crc value, scale it to 32 bits
7576 shl arg1_low32, 16
9191 %endif
9292
9393 align 16
94 global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION
94 mk_global FUNCTION_NAME, function
9595 FUNCTION_NAME:
96 endbranch
9697
9798 not arg1_low32
9899 sub rsp, VARIABLE_OFFSET
8585 %endif
8686
8787 align 16
88 global crc32_gzip_refl_by8:ISAL_SYM_TYPE_FUNCTION
88 mk_global crc32_gzip_refl_by8, function
8989 crc32_gzip_refl_by8:
90 endbranch
9091
9192 ; unsigned long c = crc ^ 0xffffffffL;
9293 not arg1_low32 ;
8585 %endif
8686
8787 align 16
88 global crc32_gzip_refl_by8_02:ISAL_SYM_TYPE_FUNCTION
88 mk_global crc32_gzip_refl_by8_02, function
8989 crc32_gzip_refl_by8_02:
90 endbranch
9091 not arg1_low32
9192 sub rsp, VARIABLE_OFFSET
9293
7171 %define VARIABLE_OFFSET 16*2+8
7272 %endif
7373 align 16
74 global crc32_ieee_01:ISAL_SYM_TYPE_FUNCTION
74 mk_global crc32_ieee_01, function
7575 crc32_ieee_01:
76 endbranch
7677
7778 not arg1_low32 ;~init_crc
7879
7171 %define VARIABLE_OFFSET 16*2+8
7272 %endif
7373 align 16
74 global crc32_ieee_02:ISAL_SYM_TYPE_FUNCTION
74 mk_global crc32_ieee_02, function
7575 crc32_ieee_02:
76 endbranch
7677
7778 not arg1_low32 ;~init_crc
7879
8181 %endif
8282
8383 align 16
84 global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION
84 mk_global FUNCTION_NAME, function
8585 FUNCTION_NAME:
86 endbranch
8687
8788 not arg1_low32
8889 sub rsp, VARIABLE_OFFSET
7373 %endif
7474
7575 align 16
76 global crc32_ieee_by4:ISAL_SYM_TYPE_FUNCTION
76 mk_global crc32_ieee_by4, function
7777 crc32_ieee_by4:
78 endbranch
7879
7980 not arg1_low32
8081
152152 ;;; crc_init = r8
153153 ;;;
154154
155 global crc32_iscsi_00:ISAL_SYM_TYPE_FUNCTION
155 mk_global crc32_iscsi_00, function
156156 crc32_iscsi_00:
157 endbranch
157158
158159 %ifidn __OUTPUT_FORMAT__, elf64
159160 %define bufp rdi
4949 ;;; len = rdx
5050 ;;; crc_init = r8
5151
52 global crc32_iscsi_01:ISAL_SYM_TYPE_FUNCTION
52 mk_global crc32_iscsi_01, function
5353 crc32_iscsi_01:
54 endbranch
5455
5556 %ifidn __OUTPUT_FORMAT__, elf64
5657 %define bufp rdi
213214 %rep 128-1
214215
215216 CONCAT(crc_,i,:)
217 endbranch
216218 crc32 crc_init, qword [block_0 - i*8]
217219 crc32 crc1, qword [block_1 - i*8]
218220 crc32 crc2, qword [block_2 - i*8]
0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1 ; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
2 ;
3 ; Redistribution and use in source and binary forms, with or without
4 ; modification, are permitted provided that the following conditions
5 ; are met:
6 ; * Redistributions of source code must retain the above copyright
7 ; notice, this list of conditions and the following disclaimer.
8 ; * Redistributions in binary form must reproduce the above copyright
9 ; notice, this list of conditions and the following disclaimer in
10 ; the documentation and/or other materials provided with the
11 ; distribution.
12 ; * Neither the name of Intel Corporation nor the names of its
13 ; contributors may be used to endorse or promote products derived
14 ; from this software without specific prior written permission.
15 ;
16 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28
29 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30 ; Function API:
31 ; UINT32 crc32_iscsi_by16_10(
32 ; UINT32 init_crc, //initial CRC value, 32 bits
33 ; const unsigned char *buf, //buffer pointer to calculate CRC on
34 ; UINT64 len //buffer length in bytes (64-bit data)
35 ; );
36 ;
37 ; Authors:
38 ; Erdinc Ozturk
39 ; Vinodh Gopal
40 ; James Guilford
41 ;
42 ; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
43 ; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
44 ;
45 ;
46
47 %include "reg_sizes.asm"
48
49 %ifndef FUNCTION_NAME
50 %define FUNCTION_NAME crc32_iscsi_by16_10
51 %endif
52
53 %if (AS_FEATURE_LEVEL) >= 10
54
55 [bits 64]
56 default rel
57
58 section .text
59
60
61 %ifidn __OUTPUT_FORMAT__, win64
62 %xdefine arg1 r8
63 %xdefine arg2 rcx
64 %xdefine arg3 rdx
65
66 %xdefine arg1_low32 r8d
67 %else
68 %xdefine arg1 rdx
69 %xdefine arg2 rdi
70 %xdefine arg3 rsi
71
72 %xdefine arg1_low32 edx
73 %endif
74
75 %define TMP 16*0
76 %ifidn __OUTPUT_FORMAT__, win64
77 %define XMM_SAVE 16*2
78 %define VARIABLE_OFFSET 16*12+8
79 %else
80 %define VARIABLE_OFFSET 16*2+8
81 %endif
82
83 align 16
84 mk_global FUNCTION_NAME, function
85 FUNCTION_NAME:
86 endbranch
87 sub rsp, VARIABLE_OFFSET
88
89 %ifidn __OUTPUT_FORMAT__, win64
90 ; save xmm6-xmm15 on the stack so they can be restored at .cleanup
91 vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
92 vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
93 vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
94 vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
95 vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
96 vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
97 vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
98 vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
99 vmovdqa [rsp + XMM_SAVE + 16*8], xmm14
100 vmovdqa [rsp + XMM_SAVE + 16*9], xmm15
101 %endif
102
103 ; check if smaller than 256B
104 cmp arg3, 256
105 jl .less_than_256
106
107 ; load the initial crc value
108 vmovd xmm10, arg1_low32 ; initial crc
109
110 ; receive the initial 64B data, xor the initial crc value
111 vmovdqu8 zmm0, [arg2+16*0]
112 vmovdqu8 zmm4, [arg2+16*4]
113 vpxorq zmm0, zmm10
114 vbroadcasti32x4 zmm10, [rk3] ;xmm10 has rk3 and rk4
115 ;imm value of pclmulqdq instruction will determine which constant to use
116
117 sub arg3, 256
118 cmp arg3, 256
119 jl .fold_128_B_loop
120
121 vmovdqu8 zmm7, [arg2+16*8]
122 vmovdqu8 zmm8, [arg2+16*12]
123 vbroadcasti32x4 zmm16, [rk_1] ;zmm16 has rk-1 and rk-2
124 sub arg3, 256
125
126 .fold_256_B_loop:
127 add arg2, 256
128 vmovdqu8 zmm3, [arg2+16*0]
129 vpclmulqdq zmm1, zmm0, zmm16, 0x10
130 vpclmulqdq zmm2, zmm0, zmm16, 0x01
131 vpxorq zmm0, zmm1, zmm2
132 vpxorq zmm0, zmm0, zmm3
133
134 vmovdqu8 zmm9, [arg2+16*4]
135 vpclmulqdq zmm5, zmm4, zmm16, 0x10
136 vpclmulqdq zmm6, zmm4, zmm16, 0x01
137 vpxorq zmm4, zmm5, zmm6
138 vpxorq zmm4, zmm4, zmm9
139
140 vmovdqu8 zmm11, [arg2+16*8]
141 vpclmulqdq zmm12, zmm7, zmm16, 0x10
142 vpclmulqdq zmm13, zmm7, zmm16, 0x01
143 vpxorq zmm7, zmm12, zmm13
144 vpxorq zmm7, zmm7, zmm11
145
146 vmovdqu8 zmm17, [arg2+16*12]
147 vpclmulqdq zmm14, zmm8, zmm16, 0x10
148 vpclmulqdq zmm15, zmm8, zmm16, 0x01
149 vpxorq zmm8, zmm14, zmm15
150 vpxorq zmm8, zmm8, zmm17
151
152 sub arg3, 256
153 jge .fold_256_B_loop
154
155 ;; Fold 256 into 128
156 add arg2, 256
157 vpclmulqdq zmm1, zmm0, zmm10, 0x01
158 vpclmulqdq zmm2, zmm0, zmm10, 0x10
159 vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC
160
161 vpclmulqdq zmm5, zmm4, zmm10, 0x01
162 vpclmulqdq zmm6, zmm4, zmm10, 0x10
163 vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC
164
165 vmovdqa32 zmm0, zmm7
166 vmovdqa32 zmm4, zmm8
167
168 add arg3, 128
169 jmp .fold_128_B_register
170
171
172
173 ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The fold_128_B_loop
174 ; loop will fold 128B at a time until we have 128+y Bytes of buffer
175
176 ; fold 128B at a time. This section of the code folds 2 zmm registers (zmm0, zmm4) in parallel
177 .fold_128_B_loop:
178 add arg2, 128
179 vmovdqu8 zmm8, [arg2+16*0]
180 vpclmulqdq zmm2, zmm0, zmm10, 0x10
181 vpclmulqdq zmm1, zmm0, zmm10, 0x01
182 vpxorq zmm0, zmm2, zmm1
183 vpxorq zmm0, zmm0, zmm8
184
185 vmovdqu8 zmm9, [arg2+16*4]
186 vpclmulqdq zmm5, zmm4, zmm10, 0x10
187 vpclmulqdq zmm6, zmm4, zmm10, 0x01
188 vpxorq zmm4, zmm5, zmm6
189 vpxorq zmm4, zmm4, zmm9
190
191 sub arg3, 128
192 jge .fold_128_B_loop
193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
194
195 add arg2, 128
196 ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
197 ; the 128B of folded data is in 2 zmm registers: zmm0 and zmm4
198
199 .fold_128_B_register:
200 ; fold the 8 128b parts into 1 xmm register with different constants
201 vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16
202 vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0
203 vpclmulqdq zmm1, zmm0, zmm16, 0x01
204 vpclmulqdq zmm2, zmm0, zmm16, 0x10
205 vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand
206
207 vpclmulqdq zmm5, zmm4, zmm11, 0x01
208 vpclmulqdq zmm6, zmm4, zmm11, 0x10
209 vmovdqa xmm10, [rk1] ; Needed later in reduction loop
210 vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC
211 vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC
212
213 vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10
214 vpxorq ymm8, ymm8, ymm1
215 vextracti64x2 xmm5, ymm8, 1
216 vpxorq xmm7, xmm5, xmm8
217
218 ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
219 ; instead of a cmp instruction, we use the negative flag with the jl instruction
220 add arg3, 128-16
221 jl .final_reduction_for_128
222
223 ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
224 ; we can fold 16 bytes at a time if y>=16
225 ; continue folding 16B at a time
226
227 .16B_reduction_loop:
228 vpclmulqdq xmm8, xmm7, xmm10, 0x1
229 vpclmulqdq xmm7, xmm7, xmm10, 0x10
230 vpxor xmm7, xmm8
231 vmovdqu xmm0, [arg2]
232 vpxor xmm7, xmm0
233 add arg2, 16
234 sub arg3, 16
235 ; instead of a cmp instruction, we utilize the flags with the jge instruction
236 ; equivalent of: cmp arg3, 16-16
237 ; check if there is any more 16B in the buffer to be able to fold
238 jge .16B_reduction_loop
239
240 ;now we have 16+z bytes left to reduce, where 0<= z < 16.
241 ;first, we reduce the data in the xmm7 register
242
243
244 .final_reduction_for_128:
245 add arg3, 16
246 je .128_done
247
248 ; here we are getting data that is less than 16 bytes.
249 ; since we know that there was data before the pointer, we can offset
250 ; the input pointer before the actual point, to receive exactly 16 bytes.
251 ; after that the registers need to be adjusted.
252 .get_last_two_xmms:
253
254 vmovdqa xmm2, xmm7
255 vmovdqu xmm1, [arg2 - 16 + arg3]
256
257 ; get rid of the extra data that was loaded before
258 ; load the shift constant
259 lea rax, [pshufb_shf_table]
260 add rax, arg3
261 vmovdqu xmm0, [rax]
262
263 vpshufb xmm7, xmm0
264 vpxor xmm0, [mask3]
265 vpshufb xmm2, xmm0
266
267 vpblendvb xmm2, xmm2, xmm1, xmm0
268 ;;;;;;;;;;
269 vpclmulqdq xmm8, xmm7, xmm10, 0x1
270 vpclmulqdq xmm7, xmm7, xmm10, 0x10
271 vpxor xmm7, xmm8
272 vpxor xmm7, xmm2
273
274 .128_done:
275 ; compute crc of a 128-bit value
276 vmovdqa xmm10, [rk5]
277 vmovdqa xmm0, xmm7
278
279 ;64b fold
280 vpclmulqdq xmm7, xmm10, 0
281 vpsrldq xmm0, 8
282 vpxor xmm7, xmm0
283
284 ;32b fold
285 vmovdqa xmm0, xmm7
286 vpslldq xmm7, 4
287 vpclmulqdq xmm7, xmm10, 0x10
288 vpxor xmm7, xmm0
289
290
291 ;barrett reduction
292 .barrett:
293 vpand xmm7, [mask2]
294 vmovdqa xmm1, xmm7
295 vmovdqa xmm2, xmm7
296 vmovdqa xmm10, [rk7]
297
298 vpclmulqdq xmm7, xmm10, 0
299 vpxor xmm7, xmm2
300 vpand xmm7, [mask]
301 vmovdqa xmm2, xmm7
302 vpclmulqdq xmm7, xmm10, 0x10
303 vpxor xmm7, xmm2
304 vpxor xmm7, xmm1
305 vpextrd eax, xmm7, 2
306
307 .cleanup:
308
309 %ifidn __OUTPUT_FORMAT__, win64
310 vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
311 vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
312 vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
313 vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
314 vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
315 vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
316 vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
317 vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
318 vmovdqa xmm14, [rsp + XMM_SAVE + 16*8]
319 vmovdqa xmm15, [rsp + XMM_SAVE + 16*9]
320 %endif
321 add rsp, VARIABLE_OFFSET
322 ret
323
324
325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
329
330 align 16
331 .less_than_256:
332
333 ; check if there is enough buffer to be able to fold 16B at a time
334 cmp arg3, 32
335 jl .less_than_32
336
337 ; if there is, load the constants
338 vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
339
340 vmovd xmm0, arg1_low32 ; get the initial crc value
341 vmovdqu xmm7, [arg2] ; load the plaintext
342 vpxor xmm7, xmm0
343
344 ; update the buffer pointer
345 add arg2, 16
346
347 ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
348 sub arg3, 32
349
350 jmp .16B_reduction_loop
351
352
353 align 16
354 .less_than_32:
355 ; mov initial crc to the return value. this is necessary for zero-length buffers.
356 mov eax, arg1_low32
357 test arg3, arg3
358 je .cleanup
359
360 vmovd xmm0, arg1_low32 ; get the initial crc value
361
362 cmp arg3, 16
363 je .exact_16_left
364 jl .less_than_16_left
365
366 vmovdqu xmm7, [arg2] ; load the plaintext
367 vpxor xmm7, xmm0 ; xor the initial crc value
368 add arg2, 16
369 sub arg3, 16
370 vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
371 jmp .get_last_two_xmms
372
373 align 16
374 .less_than_16_left:
375 ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
376
377 vpxor xmm1, xmm1
378 mov r11, rsp
379 vmovdqa [r11], xmm1
380
381 cmp arg3, 4
382 jl .only_less_than_4
383
384 ; backup the counter value
385 mov r9, arg3
386 cmp arg3, 8
387 jl .less_than_8_left
388
389 ; load 8 Bytes
390 mov rax, [arg2]
391 mov [r11], rax
392 add r11, 8
393 sub arg3, 8
394 add arg2, 8
395 .less_than_8_left:
396
397 cmp arg3, 4
398 jl .less_than_4_left
399
400 ; load 4 Bytes
401 mov eax, [arg2]
402 mov [r11], eax
403 add r11, 4
404 sub arg3, 4
405 add arg2, 4
406 .less_than_4_left:
407
408 cmp arg3, 2
409 jl .less_than_2_left
410
411 ; load 2 Bytes
412 mov ax, [arg2]
413 mov [r11], ax
414 add r11, 2
415 sub arg3, 2
416 add arg2, 2
417 .less_than_2_left:
418 cmp arg3, 1
419 jl .zero_left
420
421 ; load 1 Byte
422 mov al, [arg2]
423 mov [r11], al
424
425 .zero_left:
426 vmovdqa xmm7, [rsp]
427 vpxor xmm7, xmm0 ; xor the initial crc value
428
429 lea rax,[pshufb_shf_table]
430 vmovdqu xmm0, [rax + r9]
431 vpshufb xmm7,xmm0
432 jmp .128_done
433
434 align 16
435 .exact_16_left:
436 vmovdqu xmm7, [arg2]
437 vpxor xmm7, xmm0 ; xor the initial crc value
438 jmp .128_done
439
440 .only_less_than_4:
441 cmp arg3, 3
442 jl .only_less_than_3
443
444 ; load 3 Bytes
445 mov al, [arg2]
446 mov [r11], al
447
448 mov al, [arg2+1]
449 mov [r11+1], al
450
451 mov al, [arg2+2]
452 mov [r11+2], al
453
454 vmovdqa xmm7, [rsp]
455 vpxor xmm7, xmm0 ; xor the initial crc value
456
457 vpslldq xmm7, 5
458 jmp .barrett
459
460 .only_less_than_3:
461 cmp arg3, 2
462 jl .only_less_than_2
463
464 ; load 2 Bytes
465 mov al, [arg2]
466 mov [r11], al
467
468 mov al, [arg2+1]
469 mov [r11+1], al
470
471 vmovdqa xmm7, [rsp]
472 vpxor xmm7, xmm0 ; xor the initial crc value
473
474 vpslldq xmm7, 6
475 jmp .barrett
476
477 .only_less_than_2:
478 ; load 1 Byte
479 mov al, [arg2]
480 mov [r11], al
481
482 vmovdqa xmm7, [rsp]
483 vpxor xmm7, xmm0 ; xor the initial crc value
484
485 vpslldq xmm7, 7
486 jmp .barrett
487
488 section .data
489 align 32
490
491 %ifndef USE_CONSTS
492 ; precomputed constants
493 rk_1: dq 0x00000000b9e02b86
494 rk_2: dq 0x00000000dcb17aa4
495 rk1: dq 0x00000000493c7d27
496 rk2: dq 0x0000000ec1068c50
497 rk3: dq 0x0000000206e38d70
498 rk4: dq 0x000000006992cea2
499 rk5: dq 0x00000000493c7d27
500 rk6: dq 0x00000000dd45aab8
501 rk7: dq 0x00000000dea713f0
502 rk8: dq 0x0000000105ec76f0
503 rk9: dq 0x0000000047db8317
504 rk10: dq 0x000000002ad91c30
505 rk11: dq 0x000000000715ce53
506 rk12: dq 0x00000000c49f4f67
507 rk13: dq 0x0000000039d3b296
508 rk14: dq 0x00000000083a6eec
509 rk15: dq 0x000000009e4addf8
510 rk16: dq 0x00000000740eef02
511 rk17: dq 0x00000000ddc0152b
512 rk18: dq 0x000000001c291d04
513 rk19: dq 0x00000000ba4fc28e
514 rk20: dq 0x000000003da6d0cb
515
516 rk_1b: dq 0x00000000493c7d27
517 rk_2b: dq 0x0000000ec1068c50
518 dq 0x0000000000000000
519 dq 0x0000000000000000
520
521 %else
522 INCLUDE_CONSTS
523 %endif
524
525 pshufb_shf_table:
526 ; use these values for shift constants for the pshufb instruction
527 ; different alignments result in values as shown:
528 ; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
529 ; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
530 ; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
531 ; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
532 ; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
533 ; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
534 ; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
535 ; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
536 ; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
537 ; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
538 ; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
539 ; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
540 ; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
541 ; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
542 ; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
543 dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
544 dq 0x0706050403020100, 0x000e0d0c0b0a0908
545
546 mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
547 mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
548 mask3: dq 0x8080808080808080, 0x8080808080808080
549
550 %else ; Assembler doesn't understand these opcodes. Add empty symbol for windows.
551 %ifidn __OUTPUT_FORMAT__, win64
552 global no_ %+ FUNCTION_NAME
553 no_ %+ FUNCTION_NAME %+ :
554 %endif
555 %endif ; (AS_FEATURE_LEVEL) >= 10
6161 %define VARIABLE_OFFSET 16*2+8
6262 %endif
6363 align 16
64 global crc64_ecma_norm_by8:ISAL_SYM_TYPE_FUNCTION
64 mk_global crc64_ecma_norm_by8, function
6565 crc64_ecma_norm_by8:
66 endbranch
6667
6768 not arg1 ;~init_crc
6869
6767
6868
6969 align 16
70 global crc64_ecma_refl_by8:ISAL_SYM_TYPE_FUNCTION
70 mk_global crc64_ecma_refl_by8, function
7171 crc64_ecma_refl_by8:
72 endbranch
7273 ; uint64_t c = crc ^ 0xffffffff,ffffffffL;
7374 not arg1
7475 sub rsp, VARIABLE_OFFSET
6868 %endif
6969
7070 align 16
71 global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION
71 mk_global FUNCTION_NAME, function
7272 FUNCTION_NAME:
73 endbranch
7374 not arg1
7475 sub rsp, VARIABLE_OFFSET
7576
6060 %define VARIABLE_OFFSET 16*2+8
6161 %endif
6262 align 16
63 global crc64_iso_norm_by8:ISAL_SYM_TYPE_FUNCTION
63 mk_global crc64_iso_norm_by8, function
6464 crc64_iso_norm_by8:
65 endbranch
6566
6667 not arg1 ;~init_crc
6768
6969 %endif
7070
7171 align 16
72 global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION
72 mk_global FUNCTION_NAME, function
7373 FUNCTION_NAME:
74 endbranch
7475 not arg1
7576 sub rsp, VARIABLE_OFFSET
7677
6464
6565
6666 align 16
67 global crc64_iso_refl_by8:ISAL_SYM_TYPE_FUNCTION
67 mk_global crc64_iso_refl_by8, function
6868 crc64_iso_refl_by8:
69 endbranch
6970 ; uint64_t c = crc ^ 0xffffffff,ffffffffL;
7071 not arg1
7172 sub rsp, VARIABLE_OFFSET
6060 %define VARIABLE_OFFSET 16*2+8
6161 %endif
6262 align 16
63 global crc64_jones_norm_by8:ISAL_SYM_TYPE_FUNCTION
63 mk_global crc64_jones_norm_by8, function
6464 crc64_jones_norm_by8:
65 endbranch
6566
6667 not arg1 ;~init_crc
6768
6464
6565
6666 align 16
67 global crc64_jones_refl_by8:ISAL_SYM_TYPE_FUNCTION
67 mk_global crc64_jones_refl_by8, function
6868 crc64_jones_refl_by8:
69 endbranch
6970 ; uint64_t c = crc ^ 0xffffffff,ffffffffL;
7071 not arg1
7172 sub rsp, VARIABLE_OFFSET
5656 %if (AS_FEATURE_LEVEL) >= 10
5757 extern crc32_gzip_refl_by16_10
5858 extern crc32_ieee_by16_10
59 extern crc32_iscsi_by16_10
5960 extern crc16_t10dif_by16_10
6061 %endif
6162
7879 ;;;;
7980 ; crc32_iscsi multibinary function
8081 ;;;;
81 global crc32_iscsi:ISAL_SYM_TYPE_FUNCTION
82 mk_global crc32_iscsi, function
8283 crc32_iscsi_mbinit:
84 endbranch
8385 call crc32_iscsi_dispatch_init
8486 crc32_iscsi:
87 endbranch
8588 jmp qword [crc32_iscsi_dispatched]
8689
8790 crc32_iscsi_dispatch_init:
9093 push rcx
9194 push rdx
9295 push rsi
96 push rdi
9397 lea rsi, [crc32_iscsi_base WRT_OPT] ; Default
9498
9599 mov eax, 1
96100 cpuid
97 lea rbx, [crc32_iscsi_00 WRT_OPT]
98 lea rax, [crc32_iscsi_01 WRT_OPT]
99
100 test ecx, FLAG_CPUID1_ECX_SSE4_2
101 cmovne rsi, rbx
102 test ecx, FLAG_CPUID1_ECX_CLMUL
103 cmovne rsi, rax
101 mov ebx, ecx ; save cpuid1.ecx
102 test ecx, FLAG_CPUID1_ECX_SSE4_2
103 jz .crc_iscsi_init_done ; use iscsi_base
104 lea rsi, [crc32_iscsi_00 WRT_OPT]
105 test ecx, FLAG_CPUID1_ECX_CLMUL
106 jz .crc_iscsi_init_done ; use crc32_iscsi_00
107 lea rsi, [crc32_iscsi_01 WRT_OPT]
108
109 ;; Test for XMM_YMM support/AVX
110 test ecx, FLAG_CPUID1_ECX_OSXSAVE
111 je .crc_iscsi_init_done
112 xor ecx, ecx
113 xgetbv ; xcr -> edx:eax
114 mov edi, eax ; save xgetbv.eax
115
116 and eax, FLAG_XGETBV_EAX_XMM_YMM
117 cmp eax, FLAG_XGETBV_EAX_XMM_YMM
118 jne .crc_iscsi_init_done
119 test ebx, FLAG_CPUID1_ECX_AVX
120 je .crc_iscsi_init_done
121 ;; AVX/02 opt if available
122
123 %if AS_FEATURE_LEVEL >= 10
124 ;; Test for AVX2
125 xor ecx, ecx
126 mov eax, 7
127 cpuid
128 test ebx, FLAG_CPUID7_EBX_AVX2
129 je .crc_iscsi_init_done ; No AVX2 possible
130
131 ;; Test for AVX512
132 and edi, FLAG_XGETBV_EAX_ZMM_OPM
133 cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
134 jne .crc_iscsi_init_done ; No AVX512 possible
135 and ebx, FLAGS_CPUID7_EBX_AVX512_G1
136 cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
137 jne .crc_iscsi_init_done
138
139 and ecx, FLAGS_CPUID7_ECX_AVX512_G2
140 cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
141 lea rbx, [crc32_iscsi_by16_10 WRT_OPT] ; AVX512/10 opt
142 cmove rsi, rbx
143 %endif
144
145 .crc_iscsi_init_done:
104146 mov [crc32_iscsi_dispatched], rsi
147 pop rdi
105148 pop rsi
106149 pop rdx
107150 pop rcx
112155 ;;;;
113156 ; crc32_ieee multibinary function
114157 ;;;;
115 global crc32_ieee:ISAL_SYM_TYPE_FUNCTION
158 mk_global crc32_ieee, function
116159 crc32_ieee_mbinit:
160 endbranch
117161 call crc32_ieee_dispatch_init
118162 crc32_ieee:
163 endbranch
119164 jmp qword [crc32_ieee_dispatched]
120165
121166 crc32_ieee_dispatch_init:
191236 ;;;;
192237 ; crc16_t10dif multibinary function
193238 ;;;;
194 global crc16_t10dif:ISAL_SYM_TYPE_FUNCTION
239 mk_global crc16_t10dif, function
195240 crc16_t10dif_mbinit:
241 endbranch
196242 call crc16_t10dif_dispatch_init
197243 crc16_t10dif:
244 endbranch
198245 jmp qword [crc16_t10dif_dispatched]
199246
200247 crc16_t10dif_dispatch_init:
0 # ISA-L Build Details
1
2 For x86-64 builds it is highly recommended to get an up-to-date version of
3 [nasm] that can understand the latest instruction sets. Building with an older
4 version is usually possible but the library may lack some function versions for
5 the best performance.
6
7 ## Windows Build Environment Details
8
9 The windows dynamic and static libraries can be built with the nmake tool on the
10 windows command line when appropriate paths and tools are set up as follows.
11
12 ### Download nasm and put into path
13
14 Download and install [nasm] and add location to path.
15
16 set PATH=%PATH%;C:\Program Files\NASM
17
18 ### Setup compiler environment
19
20 Install compiler and run environment setup script.
21
22 Compilers for windows usually have a batch file to set up environment variables
23 for the command line called `vcvarsall.bat` or `compilervars.bat` or a link to
24 run these. For Visual Studio this may be as follows for Community edition.
25
26 C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat x64
27
28 For the Intel compiler the path is typically as follows where yyyy, x, zzz
29 represent the version.
30
31 C:\Program Files (x86)\IntelSWTools\system_studio_for_windows_yyyy.x.zzz\compilers_and_libraries_yyyy\bin\compilervars.bat intel64
32
33 ### Build ISA-L libs and copy to appropriate place
34
35 Run `nmake /f Makefile.nmake`
36
37 This should build isa-l.dll, isa-l.lib and isa-l_static.lib. You may want to
38 copy the libs to a system directory in the dynamic linking path such as
39 `C:\windows\system32` or to a project directory.
40
41 To build a simple program with a static library.
42
43 cl /Fe: test.exe test.c isa-l_static.lib
44
45 [nasm]: https://www.nasm.us
0 # ISA-L Testing
1
2 Tests are divided into check tests, unit tests and fuzz tests. Check tests,
3 built with `make check`, should have no additional dependencies. Other unit
4 tests built with `make test` may have additional dependencies in order to make
5 comparisons of the output of ISA-L to other standard libraries and ensure
6 compatibility. Fuzz tests are meant to be run with a fuzzing tool such as [AFL]
7 or [llvm libFuzzer] that directs the input data based on coverage. There
8 are a number of scripts in the /tools directory to help with automating the
9 running of tests.
10
11 ## Test check
12
13 `./tools/test_autorun.sh` is a helper script for kicking off check tests, that
14 typically run for a few minutes, or extended tests that could run much
15 longer. The command `test_autorun.sh check` builds and runs all check tests with
16 autotools and runs other short tests to ensure check tests, unit tests,
17 examples, install, exe stack, format are correct. Each run of `test_autorun.sh`
18 builds tests with a new random test seed that ensures that each run is unique to
19 the seed but deterministic for debugging. Tests are also built with sanitizers
20 and Electric Fence if available.
21
22 ## Extended tests
23
24 Extended tests are initiated with the command `./tools/test_autorun.sh
25 ext`. These build and run check tests, unit tests, and other utilities that can
26 take much longer than check tests alone. This includes special compression tools
27 and some cross targets such as the no-arch build of base functions only and
28 mingw build if tools are available.
29
30 ## Fuzz testing
31
32 `./tools/test_fuzz.sh` is a helper script for fuzzing to setup, build and run
33 the ISA-L inflate fuzz tests on multiple fuzz tools. Fuzzing with
34 [llvm libFuzzer] requires clang compiler tools with `-fsanitize=fuzzer` or
35 `libFuzzer` installed. You can invoke the default fuzz tests under llvm with
36
37 ./tools/test_fuzz.sh -e checked
38
39 To use [AFL], install tools and system setup for `afl-fuzz` and run
40
41 ./tools/test_fuzz.sh -e checked --afl 1 --llvm -1 -d 1
42
43 This uses internal vectors as a seed. You can also specify a sample file to use
44 as a seed instead with `-f <file>`. One of three fuzz tests can be invoked:
45 checked, simple, and round_trip.
46
47 [llvm libFuzzer]: https://llvm.org/docs/LibFuzzer.html
48 [AFL]: https://github.com/google/AFL
148148
149149 other_tests += erasure_code/gen_rs_matrix_limits
150150
151 other_tests_x86_64 += \
152 erasure_code/gf_2vect_dot_prod_sse_test \
153 erasure_code/gf_3vect_dot_prod_sse_test \
154 erasure_code/gf_4vect_dot_prod_sse_test \
155 erasure_code/gf_5vect_dot_prod_sse_test \
156 erasure_code/gf_6vect_dot_prod_sse_test
157
158 other_tests_x86_32 += \
159 erasure_code/gf_2vect_dot_prod_sse_test \
160 erasure_code/gf_3vect_dot_prod_sse_test \
161 erasure_code/gf_4vect_dot_prod_sse_test \
162 erasure_code/gf_5vect_dot_prod_sse_test \
163 erasure_code/gf_6vect_dot_prod_sse_test
164
165151 other_src += include/test.h \
166152 include/types.h
5151 %define PS 8
5252 %define LOG_PS 3
5353
54 %define func(x) x:
54 %define func(x) x: endbranch
5555 %macro FUNC_SAVE 0
5656 push r12
5757 %endmacro
8383 %define func(x) proc_frame x
8484 %macro FUNC_SAVE 0
8585 alloc_stack stack_size
86 save_xmm128 xmm6, 0*16
87 save_xmm128 xmm7, 1*16
88 save_xmm128 xmm8, 2*16
86 vmovdqa [rsp + 0*16], xmm6
87 vmovdqa [rsp + 1*16], xmm7
88 vmovdqa [rsp + 2*16], xmm8
8989 save_reg r12, 3*16 + 0*8
9090 save_reg r13, 3*16 + 1*8
9191 save_reg r14, 3*16 + 2*8
126126
127127 %define PS 4
128128 %define LOG_PS 2
129 %define func(x) x:
129 %define func(x) x: endbranch
130130 %define arg(x) [ebp + PS*2 + PS*x]
131131 %define var(x) [ebp - PS - PS*x]
132132
237237 %endif
238238
239239 align 16
240 global gf_2vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
240 mk_global gf_2vect_dot_prod_avx, function
241241
242242 func(gf_2vect_dot_prod_avx)
243243 FUNC_SAVE
5353 %define PS 8
5454 %define LOG_PS 3
5555
56 %define func(x) x:
56 %define func(x) x: endbranch
5757 %macro FUNC_SAVE 0
5858 push r12
5959 %endmacro
130130
131131 %define PS 4
132132 %define LOG_PS 2
133 %define func(x) x:
133 %define func(x) x: endbranch
134134 %define arg(x) [ebp + PS*2 + PS*x]
135135 %define var(x) [ebp - PS - PS*x]
136136
247247 %endif
248248
249249 align 16
250 global gf_2vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
250 mk_global gf_2vect_dot_prod_avx2, function
251251
252252 func(gf_2vect_dot_prod_avx2)
253253 FUNC_SAVE
4949 %define PS 8
5050 %define LOG_PS 3
5151
52 %define func(x) x:
52 %define func(x) x: endbranch
5353 %macro FUNC_SAVE 0
5454 push r12
5555 %endmacro
159159 section .text
160160
161161 align 16
162 global gf_2vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
162 mk_global gf_2vect_dot_prod_avx512, function
163163 func(gf_2vect_dot_prod_avx512)
164164 FUNC_SAVE
165165 sub len, 64
5151 %define PS 8
5252 %define LOG_PS 3
5353
54 %define func(x) x:
54 %define func(x) x: endbranch
5555 %macro FUNC_SAVE 0
5656 push r12
5757 %endmacro
126126
127127 %define PS 4
128128 %define LOG_PS 2
129 %define func(x) x:
129 %define func(x) x: endbranch
130130 %define arg(x) [ebp + PS*2 + PS*x]
131131 %define var(x) [ebp - PS - PS*x]
132132
237237 %endif
238238
239239 align 16
240 global gf_2vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
240 mk_global gf_2vect_dot_prod_sse, function
241241
242242 func(gf_2vect_dot_prod_sse)
243243 FUNC_SAVE
+0
-480
erasure_code/gf_2vect_dot_prod_sse_test.c less more
0 /**********************************************************************
1 Copyright(c) 2011-2015 Intel Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Intel Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h> // for memset, memcmp
32 #include "erasure_code.h"
33 #include "types.h"
34
35 #ifndef FUNCTION_UNDER_TEST
36 # define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse
37 #endif
38 #ifndef TEST_MIN_SIZE
39 # define TEST_MIN_SIZE 16
40 #endif
41
42 #define str(s) #s
43 #define xstr(s) str(s)
44
45 #define TEST_LEN 8192
46 #define TEST_SIZE (TEST_LEN/2)
47 #define TEST_MEM TEST_SIZE
48 #define TEST_LOOPS 10000
49 #define TEST_TYPE_STR ""
50
51 #ifndef TEST_SOURCES
52 # define TEST_SOURCES 16
53 #endif
54 #ifndef RANDOMS
55 # define RANDOMS 20
56 #endif
57
58 #ifdef EC_ALIGNED_ADDR
59 // Define power of 2 range to check ptr, len alignment
60 # define PTR_ALIGN_CHK_B 0
61 # define LEN_ALIGN_CHK_B 0 // 0 for aligned only
62 #else
63 // Define power of 2 range to check ptr, len alignment
64 # define PTR_ALIGN_CHK_B 32
65 # define LEN_ALIGN_CHK_B 32 // 0 for aligned only
66 #endif
67
68 typedef unsigned char u8;
69
70 extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls,
71 unsigned char **src, unsigned char **dest);
72
/*
 * Print the first len bytes of buf as hex, 32 values per output line,
 * followed by a terminating newline.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", buf[pos] & 0xff);
		pos++;
		/* Wrap after every 32nd value */
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
83
/*
 * Print the first m bytes of each of the first k rows of s as hex,
 * one row per line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		unsigned char *line = s[row];

		for (col = 0; col < m; col++)
			printf(" %2x", line[col]);
		printf("\n");
	}
	printf("\n");
}
95
/*
 * Print a flat k x m byte array (row-major) as hex, one row per line,
 * followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		unsigned char *line = s + (row * m);

		for (col = 0; col < m; col++)
			printf(" %2x", line[col] & 0xff);
		printf("\n");
	}
	printf("\n");
}
107
108 int main(int argc, char *argv[])
109 {
110 int i, j, rtest, srcs;
111 void *buf;
112 u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32];
113 u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2];
114 u8 *buffs[TEST_SOURCES];
115
116 int align, size;
117 unsigned char *efence_buffs[TEST_SOURCES];
118 unsigned int offset;
119 u8 *ubuffs[TEST_SOURCES];
120 u8 *udest_ptrs[2];
121
122 printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
123
124 // Allocate the arrays
125 for (i = 0; i < TEST_SOURCES; i++) {
126 if (posix_memalign(&buf, 64, TEST_LEN)) {
127 printf("alloc error: Fail");
128 return -1;
129 }
130 buffs[i] = buf;
131 }
132
133 if (posix_memalign(&buf, 64, TEST_LEN)) {
134 printf("alloc error: Fail");
135 return -1;
136 }
137 dest1 = buf;
138
139 if (posix_memalign(&buf, 64, TEST_LEN)) {
140 printf("alloc error: Fail");
141 return -1;
142 }
143 dest2 = buf;
144
145 if (posix_memalign(&buf, 64, TEST_LEN)) {
146 printf("alloc error: Fail");
147 return -1;
148 }
149 dest_ref1 = buf;
150
151 if (posix_memalign(&buf, 64, TEST_LEN)) {
152 printf("alloc error: Fail");
153 return -1;
154 }
155 dest_ref2 = buf;
156
157 dest_ptrs[0] = dest1;
158 dest_ptrs[1] = dest2;
159
160 // Test of all zeros
161 for (i = 0; i < TEST_SOURCES; i++)
162 memset(buffs[i], 0, TEST_LEN);
163
164 memset(dest1, 0, TEST_LEN);
165 memset(dest2, 0, TEST_LEN);
166 memset(dest_ref1, 0, TEST_LEN);
167 memset(dest_ref2, 0, TEST_LEN);
168 memset(g1, 2, TEST_SOURCES);
169 memset(g2, 1, TEST_SOURCES);
170
171 for (i = 0; i < TEST_SOURCES; i++) {
172 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
173 gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
174 }
175
176 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
177 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
178 dest_ref2);
179
180 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
181
182 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
183 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
184 dump_matrix(buffs, 5, TEST_SOURCES);
185 printf("dprod_base:");
186 dump(dest_ref1, 25);
187 printf("dprod_dut:");
188 dump(dest1, 25);
189 return -1;
190 }
191 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
192 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
193 dump_matrix(buffs, 5, TEST_SOURCES);
194 printf("dprod_base:");
195 dump(dest_ref2, 25);
196 printf("dprod_dut:");
197 dump(dest2, 25);
198 return -1;
199 }
200
201 putchar('.');
202
203 // Rand data test
204
205 for (rtest = 0; rtest < RANDOMS; rtest++) {
206 for (i = 0; i < TEST_SOURCES; i++)
207 for (j = 0; j < TEST_LEN; j++)
208 buffs[i][j] = rand();
209
210 for (i = 0; i < TEST_SOURCES; i++) {
211 g1[i] = rand();
212 g2[i] = rand();
213 }
214
215 for (i = 0; i < TEST_SOURCES; i++) {
216 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
217 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
218 }
219
220 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
221 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
222 buffs, dest_ref2);
223
224 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
225
226 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
227 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
228 dump_matrix(buffs, 5, TEST_SOURCES);
229 printf("dprod_base:");
230 dump(dest_ref1, 25);
231 printf("dprod_dut:");
232 dump(dest1, 25);
233 return -1;
234 }
235 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
236 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
237 dump_matrix(buffs, 5, TEST_SOURCES);
238 printf("dprod_base:");
239 dump(dest_ref2, 25);
240 printf("dprod_dut:");
241 dump(dest2, 25);
242 return -1;
243 }
244
245 putchar('.');
246 }
247
248 // Rand data test with varied parameters
249 for (rtest = 0; rtest < RANDOMS; rtest++) {
250 for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
251 for (i = 0; i < srcs; i++)
252 for (j = 0; j < TEST_LEN; j++)
253 buffs[i][j] = rand();
254
255 for (i = 0; i < srcs; i++) {
256 g1[i] = rand();
257 g2[i] = rand();
258 }
259
260 for (i = 0; i < srcs; i++) {
261 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
262 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
263 }
264
265 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
266 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
267 dest_ref2);
268
269 FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
270
271 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
272 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
273 " test1 srcs=%d\n", srcs);
274 dump_matrix(buffs, 5, TEST_SOURCES);
275 printf("dprod_base:");
276 dump(dest_ref1, 25);
277 printf("dprod_dut:");
278 dump(dest1, 25);
279 return -1;
280 }
281 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
282 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
283 " test2 srcs=%d\n", srcs);
284 dump_matrix(buffs, 5, TEST_SOURCES);
285 printf("dprod_base:");
286 dump(dest_ref2, 25);
287 printf("dprod_dut:");
288 dump(dest2, 25);
289 return -1;
290 }
291
292 putchar('.');
293 }
294 }
295
296 // Run tests at end of buffer for Electric Fence
297 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
298 for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
299 for (i = 0; i < TEST_SOURCES; i++)
300 for (j = 0; j < TEST_LEN; j++)
301 buffs[i][j] = rand();
302
303 for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
304 efence_buffs[i] = buffs[i] + TEST_LEN - size;
305
306 for (i = 0; i < TEST_SOURCES; i++) {
307 g1[i] = rand();
308 g2[i] = rand();
309 }
310
311 for (i = 0; i < TEST_SOURCES; i++) {
312 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
313 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
314 }
315
316 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
317 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
318 efence_buffs, dest_ref2);
319
320 FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
321
322 if (0 != memcmp(dest_ref1, dest1, size)) {
323 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
324 dump_matrix(efence_buffs, 5, TEST_SOURCES);
325 printf("dprod_base:");
326 dump(dest_ref1, align);
327 printf("dprod_dut:");
328 dump(dest1, align);
329 return -1;
330 }
331
332 if (0 != memcmp(dest_ref2, dest2, size)) {
333 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
334 dump_matrix(efence_buffs, 5, TEST_SOURCES);
335 printf("dprod_base:");
336 dump(dest_ref2, align);
337 printf("dprod_dut:");
338 dump(dest2, align);
339 return -1;
340 }
341
342 putchar('.');
343 }
344
345 // Test rand ptr alignment if available
346
347 for (rtest = 0; rtest < RANDOMS; rtest++) {
348 size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
349 srcs = rand() % TEST_SOURCES;
350 if (srcs == 0)
351 continue;
352
353 offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
354 // Add random offsets
355 for (i = 0; i < srcs; i++)
356 ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
357
358 udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
359 udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
360
361 memset(dest1, 0, TEST_LEN); // zero pad to check write-over
362 memset(dest2, 0, TEST_LEN);
363
364 for (i = 0; i < srcs; i++)
365 for (j = 0; j < size; j++)
366 ubuffs[i][j] = rand();
367
368 for (i = 0; i < srcs; i++) {
369 g1[i] = rand();
370 g2[i] = rand();
371 }
372
373 for (i = 0; i < srcs; i++) {
374 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
375 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
376 }
377
378 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
379 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
380
381 FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
382
383 if (memcmp(dest_ref1, udest_ptrs[0], size)) {
384 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
385 srcs);
386 dump_matrix(ubuffs, 5, TEST_SOURCES);
387 printf("dprod_base:");
388 dump(dest_ref1, 25);
389 printf("dprod_dut:");
390 dump(udest_ptrs[0], 25);
391 return -1;
392 }
393 if (memcmp(dest_ref2, udest_ptrs[1], size)) {
394 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
395 srcs);
396 dump_matrix(ubuffs, 5, TEST_SOURCES);
397 printf("dprod_base:");
398 dump(dest_ref2, 25);
399 printf("dprod_dut:");
400 dump(udest_ptrs[1], 25);
401 return -1;
402 }
403 // Confirm that padding around dests is unchanged
404 memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
405 offset = udest_ptrs[0] - dest1;
406
407 if (memcmp(dest1, dest_ref1, offset)) {
408 printf("Fail rand ualign pad1 start\n");
409 return -1;
410 }
411 if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
412 printf("Fail rand ualign pad1 end\n");
413 return -1;
414 }
415
416 offset = udest_ptrs[1] - dest2;
417 if (memcmp(dest2, dest_ref1, offset)) {
418 printf("Fail rand ualign pad2 start\n");
419 return -1;
420 }
421 if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
422 printf("Fail rand ualign pad2 end\n");
423 return -1;
424 }
425
426 putchar('.');
427 }
428
429 // Test all size alignment
430 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
431
432 for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
433 srcs = TEST_SOURCES;
434
435 for (i = 0; i < srcs; i++)
436 for (j = 0; j < size; j++)
437 buffs[i][j] = rand();
438
439 for (i = 0; i < srcs; i++) {
440 g1[i] = rand();
441 g2[i] = rand();
442 }
443
444 for (i = 0; i < srcs; i++) {
445 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
446 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
447 }
448
449 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
450 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
451
452 FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
453
454 if (memcmp(dest_ref1, dest_ptrs[0], size)) {
455 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
456 size);
457 dump_matrix(buffs, 5, TEST_SOURCES);
458 printf("dprod_base:");
459 dump(dest_ref1, 25);
460 printf("dprod_dut:");
461 dump(dest_ptrs[0], 25);
462 return -1;
463 }
464 if (memcmp(dest_ref2, dest_ptrs[1], size)) {
465 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
466 size);
467 dump_matrix(buffs, 5, TEST_SOURCES);
468 printf("dprod_base:");
469 dump(dest_ref2, 25);
470 printf("dprod_dut:");
471 dump(dest_ptrs[1], 25);
472 return -1;
473 }
474 }
475
476 printf("Pass\n");
477 return 0;
478
479 }
9696 %define return rax
9797 %define return.w eax
9898
99 %define func(x) x:
99 %define func(x) x: endbranch
100100 %define FUNC_SAVE
101101 %define FUNC_RESTORE
102102 %endif
154154
155155
156156 align 16
157 global gf_2vect_mad_avx:ISAL_SYM_TYPE_FUNCTION
157 mk_global gf_2vect_mad_avx, function
158158
159159 func(gf_2vect_mad_avx)
160160 FUNC_SAVE
103103 %define return rax
104104 %define return.w eax
105105
106 %define func(x) x:
106 %define func(x) x: endbranch
107107 %define FUNC_SAVE
108108 %define FUNC_RESTORE
109109 %endif
162162 %define xtmpd2 ymm9
163163
164164 align 16
165 global gf_2vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION
165 mk_global gf_2vect_mad_avx2, function
166166
167167 func(gf_2vect_mad_avx2)
168168 FUNC_SAVE
4444 %define tmp r11
4545 %define tmp2 r10
4646 %define return rax
47 %define func(x) x:
47 %define func(x) x: endbranch
4848 %define FUNC_SAVE
4949 %define FUNC_RESTORE
5050 %endif
148148 %define xmask0f zmm14
149149
150150 align 16
151 global gf_2vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION
151 mk_global gf_2vect_mad_avx512, function
152152 func(gf_2vect_mad_avx512)
153153 FUNC_SAVE
154154 sub len, 64
9696 %define return rax
9797 %define return.w eax
9898
99 %define func(x) x:
99 %define func(x) x: endbranch
100100 %define FUNC_SAVE
101101 %define FUNC_RESTORE
102102 %endif
153153
154154
155155 align 16
156 global gf_2vect_mad_sse:ISAL_SYM_TYPE_FUNCTION
156 mk_global gf_2vect_mad_sse, function
157157 func(gf_2vect_mad_sse)
158158 FUNC_SAVE
159159 sub len, 16
5151 %define PS 8
5252 %define LOG_PS 3
5353
54 %define func(x) x:
54 %define func(x) x: endbranch
5555 %macro FUNC_SAVE 0
5656 push r12
5757 push r13
8686 %define func(x) proc_frame x
8787 %macro FUNC_SAVE 0
8888 alloc_stack stack_size
89 save_xmm128 xmm6, 0*16
90 save_xmm128 xmm7, 1*16
91 save_xmm128 xmm8, 2*16
92 save_xmm128 xmm9, 3*16
93 save_xmm128 xmm10, 4*16
94 save_xmm128 xmm11, 5*16
89 vmovdqa [rsp + 0*16], xmm6
90 vmovdqa [rsp + 1*16], xmm7
91 vmovdqa [rsp + 2*16], xmm8
92 vmovdqa [rsp + 3*16], xmm9
93 vmovdqa [rsp + 4*16], xmm10
94 vmovdqa [rsp + 5*16], xmm11
9595 save_reg r12, 6*16 + 0*8
9696 save_reg r13, 6*16 + 1*8
9797 save_reg r14, 6*16 + 2*8
138138
139139 %define PS 4
140140 %define LOG_PS 2
141 %define func(x) x:
141 %define func(x) x: endbranch
142142 %define arg(x) [ebp + PS*2 + PS*x]
143143 %define var(x) [ebp - PS - PS*x]
144144
260260 %endif
261261
262262 align 16
263 global gf_3vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
263 mk_global gf_3vect_dot_prod_avx, function
264264 func(gf_3vect_dot_prod_avx)
265265 FUNC_SAVE
266266 SLDR len, len_m
5353 %define PS 8
5454 %define LOG_PS 3
5555
56 %define func(x) x:
56 %define func(x) x: endbranch
5757 %macro FUNC_SAVE 0
5858 push r12
5959 push r13
142142
143143 %define PS 4
144144 %define LOG_PS 2
145 %define func(x) x:
145 %define func(x) x: endbranch
146146 %define arg(x) [ebp + PS*2 + PS*x]
147147 %define var(x) [ebp - PS - PS*x]
148148
268268 %endif
269269
270270 align 16
271 global gf_3vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
271 mk_global gf_3vect_dot_prod_avx2, function
272272 func(gf_3vect_dot_prod_avx2)
273273 FUNC_SAVE
274274 SLDR len, len_m
5252 %define PS 8
5353 %define LOG_PS 3
5454
55 %define func(x) x:
55 %define func(x) x: endbranch
5656 %macro FUNC_SAVE 0
5757 push r12
5858 push r13
172172 section .text
173173
174174 align 16
175 global gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
175 mk_global gf_3vect_dot_prod_avx512, function
176176 func(gf_3vect_dot_prod_avx512)
177177 FUNC_SAVE
178178 sub len, 64
5151 %define PS 8
5252 %define LOG_PS 3
5353
54 %define func(x) x:
54 %define func(x) x: endbranch
5555 %macro FUNC_SAVE 0
5656 push r12
5757 push r13
138138
139139 %define PS 4
140140 %define LOG_PS 2
141 %define func(x) x:
141 %define func(x) x: endbranch
142142 %define arg(x) [ebp + PS*2 + PS*x]
143143 %define var(x) [ebp - PS - PS*x]
144144
260260 %endif
261261
262262 align 16
263 global gf_3vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
263 mk_global gf_3vect_dot_prod_sse, function
264264 func(gf_3vect_dot_prod_sse)
265265 FUNC_SAVE
266266 SLDR len, len_m
+0
-586
erasure_code/gf_3vect_dot_prod_sse_test.c less more
0 /**********************************************************************
1 Copyright(c) 2011-2015 Intel Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Intel Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h> // for memset, memcmp
32 #include "erasure_code.h"
33 #include "types.h"
34
35 #ifndef FUNCTION_UNDER_TEST
36 # define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse
37 #endif
38 #ifndef TEST_MIN_SIZE
39 # define TEST_MIN_SIZE 16
40 #endif
41
42 #define str(s) #s
43 #define xstr(s) str(s)
44
45 #define TEST_LEN 8192
46 #define TEST_SIZE (TEST_LEN/2)
47 #define TEST_MEM TEST_SIZE
48 #define TEST_LOOPS 10000
49 #define TEST_TYPE_STR ""
50
51 #ifndef TEST_SOURCES
52 # define TEST_SOURCES 16
53 #endif
54 #ifndef RANDOMS
55 # define RANDOMS 20
56 #endif
57
58 #ifdef EC_ALIGNED_ADDR
59 // Define power of 2 range to check ptr, len alignment
60 # define PTR_ALIGN_CHK_B 0
61 # define LEN_ALIGN_CHK_B 0 // 0 for aligned only
62 #else
63 // Define power of 2 range to check ptr, len alignment
64 # define PTR_ALIGN_CHK_B 32
65 # define LEN_ALIGN_CHK_B 32 // 0 for aligned only
66 #endif
67
68 typedef unsigned char u8;
69
70 extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls,
71 unsigned char **src, unsigned char **dest);
72
/*
 * Hex-dump len bytes starting at buf: a newline is emitted after every
 * 32nd value and once more at the end.
 */
void dump(unsigned char *buf, int len)
{
	int n;

	for (n = 1; n <= len; n++) {
		printf(" %2x", buf[n - 1] & 0xff);
		/* n counts bytes already printed */
		if (n % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
83
/*
 * Hex-dump k rows of m bytes each, reading row r from the buffer s[r].
 * Each row ends with a newline; a blank line closes the dump.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int r = 0;

	while (r < k) {
		int c = 0;

		while (c < m) {
			printf(" %2x", s[r][c]);
			c++;
		}
		printf("\n");
		r++;
	}
	printf("\n");
}
95
/*
 * Hex-dump a contiguous row-major k x m byte array.  Each row ends with a
 * newline; a blank line closes the dump.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int r = 0;

	while (r < k) {
		int c = 0;

		while (c < m) {
			printf(" %2x", s[(r * m) + c] & 0xff);
			c++;
		}
		printf("\n");
		r++;
	}
	printf("\n");
}
107
108 int main(int argc, char *argv[])
109 {
110 int i, j, rtest, srcs;
111 void *buf;
112 u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
113 u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES];
114 u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;
115
116 int align, size;
117 unsigned char *efence_buffs[TEST_SOURCES];
118 unsigned int offset;
119 u8 *ubuffs[TEST_SOURCES];
120 u8 *udest_ptrs[3];
121 printf(xstr(FUNCTION_UNDER_TEST) "_test: %dx%d ", TEST_SOURCES, TEST_LEN);
122
123 // Allocate the arrays
124 for (i = 0; i < TEST_SOURCES; i++) {
125 if (posix_memalign(&buf, 64, TEST_LEN)) {
126 printf("alloc error: Fail");
127 return -1;
128 }
129 buffs[i] = buf;
130 }
131
132 if (posix_memalign(&buf, 64, TEST_LEN)) {
133 printf("alloc error: Fail");
134 return -1;
135 }
136 dest1 = buf;
137
138 if (posix_memalign(&buf, 64, TEST_LEN)) {
139 printf("alloc error: Fail");
140 return -1;
141 }
142 dest2 = buf;
143
144 if (posix_memalign(&buf, 64, TEST_LEN)) {
145 printf("alloc error: Fail");
146 return -1;
147 }
148 dest3 = buf;
149
150 if (posix_memalign(&buf, 64, TEST_LEN)) {
151 printf("alloc error: Fail");
152 return -1;
153 }
154 dest_ref1 = buf;
155
156 if (posix_memalign(&buf, 64, TEST_LEN)) {
157 printf("alloc error: Fail");;
158 return -1;
159 }
160 dest_ref2 = buf;
161
162 if (posix_memalign(&buf, 64, TEST_LEN)) {
163 printf("alloc error: Fail");
164 return -1;
165 }
166 dest_ref3 = buf;
167
168 dest_ptrs[0] = dest1;
169 dest_ptrs[1] = dest2;
170 dest_ptrs[2] = dest3;
171
172 // Test of all zeros
173 for (i = 0; i < TEST_SOURCES; i++)
174 memset(buffs[i], 0, TEST_LEN);
175
176 memset(dest1, 0, TEST_LEN);
177 memset(dest2, 0, TEST_LEN);
178 memset(dest3, 0, TEST_LEN);
179 memset(dest_ref1, 0, TEST_LEN);
180 memset(dest_ref2, 0, TEST_LEN);
181 memset(dest_ref3, 0, TEST_LEN);
182 memset(g1, 2, TEST_SOURCES);
183 memset(g2, 1, TEST_SOURCES);
184 memset(g3, 7, TEST_SOURCES);
185
186 for (i = 0; i < TEST_SOURCES; i++) {
187 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
188 gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
189 gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
190 }
191
192 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
193 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
194 dest_ref2);
195 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
196 dest_ref3);
197
198 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
199
200 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
201 printf("Fail zero" xstr(FUNCTION_UNDER_TEST) " test1\n");
202 dump_matrix(buffs, 5, TEST_SOURCES);
203 printf("dprod_base:");
204 dump(dest_ref1, 25);
205 printf("dprod_dut:");
206 dump(dest1, 25);
207 return -1;
208 }
209 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
210 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
211 dump_matrix(buffs, 5, TEST_SOURCES);
212 printf("dprod_base:");
213 dump(dest_ref2, 25);
214 printf("dprod_dut:");
215 dump(dest2, 25);
216 return -1;
217 }
218 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
219 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
220 dump_matrix(buffs, 5, TEST_SOURCES);
221 printf("dprod_base:");
222 dump(dest_ref3, 25);
223 printf("dprod_dut:");
224 dump(dest3, 25);
225 return -1;
226 }
227
228 putchar('.');
229
230 // Rand data test
231
232 for (rtest = 0; rtest < RANDOMS; rtest++) {
233 for (i = 0; i < TEST_SOURCES; i++)
234 for (j = 0; j < TEST_LEN; j++)
235 buffs[i][j] = rand();
236
237 for (i = 0; i < TEST_SOURCES; i++) {
238 g1[i] = rand();
239 g2[i] = rand();
240 g3[i] = rand();
241 }
242
243 for (i = 0; i < TEST_SOURCES; i++) {
244 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
245 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
246 gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
247 }
248
249 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
250 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
251 buffs, dest_ref2);
252 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
253 buffs, dest_ref3);
254
255 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
256
257 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
258 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
259 dump_matrix(buffs, 5, TEST_SOURCES);
260 printf("dprod_base:");
261 dump(dest_ref1, 25);
262 printf("dprod_dut:");
263 dump(dest1, 25);
264 return -1;
265 }
266 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
267 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
268 dump_matrix(buffs, 5, TEST_SOURCES);
269 printf("dprod_base:");
270 dump(dest_ref2, 25);
271 printf("dprod_dut:");
272 dump(dest2, 25);
273 return -1;
274 }
275 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
276 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
277 dump_matrix(buffs, 5, TEST_SOURCES);
278 printf("dprod_base:");
279 dump(dest_ref3, 25);
280 printf("dprod_dut:");
281 dump(dest3, 25);
282 return -1;
283 }
284
285 putchar('.');
286 }
287
288 // Rand data test with varied parameters
289 for (rtest = 0; rtest < RANDOMS; rtest++) {
290 for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
291 for (i = 0; i < srcs; i++)
292 for (j = 0; j < TEST_LEN; j++)
293 buffs[i][j] = rand();
294
295 for (i = 0; i < srcs; i++) {
296 g1[i] = rand();
297 g2[i] = rand();
298 g3[i] = rand();
299 }
300
301 for (i = 0; i < srcs; i++) {
302 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
303 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
304 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
305 }
306
307 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
308 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
309 dest_ref2);
310 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
311 dest_ref3);
312
313 FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
314
315 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
316 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
317 " test1 srcs=%d\n", srcs);
318 dump_matrix(buffs, 5, TEST_SOURCES);
319 printf("dprod_base:");
320 dump(dest_ref1, 25);
321 printf("dprod_dut:");
322 dump(dest1, 25);
323 return -1;
324 }
325 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
326 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
327 " test2 srcs=%d\n", srcs);
328 dump_matrix(buffs, 5, TEST_SOURCES);
329 printf("dprod_base:");
330 dump(dest_ref2, 25);
331 printf("dprod_dut:");
332 dump(dest2, 25);
333 return -1;
334 }
335 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
336 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
337 " test3 srcs=%d\n", srcs);
338 dump_matrix(buffs, 5, TEST_SOURCES);
339 printf("dprod_base:");
340 dump(dest_ref3, 25);
341 printf("dprod_dut:");
342 dump(dest3, 25);
343 return -1;
344 }
345
346 putchar('.');
347 }
348 }
349
350 // Run tests at end of buffer for Electric Fence
351 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
352 for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
353 for (i = 0; i < TEST_SOURCES; i++)
354 for (j = 0; j < TEST_LEN; j++)
355 buffs[i][j] = rand();
356
357 for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
358 efence_buffs[i] = buffs[i] + TEST_LEN - size;
359
360 for (i = 0; i < TEST_SOURCES; i++) {
361 g1[i] = rand();
362 g2[i] = rand();
363 g3[i] = rand();
364 }
365
366 for (i = 0; i < TEST_SOURCES; i++) {
367 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
368 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
369 gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
370 }
371
372 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
373 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
374 efence_buffs, dest_ref2);
375 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
376 efence_buffs, dest_ref3);
377
378 FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
379
380 if (0 != memcmp(dest_ref1, dest1, size)) {
381 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
382 dump_matrix(efence_buffs, 5, TEST_SOURCES);
383 printf("dprod_base:");
384 dump(dest_ref1, align);
385 printf("dprod_dut:");
386 dump(dest1, align);
387 return -1;
388 }
389
390 if (0 != memcmp(dest_ref2, dest2, size)) {
391 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
392 dump_matrix(efence_buffs, 5, TEST_SOURCES);
393 printf("dprod_base:");
394 dump(dest_ref2, align);
395 printf("dprod_dut:");
396 dump(dest2, align);
397 return -1;
398 }
399
400 if (0 != memcmp(dest_ref3, dest3, size)) {
401 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
402 dump_matrix(efence_buffs, 5, TEST_SOURCES);
403 printf("dprod_base:");
404 dump(dest_ref3, align);
405 printf("dprod_dut:");
406 dump(dest3, align);
407 return -1;
408 }
409
410 putchar('.');
411 }
412
413 // Test rand ptr alignment if available
414
415 for (rtest = 0; rtest < RANDOMS; rtest++) {
416 size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
417 srcs = rand() % TEST_SOURCES;
418 if (srcs == 0)
419 continue;
420
421 offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
422 // Add random offsets
423 for (i = 0; i < srcs; i++)
424 ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
425
426 udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
427 udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
428 udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
429
430 memset(dest1, 0, TEST_LEN); // zero pad to check write-over
431 memset(dest2, 0, TEST_LEN);
432 memset(dest3, 0, TEST_LEN);
433
434 for (i = 0; i < srcs; i++)
435 for (j = 0; j < size; j++)
436 ubuffs[i][j] = rand();
437
438 for (i = 0; i < srcs; i++) {
439 g1[i] = rand();
440 g2[i] = rand();
441 g3[i] = rand();
442 }
443
444 for (i = 0; i < srcs; i++) {
445 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
446 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
447 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
448 }
449
450 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
451 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
452 gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
453
454 FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
455
456 if (memcmp(dest_ref1, udest_ptrs[0], size)) {
457 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
458 srcs);
459 dump_matrix(ubuffs, 5, TEST_SOURCES);
460 printf("dprod_base:");
461 dump(dest_ref1, 25);
462 printf("dprod_dut:");
463 dump(udest_ptrs[0], 25);
464 return -1;
465 }
466 if (memcmp(dest_ref2, udest_ptrs[1], size)) {
467 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
468 srcs);
469 dump_matrix(ubuffs, 5, TEST_SOURCES);
470 printf("dprod_base:");
471 dump(dest_ref2, 25);
472 printf("dprod_dut:");
473 dump(udest_ptrs[1], 25);
474 return -1;
475 }
476 if (memcmp(dest_ref3, udest_ptrs[2], size)) {
477 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
478 srcs);
479 dump_matrix(ubuffs, 5, TEST_SOURCES);
480 printf("dprod_base:");
481 dump(dest_ref3, 25);
482 printf("dprod_dut:");
483 dump(udest_ptrs[2], 25);
484 return -1;
485 }
486 // Confirm that padding around dests is unchanged
487 memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
488 offset = udest_ptrs[0] - dest1;
489
490 if (memcmp(dest1, dest_ref1, offset)) {
491 printf("Fail rand ualign pad1 start\n");
492 return -1;
493 }
494 if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
495 printf("Fail rand ualign pad1 end\n");
496 return -1;
497 }
498
499 offset = udest_ptrs[1] - dest2;
500 if (memcmp(dest2, dest_ref1, offset)) {
501 printf("Fail rand ualign pad2 start\n");
502 return -1;
503 }
504 if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
505 printf("Fail rand ualign pad2 end\n");
506 return -1;
507 }
508
509 offset = udest_ptrs[2] - dest3;
510 if (memcmp(dest3, dest_ref1, offset)) {
511 printf("Fail rand ualign pad3 start\n");
512 return -1;
513 }
514 if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
515 printf("Fail rand ualign pad3 end\n");;
516 return -1;
517 }
518
519 putchar('.');
520 }
521
522 // Test all size alignment
523 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
524
525 for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
526 srcs = TEST_SOURCES;
527
528 for (i = 0; i < srcs; i++)
529 for (j = 0; j < size; j++)
530 buffs[i][j] = rand();
531
532 for (i = 0; i < srcs; i++) {
533 g1[i] = rand();
534 g2[i] = rand();
535 g3[i] = rand();
536 }
537
538 for (i = 0; i < srcs; i++) {
539 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
540 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
541 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
542 }
543
544 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
545 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
546 gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
547
548 FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
549
550 if (memcmp(dest_ref1, dest_ptrs[0], size)) {
551 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
552 size);
553 dump_matrix(buffs, 5, TEST_SOURCES);
554 printf("dprod_base:");
555 dump(dest_ref1, 25);
556 printf("dprod_dut:");
557 dump(dest_ptrs[0], 25);
558 return -1;
559 }
560 if (memcmp(dest_ref2, dest_ptrs[1], size)) {
561 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
562 size);
563 dump_matrix(buffs, 5, TEST_SOURCES);
564 printf("dprod_base:");
565 dump(dest_ref2, 25);
566 printf("dprod_dut:");
567 dump(dest_ptrs[1], 25);
568 return -1;
569 }
570 if (memcmp(dest_ref3, dest_ptrs[2], size)) {
571 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
572 size);
573 dump_matrix(buffs, 5, TEST_SOURCES);
574 printf("dprod_base:");
575 dump(dest_ref3, 25);
576 printf("dprod_dut:");
577 dump(dest_ptrs[2], 25);
578 return -1;
579 }
580 }
581
582 printf("Pass\n");
583 return 0;
584
585 }
9696 %define return rax
9797 %define return.w eax
9898
99 %define func(x) x:
99 %define func(x) x: endbranch
100100 %define FUNC_SAVE
101101 %define FUNC_RESTORE
102102 %endif
157157 %define xd3 xtmph1
158158
159159 align 16
160 global gf_3vect_mad_avx:ISAL_SYM_TYPE_FUNCTION
160 mk_global gf_3vect_mad_avx, function
161161 func(gf_3vect_mad_avx)
162162 FUNC_SAVE
163163 sub len, 16
102102 %define return rax
103103 %define return.w eax
104104
105 %define func(x) x:
105 %define func(x) x: endbranch
106106 %define FUNC_SAVE
107107 %define FUNC_RESTORE
108108 %endif
164164 %define xd3 ymm10
165165
166166 align 16
167 global gf_3vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION
167 mk_global gf_3vect_mad_avx2, function
168168 func(gf_3vect_mad_avx2)
169169 FUNC_SAVE
170170 sub len, 32
4343 %define arg5 r9
4444 %define tmp r11
4545 %define return rax
46 %define func(x) x:
46 %define func(x) x: endbranch
4747 %define FUNC_SAVE
4848 %define FUNC_RESTORE
4949 %endif
151151 %define xmask0f zmm17
152152
153153 align 16
154 global gf_3vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION
154 mk_global gf_3vect_mad_avx512, function
155155 func(gf_3vect_mad_avx512)
156156 FUNC_SAVE
157157 sub len, 64
9595 %define return rax
9696 %define return.w eax
9797
98 %define func(x) x:
98 %define func(x) x: endbranch
9999 %define FUNC_SAVE
100100 %define FUNC_RESTORE
101101 %endif
155155 %define xd3 xtmph1
156156
157157 align 16
158 global gf_3vect_mad_sse:ISAL_SYM_TYPE_FUNCTION
158 mk_global gf_3vect_mad_sse, function
159159 func(gf_3vect_mad_sse)
160160 FUNC_SAVE
161161 sub len, 16
5353 %define PS 8
5454 %define LOG_PS 3
5555
56 %define func(x) x:
56 %define func(x) x: endbranch
5757 %macro FUNC_SAVE 0
5858 push r12
5959 push r13
9494 %define func(x) proc_frame x
9595 %macro FUNC_SAVE 0
9696 alloc_stack stack_size
97 save_xmm128 xmm6, 0*16
98 save_xmm128 xmm7, 1*16
99 save_xmm128 xmm8, 2*16
100 save_xmm128 xmm9, 3*16
101 save_xmm128 xmm10, 4*16
102 save_xmm128 xmm11, 5*16
103 save_xmm128 xmm12, 6*16
104 save_xmm128 xmm13, 7*16
105 save_xmm128 xmm14, 8*16
97 vmovdqa [rsp + 0*16], xmm6
98 vmovdqa [rsp + 1*16], xmm7
99 vmovdqa [rsp + 2*16], xmm8
100 vmovdqa [rsp + 3*16], xmm9
101 vmovdqa [rsp + 4*16], xmm10
102 vmovdqa [rsp + 5*16], xmm11
103 vmovdqa [rsp + 6*16], xmm12
104 vmovdqa [rsp + 7*16], xmm13
105 vmovdqa [rsp + 8*16], xmm14
106106 save_reg r12, 9*16 + 0*8
107107 save_reg r13, 9*16 + 1*8
108108 save_reg r14, 9*16 + 2*8
158158
159159 %define PS 4
160160 %define LOG_PS 2
161 %define func(x) x:
161 %define func(x) x: endbranch
162162 %define arg(x) [ebp + PS*2 + PS*x]
163163 %define var(x) [ebp - PS - PS*x]
164164
293293 %define xp4 xmm5
294294 %endif
295295 align 16
296 global gf_4vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
296 mk_global gf_4vect_dot_prod_avx, function
297297 func(gf_4vect_dot_prod_avx)
298298 FUNC_SAVE
299299 SLDR len, len_m
5555 %define PS 8
5656 %define LOG_PS 3
5757
58 %define func(x) x:
58 %define func(x) x: endbranch
5959 %macro FUNC_SAVE 0
6060 push r12
6161 push r13
162162
163163 %define PS 4
164164 %define LOG_PS 2
165 %define func(x) x:
165 %define func(x) x: endbranch
166166 %define arg(x) [ebp + PS*2 + PS*x]
167167 %define var(x) [ebp - PS - PS*x]
168168
301301 %define xp4 ymm5
302302 %endif
303303 align 16
304 global gf_4vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
304 mk_global gf_4vect_dot_prod_avx2, function
305305 func(gf_4vect_dot_prod_avx2)
306306 FUNC_SAVE
307307 SLDR len, len_m
5454 %define PS 8
5555 %define LOG_PS 3
5656
57 %define func(x) x:
57 %define func(x) x: endbranch
5858 %macro FUNC_SAVE 0
5959 push r12
6060 push r13
190190 section .text
191191
192192 align 16
193 global gf_4vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
193 mk_global gf_4vect_dot_prod_avx512, function
194194 func(gf_4vect_dot_prod_avx512)
195195 FUNC_SAVE
196196 sub len, 64
5353 %define PS 8
5454 %define LOG_PS 3
5555
56 %define func(x) x:
56 %define func(x) x: endbranch
5757 %macro FUNC_SAVE 0
5858 push r12
5959 push r13
158158
159159 %define PS 4
160160 %define LOG_PS 2
161 %define func(x) x:
161 %define func(x) x: endbranch
162162 %define arg(x) [ebp + PS*2 + PS*x]
163163 %define var(x) [ebp - PS - PS*x]
164164
293293 %define xp4 xmm5
294294 %endif
295295 align 16
296 global gf_4vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
296 mk_global gf_4vect_dot_prod_sse, function
297297 func(gf_4vect_dot_prod_sse)
298298 FUNC_SAVE
299299 SLDR len, len_m
+0
-695
erasure_code/gf_4vect_dot_prod_sse_test.c less more
0 /**********************************************************************
1 Copyright(c) 2011-2015 Intel Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Intel Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h> // for memset, memcmp
32 #include "erasure_code.h"
33 #include "types.h"
34
// Kernel exercised by this test program. The build may predefine
// FUNCTION_UNDER_TEST to point the same test body at another implementation;
// otherwise the SSE 4-output dot-product kernel is used.
35 #ifndef FUNCTION_UNDER_TEST
36 # define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse
37 #endif
// Smallest length (in bytes) handed to the kernel by the size sweeps in
// main(); main() also uses it as a power-of-two granule when rounding the
// length of the unaligned-pointer test.
38 #ifndef TEST_MIN_SIZE
39 # define TEST_MIN_SIZE 16
40 #endif
41
// Two-level stringification: xstr(X) lets the preprocessor expand X first and
// then str() turns the result into a string literal. Used to print the name
// that FUNCTION_UNDER_TEST expands to.
42 #define str(s) #s
43 #define xstr(s) str(s)
44
// TEST_LEN:  bytes allocated for every source and destination buffer.
// TEST_SIZE: upper bound of the end-of-buffer (Electric Fence) size sweep.
// NOTE(review): TEST_MEM, TEST_LOOPS and TEST_TYPE_STR are not referenced by
// main() in this file; presumably kept for parity with sibling tests - confirm.
45 #define TEST_LEN 8192
46 #define TEST_SIZE (TEST_LEN/2)
47 #define TEST_MEM TEST_SIZE
48 #define TEST_LOOPS 10000
49 #define TEST_TYPE_STR ""
50
// Number of source buffers allocated, and the largest source count passed to
// the kernel. May be overridden by the build.
51 #ifndef TEST_SOURCES
52 # define TEST_SOURCES 16
53 #endif
// Iteration count of each randomized test loop in main(). May be overridden
// by the build.
54 #ifndef RANDOMS
55 # define RANDOMS 20
56 #endif
57
// PTR_ALIGN_CHK_B bounds the random byte offsets that main() adds to source
// and destination pointers in the unaligned-pointer test: when non-zero the
// offsets are drawn as rand() & (PTR_ALIGN_CHK_B - 1), when zero no offset is
// applied. LEN_ALIGN_CHK_B only selects the step of the length sweeps in
// main(): 1 byte when non-zero, 32 bytes when zero.
58 #ifdef EC_ALIGNED_ADDR
59 // Define power of 2 range to check ptr, len alignment
60 # define PTR_ALIGN_CHK_B 0
61 # define LEN_ALIGN_CHK_B 0 // 0 for aligned only
62 #else
63 // Define power of 2 range to check ptr, len alignment
64 # define PTR_ALIGN_CHK_B 32
65 # define LEN_ALIGN_CHK_B 32 // 0 for aligned only
66 #endif
67
// Byte shorthand used for coefficients, multiply tables and buffer pointers.
68 typedef unsigned char u8;
69
// Prototype of the kernel under test, described by how main() calls it:
//   len    - number of bytes processed in each buffer
//   vlen   - number of source buffers
//   gftbls - concatenated 32-byte multiply tables, one group of vlen tables
//            per output (4 groups)
//   src    - array of vlen source pointers
//   dest   - array of 4 output pointers
70 extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls,
71 unsigned char **src, unsigned char **dest);
72
/*
 * Print the first len bytes of buf as space-separated hex values, breaking
 * the line after every 32 bytes and ending with a newline.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", buf[pos] & 0xff);
		pos++;
		// Wrap after each full group of 32 printed bytes
		if ((pos % 32) == 0)
			putchar('\n');
	}
	putchar('\n');
}
83
/*
 * Print a k x m block of bytes in hex, one row per line, where row r is read
 * through the pointer s[r]. A blank line follows the last row.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		putchar('\n');
		row++;
	}
	putchar('\n');
}
95
/*
 * Print a k x m byte matrix stored contiguously in row-major order at s,
 * one row per line in hex. A blank line follows the last row.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", s[(row * m) + col] & 0xff);
		putchar('\n');
		row++;
	}
	putchar('\n');
}
107
108 int main(int argc, char *argv[])
109 {
110 int i, j, rtest, srcs;
111 void *buf;
112 u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
113 u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES];
114 u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3;
115 u8 *dest_ref4, *dest_ptrs[4];
116
117 int align, size;
118 unsigned char *efence_buffs[TEST_SOURCES];
119 unsigned int offset;
120 u8 *ubuffs[TEST_SOURCES];
121 u8 *udest_ptrs[4];
122 printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
123
124 // Allocate the arrays
125 for (i = 0; i < TEST_SOURCES; i++) {
126 if (posix_memalign(&buf, 64, TEST_LEN)) {
127 printf("alloc error: Fail");
128 return -1;
129 }
130 buffs[i] = buf;
131 }
132
133 if (posix_memalign(&buf, 64, TEST_LEN)) {
134 printf("alloc error: Fail");
135 return -1;
136 }
137 dest1 = buf;
138
139 if (posix_memalign(&buf, 64, TEST_LEN)) {
140 printf("alloc error: Fail");
141 return -1;
142 }
143 dest2 = buf;
144
145 if (posix_memalign(&buf, 64, TEST_LEN)) {
146 printf("alloc error: Fail");
147 return -1;
148 }
149 dest3 = buf;
150
151 if (posix_memalign(&buf, 64, TEST_LEN)) {
152 printf("alloc error: Fail");
153 return -1;
154 }
155 dest4 = buf;
156
157 if (posix_memalign(&buf, 64, TEST_LEN)) {
158 printf("alloc error: Fail");
159 return -1;
160 }
161 dest_ref1 = buf;
162
163 if (posix_memalign(&buf, 64, TEST_LEN)) {
164 printf("alloc error: Fail");
165 return -1;
166 }
167 dest_ref2 = buf;
168
169 if (posix_memalign(&buf, 64, TEST_LEN)) {
170 printf("alloc error: Fail");
171 return -1;
172 }
173 dest_ref3 = buf;
174
175 if (posix_memalign(&buf, 64, TEST_LEN)) {
176 printf("alloc error: Fail");
177 return -1;
178 }
179 dest_ref4 = buf;
180
181 dest_ptrs[0] = dest1;
182 dest_ptrs[1] = dest2;
183 dest_ptrs[2] = dest3;
184 dest_ptrs[3] = dest4;
185
186 // Test of all zeros
187 for (i = 0; i < TEST_SOURCES; i++)
188 memset(buffs[i], 0, TEST_LEN);
189
190 memset(dest1, 0, TEST_LEN);
191 memset(dest2, 0, TEST_LEN);
192 memset(dest3, 0, TEST_LEN);
193 memset(dest4, 0, TEST_LEN);
194 memset(dest_ref1, 0, TEST_LEN);
195 memset(dest_ref2, 0, TEST_LEN);
196 memset(dest_ref3, 0, TEST_LEN);
197 memset(dest_ref4, 0, TEST_LEN);
198 memset(g1, 2, TEST_SOURCES);
199 memset(g2, 1, TEST_SOURCES);
200 memset(g3, 7, TEST_SOURCES);
201 memset(g4, 3, TEST_SOURCES);
202
203 for (i = 0; i < TEST_SOURCES; i++) {
204 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
205 gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
206 gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
207 gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
208 }
209
210 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
211 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
212 dest_ref2);
213 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
214 dest_ref3);
215 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
216 dest_ref4);
217
218 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
219
220 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
221 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
222 dump_matrix(buffs, 5, TEST_SOURCES);
223 printf("dprod_base:");
224 dump(dest_ref1, 25);
225 printf("dprod_dut:");
226 dump(dest1, 25);
227 return -1;
228 }
229 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
230 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
231 dump_matrix(buffs, 5, TEST_SOURCES);
232 printf("dprod_base:");
233 dump(dest_ref2, 25);
234 printf("dprod_dut:");
235 dump(dest2, 25);
236 return -1;
237 }
238 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
239 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
240 dump_matrix(buffs, 5, TEST_SOURCES);
241 printf("dprod_base:");
242 dump(dest_ref3, 25);
243 printf("dprod_dut:");
244 dump(dest3, 25);
245 return -1;
246 }
247 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
248 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
249 dump_matrix(buffs, 5, TEST_SOURCES);
250 printf("dprod_base:");
251 dump(dest_ref4, 25);
252 printf("dprod_dut:");
253 dump(dest4, 25);
254 return -1;
255 }
256
257 putchar('.');
258
259 // Rand data test
260
261 for (rtest = 0; rtest < RANDOMS; rtest++) {
262 for (i = 0; i < TEST_SOURCES; i++)
263 for (j = 0; j < TEST_LEN; j++)
264 buffs[i][j] = rand();
265
266 for (i = 0; i < TEST_SOURCES; i++) {
267 g1[i] = rand();
268 g2[i] = rand();
269 g3[i] = rand();
270 g4[i] = rand();
271 }
272
273 for (i = 0; i < TEST_SOURCES; i++) {
274 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
275 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
276 gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
277 gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
278 }
279
280 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
281 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
282 buffs, dest_ref2);
283 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
284 buffs, dest_ref3);
285 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
286 buffs, dest_ref4);
287
288 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
289
290 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
291 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
292 dump_matrix(buffs, 5, TEST_SOURCES);
293 printf("dprod_base:");
294 dump(dest_ref1, 25);
295 printf("dprod_dut:");
296 dump(dest1, 25);
297 return -1;
298 }
299 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
300 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
301 dump_matrix(buffs, 5, TEST_SOURCES);
302 printf("dprod_base:");
303 dump(dest_ref2, 25);
304 printf("dprod_dut:");
305 dump(dest2, 25);
306 return -1;
307 }
308 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
309 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
310 dump_matrix(buffs, 5, TEST_SOURCES);
311 printf("dprod_base:");
312 dump(dest_ref3, 25);
313 printf("dprod_dut:");
314 dump(dest3, 25);
315 return -1;
316 }
317 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
318 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
319 dump_matrix(buffs, 5, TEST_SOURCES);
320 printf("dprod_base:");
321 dump(dest_ref4, 25);
322 printf("dprod_dut:");
323 dump(dest4, 25);
324 return -1;
325 }
326
327 putchar('.');
328 }
329
330 // Rand data test with varied parameters
331 for (rtest = 0; rtest < RANDOMS; rtest++) {
332 for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
333 for (i = 0; i < srcs; i++)
334 for (j = 0; j < TEST_LEN; j++)
335 buffs[i][j] = rand();
336
337 for (i = 0; i < srcs; i++) {
338 g1[i] = rand();
339 g2[i] = rand();
340 g3[i] = rand();
341 g4[i] = rand();
342 }
343
344 for (i = 0; i < srcs; i++) {
345 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
346 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
347 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
348 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
349 }
350
351 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
352 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
353 dest_ref2);
354 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
355 dest_ref3);
356 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
357 dest_ref4);
358
359 FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
360
361 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
362 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
363 " test1 srcs=%d\n", srcs);
364 dump_matrix(buffs, 5, TEST_SOURCES);
365 printf("dprod_base:");
366 dump(dest_ref1, 25);
367 printf("dprod_dut:");
368 dump(dest1, 25);
369 return -1;
370 }
371 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
372 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
373 " test2 srcs=%d\n", srcs);
374 dump_matrix(buffs, 5, TEST_SOURCES);
375 printf("dprod_base:");
376 dump(dest_ref2, 25);
377 printf("dprod_dut:");
378 dump(dest2, 25);
379 return -1;
380 }
381 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
382 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
383 " test3 srcs=%d\n", srcs);
384 dump_matrix(buffs, 5, TEST_SOURCES);
385 printf("dprod_base:");
386 dump(dest_ref3, 25);
387 printf("dprod_dut:");
388 dump(dest3, 25);
389 return -1;
390 }
391 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
392 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
393 " test4 srcs=%d\n", srcs);
394 dump_matrix(buffs, 5, TEST_SOURCES);
395 printf("dprod_base:");
396 dump(dest_ref4, 25);
397 printf("dprod_dut:");
398 dump(dest4, 25);
399 return -1;
400 }
401
402 putchar('.');
403 }
404 }
405
406 // Run tests at end of buffer for Electric Fence
407 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32;
408 for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
409 for (i = 0; i < TEST_SOURCES; i++)
410 for (j = 0; j < TEST_LEN; j++)
411 buffs[i][j] = rand();
412
413 for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
414 efence_buffs[i] = buffs[i] + TEST_LEN - size;
415
416 for (i = 0; i < TEST_SOURCES; i++) {
417 g1[i] = rand();
418 g2[i] = rand();
419 g3[i] = rand();
420 g4[i] = rand();
421 }
422
423 for (i = 0; i < TEST_SOURCES; i++) {
424 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
425 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
426 gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
427 gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
428 }
429
430 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
431 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
432 efence_buffs, dest_ref2);
433 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
434 efence_buffs, dest_ref3);
435 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
436 efence_buffs, dest_ref4);
437
438 FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
439
440 if (0 != memcmp(dest_ref1, dest1, size)) {
441 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
442 dump_matrix(efence_buffs, 5, TEST_SOURCES);
443 printf("dprod_base:");
444 dump(dest_ref1, align);
445 printf("dprod_dut:");
446 dump(dest1, align);
447 return -1;
448 }
449
450 if (0 != memcmp(dest_ref2, dest2, size)) {
451 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
452 dump_matrix(efence_buffs, 5, TEST_SOURCES);
453 printf("dprod_base:");
454 dump(dest_ref2, align);
455 printf("dprod_dut:");
456 dump(dest2, align);
457 return -1;
458 }
459
460 if (0 != memcmp(dest_ref3, dest3, size)) {
461 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
462 dump_matrix(efence_buffs, 5, TEST_SOURCES);
463 printf("dprod_base:");
464 dump(dest_ref3, align);
465 printf("dprod_dut:");
466 dump(dest3, align);
467 return -1;
468 }
469
470 if (0 != memcmp(dest_ref4, dest4, size)) {
471 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
472 dump_matrix(efence_buffs, 5, TEST_SOURCES);
473 printf("dprod_base:");
474 dump(dest_ref4, align);
475 printf("dprod_dut:");
476 dump(dest4, align);
477 return -1;
478 }
479
480 putchar('.');
481 }
482
483 // Test rand ptr alignment if available
484
485 for (rtest = 0; rtest < RANDOMS; rtest++) {
486 size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
487 srcs = rand() % TEST_SOURCES;
488 if (srcs == 0)
489 continue;
490
491 offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
492 // Add random offsets
493 for (i = 0; i < srcs; i++)
494 ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
495
496 udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
497 udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
498 udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
499 udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
500
501 memset(dest1, 0, TEST_LEN); // zero pad to check write-over
502 memset(dest2, 0, TEST_LEN);
503 memset(dest3, 0, TEST_LEN);
504 memset(dest4, 0, TEST_LEN);
505
506 for (i = 0; i < srcs; i++)
507 for (j = 0; j < size; j++)
508 ubuffs[i][j] = rand();
509
510 for (i = 0; i < srcs; i++) {
511 g1[i] = rand();
512 g2[i] = rand();
513 g3[i] = rand();
514 g4[i] = rand();
515 }
516
517 for (i = 0; i < srcs; i++) {
518 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
519 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
520 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
521 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
522 }
523
524 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
525 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
526 gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
527 gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
528
529 FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
530
531 if (memcmp(dest_ref1, udest_ptrs[0], size)) {
532 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
533 srcs);
534 dump_matrix(ubuffs, 5, TEST_SOURCES);
535 printf("dprod_base:");
536 dump(dest_ref1, 25);
537 printf("dprod_dut:");
538 dump(udest_ptrs[0], 25);
539 return -1;
540 }
541 if (memcmp(dest_ref2, udest_ptrs[1], size)) {
542 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
543 srcs);
544 dump_matrix(ubuffs, 5, TEST_SOURCES);
545 printf("dprod_base:");
546 dump(dest_ref2, 25);
547 printf("dprod_dut:");
548 dump(udest_ptrs[1], 25);
549 return -1;
550 }
551 if (memcmp(dest_ref3, udest_ptrs[2], size)) {
552 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
553 srcs);
554 dump_matrix(ubuffs, 5, TEST_SOURCES);
555 printf("dprod_base:");
556 dump(dest_ref3, 25);
557 printf("dprod_dut:");
558 dump(udest_ptrs[2], 25);
559 return -1;
560 }
561 if (memcmp(dest_ref4, udest_ptrs[3], size)) {
562 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
563 srcs);
564 dump_matrix(ubuffs, 5, TEST_SOURCES);
565 printf("dprod_base:");
566 dump(dest_ref4, 25);
567 printf("dprod_dut:");
568 dump(udest_ptrs[3], 25);
569 return -1;
570 }
571 // Confirm that padding around dests is unchanged
572 memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
573 offset = udest_ptrs[0] - dest1;
574
575 if (memcmp(dest1, dest_ref1, offset)) {
576 printf("Fail rand ualign pad1 start\n");
577 return -1;
578 }
579 if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
580 printf("Fail rand ualign pad1 end\n");
581 printf("size=%d offset=%d srcs=%d\n", size, offset, srcs);
582 return -1;
583 }
584
585 offset = udest_ptrs[1] - dest2;
586 if (memcmp(dest2, dest_ref1, offset)) {
587 printf("Fail rand ualign pad2 start\n");
588 return -1;
589 }
590 if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
591 printf("Fail rand ualign pad2 end\n");
592 return -1;
593 }
594
595 offset = udest_ptrs[2] - dest3;
596 if (memcmp(dest3, dest_ref1, offset)) {
597 printf("Fail rand ualign pad3 start\n");
598 return -1;
599 }
600 if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
601 printf("Fail rand ualign pad3 end\n");
602 return -1;
603 }
604
605 offset = udest_ptrs[3] - dest4;
606 if (memcmp(dest4, dest_ref1, offset)) {
607 printf("Fail rand ualign pad4 start\n");
608 return -1;
609 }
610 if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
611 printf("Fail rand ualign pad4 end\n");
612 return -1;
613 }
614
615 putchar('.');
616 }
617
618 // Test all size alignment
619 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32;
620
621 for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
622 srcs = TEST_SOURCES;
623
624 for (i = 0; i < srcs; i++)
625 for (j = 0; j < size; j++)
626 buffs[i][j] = rand();
627
628 for (i = 0; i < srcs; i++) {
629 g1[i] = rand();
630 g2[i] = rand();
631 g3[i] = rand();
632 g4[i] = rand();
633 }
634
635 for (i = 0; i < srcs; i++) {
636 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
637 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
638 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
639 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
640 }
641
642 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
643 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
644 gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
645 gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
646
647 FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
648
649 if (memcmp(dest_ref1, dest_ptrs[0], size)) {
650 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
651 size);
652 dump_matrix(buffs, 5, TEST_SOURCES);
653 printf("dprod_base:");
654 dump(dest_ref1, 25);
655 printf("dprod_dut:");
656 dump(dest_ptrs[0], 25);
657 return -1;
658 }
659 if (memcmp(dest_ref2, dest_ptrs[1], size)) {
660 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
661 size);
662 dump_matrix(buffs, 5, TEST_SOURCES);
663 printf("dprod_base:");
664 dump(dest_ref2, 25);
665 printf("dprod_dut:");
666 dump(dest_ptrs[1], 25);
667 return -1;
668 }
669 if (memcmp(dest_ref3, dest_ptrs[2], size)) {
670 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
671 size);
672 dump_matrix(buffs, 5, TEST_SOURCES);
673 printf("dprod_base:");
674 dump(dest_ref3, 25);
675 printf("dprod_dut:");
676 dump(dest_ptrs[2], 25);
677 return -1;
678 }
679 if (memcmp(dest_ref4, dest_ptrs[3], size)) {
680 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
681 size);
682 dump_matrix(buffs, 5, TEST_SOURCES);
683 printf("dprod_base:");
684 dump(dest_ref4, 25);
685 printf("dprod_dut:");
686 dump(dest_ptrs[3], 25);
687 return -1;
688 }
689 }
690
691 printf("Pass\n");
692 return 0;
693
694 }
102102 %define return rax
103103 %define return.w eax
104104
105 %define func(x) x:
105 %define func(x) x: endbranch
106106 %macro FUNC_SAVE 0
107107 push r12
108108 %endmacro
168168 %define xd4 xtmpl1
169169
170170 align 16
171 global gf_4vect_mad_avx:ISAL_SYM_TYPE_FUNCTION
171 mk_global gf_4vect_mad_avx, function
172172 func(gf_4vect_mad_avx)
173173 FUNC_SAVE
174174 sub len, 16
100100 %define return rax
101101 %define return.w eax
102102
103 %define func(x) x:
103 %define func(x) x: endbranch
104104 %define FUNC_SAVE
105105 %define FUNC_RESTORE
106106 %endif
164164 %define xd4 ymm10
165165
166166 align 16
167 global gf_4vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION
167 mk_global gf_4vect_mad_avx2, function
168168 func(gf_4vect_mad_avx2)
169169 FUNC_SAVE
170170 sub len, 32
4343 %define arg5 r9
4444 %define tmp r11
4545 %define return rax
46 %define func(x) x:
46 %define func(x) x: endbranch
4747 %define FUNC_SAVE
4848 %define FUNC_RESTORE
4949 %endif
158158 %define xtmpl5 zmm23
159159
160160 align 16
161 global gf_4vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION
161 mk_global gf_4vect_mad_avx512, function
162162 func(gf_4vect_mad_avx512)
163163 FUNC_SAVE
164164 sub len, 64
102102 %define return rax
103103 %define return.w eax
104104
105 %define func(x) x:
105 %define func(x) x: endbranch
106106 %macro FUNC_SAVE 0
107107 push r12
108108 %endmacro
167167 %define xd4 xtmpl1
168168
169169 align 16
170 global gf_4vect_mad_sse:ISAL_SYM_TYPE_FUNCTION
170 mk_global gf_4vect_mad_sse, function
171171 func(gf_4vect_mad_sse)
172172 FUNC_SAVE
173173 sub len, 16
5050 %define PS 8
5151 %define LOG_PS 3
5252
53 %define func(x) x:
53 %define func(x) x: endbranch
5454 %macro FUNC_SAVE 0
5555 push r12
5656 push r13
8888 %define func(x) proc_frame x
8989 %macro FUNC_SAVE 0
9090 alloc_stack stack_size
91 save_xmm128 xmm6, 0*16
92 save_xmm128 xmm7, 1*16
93 save_xmm128 xmm8, 2*16
94 save_xmm128 xmm9, 3*16
95 save_xmm128 xmm10, 4*16
96 save_xmm128 xmm11, 5*16
97 save_xmm128 xmm12, 6*16
98 save_xmm128 xmm13, 7*16
99 save_xmm128 xmm14, 8*16
100 save_xmm128 xmm15, 9*16
91 vmovdqa [rsp + 0*16], xmm6
92 vmovdqa [rsp + 1*16], xmm7
93 vmovdqa [rsp + 2*16], xmm8
94 vmovdqa [rsp + 3*16], xmm9
95 vmovdqa [rsp + 4*16], xmm10
96 vmovdqa [rsp + 5*16], xmm11
97 vmovdqa [rsp + 6*16], xmm12
98 vmovdqa [rsp + 7*16], xmm13
99 vmovdqa [rsp + 8*16], xmm14
100 vmovdqa [rsp + 9*16], xmm15
101101 save_reg r12, 10*16 + 0*8
102102 save_reg r13, 10*16 + 1*8
103103 save_reg r14, 10*16 + 2*8
183183 %define xp5 xmm6
184184
185185 align 16
186 global gf_5vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
186 mk_global gf_5vect_dot_prod_avx, function
187187 func(gf_5vect_dot_prod_avx)
188188 FUNC_SAVE
189189 sub len, 16
5252 %define PS 8
5353 %define LOG_PS 3
5454
55 %define func(x) x:
55 %define func(x) x: endbranch
5656 %macro FUNC_SAVE 0
5757 push r12
5858 push r13
188188 %define xp5 ymm6
189189
190190 align 16
191 global gf_5vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
191 mk_global gf_5vect_dot_prod_avx2, function
192192 func(gf_5vect_dot_prod_avx2)
193193 FUNC_SAVE
194194 sub len, 32
5656 %define PS 8
5757 %define LOG_PS 3
5858
59 %define func(x) x:
59 %define func(x) x: endbranch
6060 %macro FUNC_SAVE 0
6161 push r12
6262 push r13
112112 vmovdqa [rsp + 7*16], xmm13
113113 vmovdqa [rsp + 8*16], xmm14
114114 vmovdqa [rsp + 9*16], xmm15
115 save_reg r12, 9*16 + 0*8
116 save_reg r13, 9*16 + 1*8
117 save_reg r14, 9*16 + 2*8
118 save_reg r15, 9*16 + 3*8
119 save_reg rdi, 9*16 + 4*8
120 save_reg rsi, 9*16 + 5*8
121 save_reg rbp, 9*16 + 6*8
122 save_reg rbx, 9*16 + 7*8
115 save_reg r12, 10*16 + 0*8
116 save_reg r13, 10*16 + 1*8
117 save_reg r14, 10*16 + 2*8
118 save_reg r15, 10*16 + 3*8
119 save_reg rdi, 10*16 + 4*8
120 save_reg rsi, 10*16 + 5*8
121 save_reg rbp, 10*16 + 6*8
122 save_reg rbx, 10*16 + 7*8
123123 end_prolog
124124 mov arg4, arg(4)
125125 %endmacro
135135 vmovdqa xmm13, [rsp + 7*16]
136136 vmovdqa xmm14, [rsp + 8*16]
137137 vmovdqa xmm15, [rsp + 9*16]
138 mov r12, [rsp + 9*16 + 0*8]
139 mov r13, [rsp + 9*16 + 1*8]
140 mov r14, [rsp + 9*16 + 2*8]
141 mov r15, [rsp + 9*16 + 3*8]
142 mov rdi, [rsp + 9*16 + 4*8]
143 mov rsi, [rsp + 9*16 + 5*8]
144 mov rbp, [rsp + 9*16 + 6*8]
145 mov rbx, [rsp + 9*16 + 7*8]
138 mov r12, [rsp + 10*16 + 0*8]
139 mov r13, [rsp + 10*16 + 1*8]
140 mov r14, [rsp + 10*16 + 2*8]
141 mov r15, [rsp + 10*16 + 3*8]
142 mov rdi, [rsp + 10*16 + 4*8]
143 mov rsi, [rsp + 10*16 + 5*8]
144 mov rbp, [rsp + 10*16 + 6*8]
145 mov rbx, [rsp + 10*16 + 7*8]
146146 add rsp, stack_size
147147 %endmacro
148148 %endif
210210 section .text
211211
212212 align 16
213 global gf_5vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
213 mk_global gf_5vect_dot_prod_avx512, function
214214 func(gf_5vect_dot_prod_avx512)
215215 FUNC_SAVE
216216 sub len, 64
5050 %define PS 8
5151 %define LOG_PS 3
5252
53 %define func(x) x:
53 %define func(x) x: endbranch
5454 %macro FUNC_SAVE 0
5555 push r12
5656 push r13
183183 %define xp5 xmm14
184184
185185 align 16
186 global gf_5vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
186 mk_global gf_5vect_dot_prod_sse, function
187187 func(gf_5vect_dot_prod_sse)
188188 FUNC_SAVE
189189 sub len, 16
+0
-805
erasure_code/gf_5vect_dot_prod_sse_test.c less more
0 /**********************************************************************
1 Copyright(c) 2011-2015 Intel Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Intel Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h> // for memset, memcmp
32 #include "erasure_code.h"
33 #include "types.h"
34
/* Routine exercised by this test. It can be overridden at compile time;
 * main() compares its output with gf_vect_dot_prod_base(). */
35 #ifndef FUNCTION_UNDER_TEST
36 # define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse
37 #endif
/* Smallest length used by the length sweeps in main(); overridable. */
38 #ifndef TEST_MIN_SIZE
39 # define TEST_MIN_SIZE 16
40 #endif
41
/* Two-level stringification: xstr() expands its argument before quoting it,
 * so xstr(FUNCTION_UNDER_TEST) yields the name of the selected routine. */
42 #define str(s) #s
43 #define xstr(s) str(s)
44
/* TEST_LEN is the size in bytes of every source and destination buffer that
 * main() allocates. TEST_SIZE is the upper bound of the end-of-buffer sweep.
 * TEST_MEM, TEST_LOOPS and TEST_TYPE_STR are not referenced by the code in
 * this file. */
45 #define TEST_LEN 8192
46 #define TEST_SIZE (TEST_LEN/2)
47 #define TEST_MEM TEST_SIZE
48 #define TEST_LOOPS 20000
49 #define TEST_TYPE_STR ""
50
/* Number of source buffers (and coefficients per destination); overridable. */
51 #ifndef TEST_SOURCES
52 # define TEST_SOURCES 16
53 #endif
/* Iteration count of each randomized test loop in main(); overridable. */
54 #ifndef RANDOMS
55 # define RANDOMS 20
56 #endif
57
/* With EC_ALIGNED_ADDR defined both ranges are 0, so main() adds no random
 * pointer offsets and steps lengths by 16; otherwise pointer offsets are
 * drawn from 0..31 and lengths are stepped by 1. */
58 #ifdef EC_ALIGNED_ADDR
59 // Define power of 2 range to check ptr, len alignment
60 # define PTR_ALIGN_CHK_B 0
61 # define LEN_ALIGN_CHK_B 0 // 0 for aligned only
62 #else
63 // Define power of 2 range to check ptr, len alignment
64 # define PTR_ALIGN_CHK_B 32
65 # define LEN_ALIGN_CHK_B 32 // 0 for aligned only
66 #endif
67
/* Byte type used for coefficients, tables and data buffers in this test. */
68 typedef unsigned char u8;
69
/*
 * Print the first len bytes of buf to stdout as hex values, each preceded by
 * a space. A line break is emitted after every 32nd byte and once more at the
 * end of the dump.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", 0xff & buf[pos]);
		pos++;
		// Wrap the output once 32 values have been written on this line
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
80
/*
 * Print a k-row by m-column view of an array of row pointers: the first m
 * bytes of each of the first k rows of s, one row per output line, followed
 * by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
92
/*
 * Print a flat byte array as a k-row by m-column matrix stored row after row
 * (element (i, j) lives at s[i * m + j]), one row per output line, followed
 * by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		int base = row * m;	// index of the first element of this row

		for (col = 0; col < m; col++)
			printf(" %2x", 0xff & s[base + col]);
		printf("\n");
	}
	printf("\n");
}
104
105 int main(int argc, char *argv[])
106 {
107 int i, j, rtest, srcs;
108 void *buf;
109 u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
110 u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls;
111 u8 *dest1, *dest2, *dest3, *dest4, *dest5, *buffs[TEST_SOURCES];
112 u8 *dest_ref1, *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5;
113 u8 *dest_ptrs[5];
114
115 int align, size;
116 unsigned char *efence_buffs[TEST_SOURCES];
117 unsigned int offset;
118 u8 *ubuffs[TEST_SOURCES];
119 u8 *udest_ptrs[5];
120 printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
121
122 // Allocate the arrays
123 for (i = 0; i < TEST_SOURCES; i++) {
124 if (posix_memalign(&buf, 64, TEST_LEN)) {
125 printf("alloc error: Fail");
126 return -1;
127 }
128 buffs[i] = buf;
129 }
130
131 if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) {
132 printf("alloc error: Fail");
133 return -1;
134 }
135 g_tbls = buf;
136
137 if (posix_memalign(&buf, 64, TEST_LEN)) {
138 printf("alloc error: Fail");
139 return -1;
140 }
141 dest1 = buf;
142
143 if (posix_memalign(&buf, 64, TEST_LEN)) {
144 printf("alloc error: Fail");
145 return -1;
146 }
147 dest2 = buf;
148
149 if (posix_memalign(&buf, 64, TEST_LEN)) {
150 printf("alloc error: Fail");
151 return -1;
152 }
153 dest3 = buf;
154
155 if (posix_memalign(&buf, 64, TEST_LEN)) {
156 printf("alloc error: Fail");
157 return -1;
158 }
159 dest4 = buf;
160
161 if (posix_memalign(&buf, 64, TEST_LEN)) {
162 printf("alloc error: Fail");
163 return -1;
164 }
165 dest5 = buf;
166
167 if (posix_memalign(&buf, 64, TEST_LEN)) {
168 printf("alloc error: Fail");
169 return -1;
170 }
171 dest_ref1 = buf;
172
173 if (posix_memalign(&buf, 64, TEST_LEN)) {
174 printf("alloc error: Fail");
175 return -1;
176 }
177 dest_ref2 = buf;
178
179 if (posix_memalign(&buf, 64, TEST_LEN)) {
180 printf("alloc error: Fail");
181 return -1;
182 }
183 dest_ref3 = buf;
184
185 if (posix_memalign(&buf, 64, TEST_LEN)) {
186 printf("alloc error: Fail");
187 return -1;
188 }
189 dest_ref4 = buf;
190
191 if (posix_memalign(&buf, 64, TEST_LEN)) {
192 printf("alloc error: Fail");
193 return -1;
194 }
195 dest_ref5 = buf;
196
197 dest_ptrs[0] = dest1;
198 dest_ptrs[1] = dest2;
199 dest_ptrs[2] = dest3;
200 dest_ptrs[3] = dest4;
201 dest_ptrs[4] = dest5;
202
203 // Test of all zeros
204 for (i = 0; i < TEST_SOURCES; i++)
205 memset(buffs[i], 0, TEST_LEN);
206
207 memset(dest1, 0, TEST_LEN);
208 memset(dest2, 0, TEST_LEN);
209 memset(dest3, 0, TEST_LEN);
210 memset(dest4, 0, TEST_LEN);
211 memset(dest5, 0, TEST_LEN);
212 memset(dest_ref1, 0, TEST_LEN);
213 memset(dest_ref2, 0, TEST_LEN);
214 memset(dest_ref3, 0, TEST_LEN);
215 memset(dest_ref4, 0, TEST_LEN);
216 memset(dest_ref5, 0, TEST_LEN);
217 memset(g1, 2, TEST_SOURCES);
218 memset(g2, 1, TEST_SOURCES);
219 memset(g3, 7, TEST_SOURCES);
220 memset(g4, 9, TEST_SOURCES);
221 memset(g5, 4, TEST_SOURCES);
222
223 for (i = 0; i < TEST_SOURCES; i++) {
224 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
225 gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
226 gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
227 gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
228 gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]);
229 }
230
231 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
232 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
233 dest_ref2);
234 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
235 dest_ref3);
236 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
237 dest_ref4);
238 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
239 dest_ref5);
240
241 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
242
243 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
244 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
245 dump_matrix(buffs, 5, TEST_SOURCES);
246 printf("dprod_base:");
247 dump(dest_ref1, 25);
248 printf("dprod_dut:");
249 dump(dest1, 25);
250 return -1;
251 }
252 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
253 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
254 dump_matrix(buffs, 5, TEST_SOURCES);
255 printf("dprod_base:");
256 dump(dest_ref2, 25);
257 printf("dprod_dut:");
258 dump(dest2, 25);
259 return -1;
260 }
261 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
262 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
263 dump_matrix(buffs, 5, TEST_SOURCES);
264 printf("dprod_base:");
265 dump(dest_ref3, 25);
266 printf("dprod_dut:");
267 dump(dest3, 25);
268 return -1;
269 }
270 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
271 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
272 dump_matrix(buffs, 5, TEST_SOURCES);
273 printf("dprod_base:");
274 dump(dest_ref4, 25);
275 printf("dprod_dut:");
276 dump(dest4, 25);
277 return -1;
278 }
279 if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
280 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n");
281 dump_matrix(buffs, 5, TEST_SOURCES);
282 printf("dprod_base:");
283 dump(dest_ref5, 25);
284 printf("dprod_dut:");
285 dump(dest5, 25);
286 return -1;
287 }
288 putchar('.');
289
290 // Rand data test
291
292 for (rtest = 0; rtest < RANDOMS; rtest++) {
293 for (i = 0; i < TEST_SOURCES; i++)
294 for (j = 0; j < TEST_LEN; j++)
295 buffs[i][j] = rand();
296
297 for (i = 0; i < TEST_SOURCES; i++) {
298 g1[i] = rand();
299 g2[i] = rand();
300 g3[i] = rand();
301 g4[i] = rand();
302 g5[i] = rand();
303 }
304
305 for (i = 0; i < TEST_SOURCES; i++) {
306 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
307 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
308 gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
309 gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
310 gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
311 }
312
313 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
314 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
315 buffs, dest_ref2);
316 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
317 buffs, dest_ref3);
318 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
319 buffs, dest_ref4);
320 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
321 buffs, dest_ref5);
322
323 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
324
325 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
326 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
327 dump_matrix(buffs, 5, TEST_SOURCES);
328 printf("dprod_base:");
329 dump(dest_ref1, 25);
330 printf("dprod_dut:");
331 dump(dest1, 25);
332 return -1;
333 }
334 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
335 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
336 dump_matrix(buffs, 5, TEST_SOURCES);
337 printf("dprod_base:");
338 dump(dest_ref2, 25);
339 printf("dprod_dut:");
340 dump(dest2, 25);
341 return -1;
342 }
343 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
344 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
345 dump_matrix(buffs, 5, TEST_SOURCES);
346 printf("dprod_base:");
347 dump(dest_ref3, 25);
348 printf("dprod_dut:");
349 dump(dest3, 25);
350 return -1;
351 }
352 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
353 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
354 dump_matrix(buffs, 5, TEST_SOURCES);
355 printf("dprod_base:");
356 dump(dest_ref4, 25);
357 printf("dprod_dut:");
358 dump(dest4, 25);
359 return -1;
360 }
361 if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
362 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
363 dump_matrix(buffs, 5, TEST_SOURCES);
364 printf("dprod_base:");
365 dump(dest_ref5, 25);
366 printf("dprod_dut:");
367 dump(dest5, 25);
368 return -1;
369 }
370
371 putchar('.');
372 }
373
374 // Rand data test with varied parameters
375 for (rtest = 0; rtest < RANDOMS; rtest++) {
376 for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
377 for (i = 0; i < srcs; i++)
378 for (j = 0; j < TEST_LEN; j++)
379 buffs[i][j] = rand();
380
381 for (i = 0; i < srcs; i++) {
382 g1[i] = rand();
383 g2[i] = rand();
384 g3[i] = rand();
385 g4[i] = rand();
386 g5[i] = rand();
387 }
388
389 for (i = 0; i < srcs; i++) {
390 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
391 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
392 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
393 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
394 gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
395 }
396
397 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
398 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
399 dest_ref2);
400 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
401 dest_ref3);
402 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
403 dest_ref4);
404 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs,
405 dest_ref5);
406
407 FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
408
409 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
410 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
411 " test1 srcs=%d\n", srcs);
412 dump_matrix(buffs, 5, TEST_SOURCES);
413 printf("dprod_base:");
414 dump(dest_ref1, 25);
415 printf("dprod_dut:");
416 dump(dest1, 25);
417 return -1;
418 }
419 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
420 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
421 " test2 srcs=%d\n", srcs);
422 dump_matrix(buffs, 5, TEST_SOURCES);
423 printf("dprod_base:");
424 dump(dest_ref2, 25);
425 printf("dprod_dut:");
426 dump(dest2, 25);
427 return -1;
428 }
429 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
430 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
431 " test3 srcs=%d\n", srcs);
432 dump_matrix(buffs, 5, TEST_SOURCES);
433 printf("dprod_base:");
434 dump(dest_ref3, 25);
435 printf("dprod_dut:");
436 dump(dest3, 25);
437 return -1;
438 }
439 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
440 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
441 " test4 srcs=%d\n", srcs);
442 dump_matrix(buffs, 5, TEST_SOURCES);
443 printf("dprod_base:");
444 dump(dest_ref4, 25);
445 printf("dprod_dut:");
446 dump(dest4, 25);
447 return -1;
448 }
449 if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
450 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
451 " test5 srcs=%d\n", srcs);
452 dump_matrix(buffs, 5, TEST_SOURCES);
453 printf("dprod_base:");
454 dump(dest_ref5, 25);
455 printf("dprod_dut:");
456 dump(dest5, 25);
457 return -1;
458 }
459
460 putchar('.');
461 }
462 }
463
464 // Run tests at end of buffer for Electric Fence
465 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
466 for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
467 for (i = 0; i < TEST_SOURCES; i++)
468 for (j = 0; j < TEST_LEN; j++)
469 buffs[i][j] = rand();
470
471 for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
472 efence_buffs[i] = buffs[i] + TEST_LEN - size;
473
474 for (i = 0; i < TEST_SOURCES; i++) {
475 g1[i] = rand();
476 g2[i] = rand();
477 g3[i] = rand();
478 g4[i] = rand();
479 g5[i] = rand();
480 }
481
482 for (i = 0; i < TEST_SOURCES; i++) {
483 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
484 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
485 gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
486 gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
487 gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
488 }
489
490 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
491 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
492 efence_buffs, dest_ref2);
493 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
494 efence_buffs, dest_ref3);
495 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
496 efence_buffs, dest_ref4);
497 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
498 efence_buffs, dest_ref5);
499
500 FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
501
502 if (0 != memcmp(dest_ref1, dest1, size)) {
503 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
504 dump_matrix(efence_buffs, 5, TEST_SOURCES);
505 printf("dprod_base:");
506 dump(dest_ref1, align);
507 printf("dprod_dut:");
508 dump(dest1, align);
509 return -1;
510 }
511
512 if (0 != memcmp(dest_ref2, dest2, size)) {
513 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
514 dump_matrix(efence_buffs, 5, TEST_SOURCES);
515 printf("dprod_base:");
516 dump(dest_ref2, align);
517 printf("dprod_dut:");
518 dump(dest2, align);
519 return -1;
520 }
521
522 if (0 != memcmp(dest_ref3, dest3, size)) {
523 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
524 dump_matrix(efence_buffs, 5, TEST_SOURCES);
525 printf("dprod_base:");
526 dump(dest_ref3, align);
527 printf("dprod_dut:");
528 dump(dest3, align);
529 return -1;
530 }
531
532 if (0 != memcmp(dest_ref4, dest4, size)) {
533 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
534 dump_matrix(efence_buffs, 5, TEST_SOURCES);
535 printf("dprod_base:");
536 dump(dest_ref4, align);
537 printf("dprod_dut:");
538 dump(dest4, align);
539 return -1;
540 }
541
542 if (0 != memcmp(dest_ref5, dest5, size)) {
543 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
544 dump_matrix(efence_buffs, 5, TEST_SOURCES);
545 printf("dprod_base:");
546 dump(dest_ref5, align);
547 printf("dprod_dut:");
548 dump(dest5, align);
549 return -1;
550 }
551
552 putchar('.');
553 }
554
555 // Test rand ptr alignment if available
556
557 for (rtest = 0; rtest < RANDOMS; rtest++) {
558 size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
559 srcs = rand() % TEST_SOURCES;
560 if (srcs == 0)
561 continue;
562
563 offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
564 // Add random offsets
565 for (i = 0; i < srcs; i++)
566 ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
567
568 udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
569 udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
570 udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
571 udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
572 udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset));
573
574 memset(dest1, 0, TEST_LEN); // zero pad to check write-over
575 memset(dest2, 0, TEST_LEN);
576 memset(dest3, 0, TEST_LEN);
577 memset(dest4, 0, TEST_LEN);
578 memset(dest5, 0, TEST_LEN);
579
580 for (i = 0; i < srcs; i++)
581 for (j = 0; j < size; j++)
582 ubuffs[i][j] = rand();
583
584 for (i = 0; i < srcs; i++) {
585 g1[i] = rand();
586 g2[i] = rand();
587 g3[i] = rand();
588 g4[i] = rand();
589 g5[i] = rand();
590 }
591
592 for (i = 0; i < srcs; i++) {
593 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
594 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
595 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
596 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
597 gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
598 }
599
600 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
601 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
602 gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
603 gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
604 gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5);
605
606 FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
607
608 if (memcmp(dest_ref1, udest_ptrs[0], size)) {
609 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
610 srcs);
611 dump_matrix(ubuffs, 5, TEST_SOURCES);
612 printf("dprod_base:");
613 dump(dest_ref1, 25);
614 printf("dprod_dut:");
615 dump(udest_ptrs[0], 25);
616 return -1;
617 }
618 if (memcmp(dest_ref2, udest_ptrs[1], size)) {
619 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
620 srcs);
621 dump_matrix(ubuffs, 5, TEST_SOURCES);
622 printf("dprod_base:");
623 dump(dest_ref2, 25);
624 printf("dprod_dut:");
625 dump(udest_ptrs[1], 25);
626 return -1;
627 }
628 if (memcmp(dest_ref3, udest_ptrs[2], size)) {
629 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
630 srcs);
631 dump_matrix(ubuffs, 5, TEST_SOURCES);
632 printf("dprod_base:");
633 dump(dest_ref3, 25);
634 printf("dprod_dut:");
635 dump(udest_ptrs[2], 25);
636 return -1;
637 }
638 if (memcmp(dest_ref4, udest_ptrs[3], size)) {
639 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
640 srcs);
641 dump_matrix(ubuffs, 5, TEST_SOURCES);
642 printf("dprod_base:");
643 dump(dest_ref4, 25);
644 printf("dprod_dut:");
645 dump(udest_ptrs[3], 25);
646 return -1;
647 }
648 if (memcmp(dest_ref5, udest_ptrs[4], size)) {
649 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
650 srcs);
651 dump_matrix(ubuffs, 5, TEST_SOURCES);
652 printf("dprod_base:");
653 dump(dest_ref5, 25);
654 printf("dprod_dut:");
655 dump(udest_ptrs[4], 25);
656 return -1;
657 }
658 // Confirm that padding around dests is unchanged
659 memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
660 offset = udest_ptrs[0] - dest1;
661
662 if (memcmp(dest1, dest_ref1, offset)) {
663 printf("Fail rand ualign pad1 start\n");
664 return -1;
665 }
666 if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
667 printf("Fail rand ualign pad1 end\n");
668 return -1;
669 }
670
671 offset = udest_ptrs[1] - dest2;
672 if (memcmp(dest2, dest_ref1, offset)) {
673 printf("Fail rand ualign pad2 start\n");
674 return -1;
675 }
676 if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
677 printf("Fail rand ualign pad2 end\n");
678 return -1;
679 }
680
681 offset = udest_ptrs[2] - dest3;
682 if (memcmp(dest3, dest_ref1, offset)) {
683 printf("Fail rand ualign pad3 start\n");
684 return -1;
685 }
686 if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
687 printf("Fail rand ualign pad3 end\n");
688 return -1;
689 }
690
691 offset = udest_ptrs[3] - dest4;
692 if (memcmp(dest4, dest_ref1, offset)) {
693 printf("Fail rand ualign pad4 start\n");
694 return -1;
695 }
696 if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
697 printf("Fail rand ualign pad4 end\n");
698 return -1;
699 }
700
701 offset = udest_ptrs[4] - dest5;
702 if (memcmp(dest5, dest_ref1, offset)) {
703 printf("Fail rand ualign pad5 start\n");
704 return -1;
705 }
706 if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
707 printf("Fail rand ualign pad5 end\n");
708 return -1;
709 }
710
711 putchar('.');
712 }
713
714 // Test all size alignment
715 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
716
717 for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
718 srcs = TEST_SOURCES;
719
720 for (i = 0; i < srcs; i++)
721 for (j = 0; j < size; j++)
722 buffs[i][j] = rand();
723
724 for (i = 0; i < srcs; i++) {
725 g1[i] = rand();
726 g2[i] = rand();
727 g3[i] = rand();
728 g4[i] = rand();
729 g5[i] = rand();
730 }
731
732 for (i = 0; i < srcs; i++) {
733 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
734 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
735 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
736 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
737 gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
738 }
739
740 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
741 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
742 gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
743 gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
744 gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5);
745
746 FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
747
748 if (memcmp(dest_ref1, dest_ptrs[0], size)) {
749 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
750 size);
751 dump_matrix(buffs, 5, TEST_SOURCES);
752 printf("dprod_base:");
753 dump(dest_ref1, 25);
754 printf("dprod_dut:");
755 dump(dest_ptrs[0], 25);
756
757 return -1;
758 }
759 if (memcmp(dest_ref2, dest_ptrs[1], size)) {
760 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
761 size);
762 dump_matrix(buffs, 5, TEST_SOURCES);
763 printf("dprod_base:");
764 dump(dest_ref2, 25);
765 printf("dprod_dut:");
766 dump(dest_ptrs[1], 25);
767 return -1;
768 }
769 if (memcmp(dest_ref3, dest_ptrs[2], size)) {
770 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
771 size);
772 dump_matrix(buffs, 5, TEST_SOURCES);
773 printf("dprod_base:");
774 dump(dest_ref3, 25);
775 printf("dprod_dut:");
776 dump(dest_ptrs[2], 25);
777 return -1;
778 }
779 if (memcmp(dest_ref4, dest_ptrs[3], size)) {
780 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
781 size);
782 dump_matrix(buffs, 5, TEST_SOURCES);
783 printf("dprod_base:");
784 dump(dest_ref4, 25);
785 printf("dprod_dut:");
786 dump(dest_ptrs[3], 25);
787 return -1;
788 }
789 if (memcmp(dest_ref5, dest_ptrs[4], size)) {
790 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
791 size);
792 dump_matrix(buffs, 5, TEST_SOURCES);
793 printf("dprod_base:");
794 dump(dest_ref5, 25);
795 printf("dprod_dut:");
796 dump(dest_ptrs[4], 25);
797 return -1;
798 }
799 }
800
801 printf("Pass\n");
802 return 0;
803
804 }
106106 %define return rax
107107 %define return.w eax
108108
109 %define func(x) x:
109 %define func(x) x: endbranch
110110 %macro FUNC_SAVE 0
111111 push r12
112112 push r13
177177
178178
179179 align 16
180 global gf_5vect_mad_avx:ISAL_SYM_TYPE_FUNCTION
180 mk_global gf_5vect_mad_avx, function
181181 func(gf_5vect_mad_avx)
182182 FUNC_SAVE
183183 sub len, 16
102102 %define return rax
103103 %define return.w eax
104104
105 %define func(x) x:
105 %define func(x) x: endbranch
106106 %define FUNC_SAVE
107107 %define FUNC_RESTORE
108108 %endif
165165 %define xd5 ymm9
166166
167167 align 16
168 global gf_5vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION
168 mk_global gf_5vect_mad_avx2, function
169169 func(gf_5vect_mad_avx2)
170170 FUNC_SAVE
171171 sub len, 32
4444 %define tmp r11
4545 %define tmp2 r10
4646 %define return rax
47 %define func(x) x:
47 %define func(x) x: endbranch
4848 %define FUNC_SAVE
4949 %define FUNC_RESTORE
5050 %endif
166166 %define xtmph5 zmm27
167167
168168 align 16
169 global gf_5vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION
169 mk_global gf_5vect_mad_avx512, function
170170 func(gf_5vect_mad_avx512)
171171 FUNC_SAVE
172172 sub len, 64
106106 %define return rax
107107 %define return.w eax
108108
109 %define func(x) x:
109 %define func(x) x: endbranch
110110 %macro FUNC_SAVE 0
111111 push r12
112112 push r13
176176
177177
178178 align 16
179 global gf_5vect_mad_sse:ISAL_SYM_TYPE_FUNCTION
179 mk_global gf_5vect_mad_sse, function
180180 func(gf_5vect_mad_sse)
181181 FUNC_SAVE
182182 sub len, 16
5050 %define PS 8
5151 %define LOG_PS 3
5252
53 %define func(x) x:
53 %define func(x) x: endbranch
5454 %macro FUNC_SAVE 0
5555 push r12
5656 push r13
8888 %define func(x) proc_frame x
8989 %macro FUNC_SAVE 0
9090 alloc_stack stack_size
91 save_xmm128 xmm6, 0*16
92 save_xmm128 xmm7, 1*16
93 save_xmm128 xmm8, 2*16
94 save_xmm128 xmm9, 3*16
95 save_xmm128 xmm10, 4*16
96 save_xmm128 xmm11, 5*16
97 save_xmm128 xmm12, 6*16
98 save_xmm128 xmm13, 7*16
99 save_xmm128 xmm14, 8*16
100 save_xmm128 xmm15, 9*16
91 vmovdqa [rsp + 0*16], xmm6
92 vmovdqa [rsp + 1*16], xmm7
93 vmovdqa [rsp + 2*16], xmm8
94 vmovdqa [rsp + 3*16], xmm9
95 vmovdqa [rsp + 4*16], xmm10
96 vmovdqa [rsp + 5*16], xmm11
97 vmovdqa [rsp + 6*16], xmm12
98 vmovdqa [rsp + 7*16], xmm13
99 vmovdqa [rsp + 8*16], xmm14
100 vmovdqa [rsp + 9*16], xmm15
101101 save_reg r12, 10*16 + 0*8
102102 save_reg r13, 10*16 + 1*8
103103 save_reg r14, 10*16 + 2*8
181181 %define xp6 xmm7
182182
183183 align 16
184 global gf_6vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
184 mk_global gf_6vect_dot_prod_avx, function
185185 func(gf_6vect_dot_prod_avx)
186186 FUNC_SAVE
187187 sub len, 16
5252 %define PS 8
5353 %define LOG_PS 3
5454
55 %define func(x) x:
55 %define func(x) x: endbranch
5656 %macro FUNC_SAVE 0
5757 push r12
5858 push r13
186186 %define xp6 ymm7
187187
188188 align 16
189 global gf_6vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
189 mk_global gf_6vect_dot_prod_avx2, function
190190 func(gf_6vect_dot_prod_avx2)
191191 FUNC_SAVE
192192 sub len, 32
5656 %define PS 8
5757 %define LOG_PS 3
5858
59 %define func(x) x:
59 %define func(x) x: endbranch
6060 %macro FUNC_SAVE 0
6161 push r12
6262 push r13
112112 vmovdqa [rsp + 7*16], xmm13
113113 vmovdqa [rsp + 8*16], xmm14
114114 vmovdqa [rsp + 9*16], xmm15
115 save_reg r12, 9*16 + 0*8
116 save_reg r13, 9*16 + 1*8
117 save_reg r14, 9*16 + 2*8
118 save_reg r15, 9*16 + 3*8
119 save_reg rdi, 9*16 + 4*8
120 save_reg rsi, 9*16 + 5*8
121 save_reg rbp, 9*16 + 6*8
122 save_reg rbx, 9*16 + 7*8
115 save_reg r12, 10*16 + 0*8
116 save_reg r13, 10*16 + 1*8
117 save_reg r14, 10*16 + 2*8
118 save_reg r15, 10*16 + 3*8
119 save_reg rdi, 10*16 + 4*8
120 save_reg rsi, 10*16 + 5*8
121 save_reg rbp, 10*16 + 6*8
122 save_reg rbx, 10*16 + 7*8
123123 end_prolog
124124 mov arg4, arg(4)
125125 %endmacro
135135 vmovdqa xmm13, [rsp + 7*16]
136136 vmovdqa xmm14, [rsp + 8*16]
137137 vmovdqa xmm15, [rsp + 9*16]
138 mov r12, [rsp + 9*16 + 0*8]
139 mov r13, [rsp + 9*16 + 1*8]
140 mov r14, [rsp + 9*16 + 2*8]
141 mov r15, [rsp + 9*16 + 3*8]
142 mov rdi, [rsp + 9*16 + 4*8]
143 mov rsi, [rsp + 9*16 + 5*8]
144 mov rbp, [rsp + 9*16 + 6*8]
145 mov rbx, [rsp + 9*16 + 7*8]
138 mov r12, [rsp + 10*16 + 0*8]
139 mov r13, [rsp + 10*16 + 1*8]
140 mov r14, [rsp + 10*16 + 2*8]
141 mov r15, [rsp + 10*16 + 3*8]
142 mov rdi, [rsp + 10*16 + 4*8]
143 mov rsi, [rsp + 10*16 + 5*8]
144 mov rbp, [rsp + 10*16 + 6*8]
145 mov rbx, [rsp + 10*16 + 7*8]
146146 add rsp, stack_size
147147 %endmacro
148148 %endif
214214 section .text
215215
216216 align 16
217 global gf_6vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
217 mk_global gf_6vect_dot_prod_avx512, function
218218 func(gf_6vect_dot_prod_avx512)
219219 FUNC_SAVE
220220 sub len, 64
5050 %define PS 8
5151 %define LOG_PS 3
5252
53 %define func(x) x:
53 %define func(x) x: endbranch
5454 %macro FUNC_SAVE 0
5555 push r12
5656 push r13
181181 %define xp6 xmm13
182182
183183 align 16
184 global gf_6vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
184 mk_global gf_6vect_dot_prod_sse, function
185185 func(gf_6vect_dot_prod_sse)
186186 FUNC_SAVE
187187 sub len, 16
+0
-911
erasure_code/gf_6vect_dot_prod_sse_test.c less more
0 /**********************************************************************
1 Copyright(c) 2011-2015 Intel Corporation All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in
10 the documentation and/or other materials provided with the
11 distribution.
12 * Neither the name of Intel Corporation nor the names of its
13 contributors may be used to endorse or promote products derived
14 from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 **********************************************************************/
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h> // for memset, memcmp
32 #include "erasure_code.h"
33 #include "types.h"
34
// Build-time knobs for this test. FUNCTION_UNDER_TEST, TEST_MIN_SIZE,
// TEST_SOURCES and RANDOMS keep any value supplied by the build and fall
// back to the defaults below otherwise.
35 #ifndef FUNCTION_UNDER_TEST
36 # define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse
37 #endif
// Smallest length used by the size-sweep loops in main().
38 #ifndef TEST_MIN_SIZE
39 # define TEST_MIN_SIZE 16
40 #endif
41
// Two-level stringification so xstr(FUNCTION_UNDER_TEST) yields the expanded
// function name as a string literal for the log messages.
42 #define str(s) #s
43 #define xstr(s) str(s)
44
// TEST_LEN is the byte size of every source/destination buffer allocated in
// main(); TEST_SIZE is the upper bound of the end-of-buffer size sweep.
// TEST_MEM, TEST_LOOPS and TEST_TYPE_STR are not referenced by the code in
// this file.
45 #define TEST_LEN 8192
46 #define TEST_SIZE (TEST_LEN/2)
47 #define TEST_MEM TEST_SIZE
48 #define TEST_LOOPS 20000
49 #define TEST_TYPE_STR ""
50
// TEST_SOURCES: number of source buffers; RANDOMS: iterations of each
// randomized test loop.
51 #ifndef TEST_SOURCES
52 # define TEST_SOURCES 16
53 #endif
54 #ifndef RANDOMS
55 # define RANDOMS 20
56 #endif
57
// With EC_ALIGNED_ADDR defined both ranges are 0, so main() applies no random
// pointer offsets and steps lengths by 16; otherwise offsets are drawn from
// 0..31 and lengths are stepped by 1.
58 #ifdef EC_ALIGNED_ADDR
59 // Define power of 2 range to check ptr, len alignment
60 # define PTR_ALIGN_CHK_B 0
61 # define LEN_ALIGN_CHK_B 0 // 0 for aligned only
62 #else
63 // Define power of 2 range to check ptr, len alignment
64 # define PTR_ALIGN_CHK_B 32
65 # define LEN_ALIGN_CHK_B 32 // 0 for aligned only
66 #endif
67
// Shorthand byte type used for all buffers and coefficient arrays.
68 typedef unsigned char u8;
69
/*
 * Print the first len bytes of buf to stdout as hex (" %2x" each), breaking
 * the line after every 32nd byte, and always finish with one more newline.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", 0xff & buf[pos]);
		pos++;
		/* Row break once a full group of 32 bytes has been written. */
		if (pos % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
80
/*
 * Print a k-row by m-column view of the buffers in s: row r shows the first
 * m bytes of s[r] in hex, one row per line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
92
/*
 * Print a flat, row-major k x m byte array in hex: m values per line, k
 * lines, followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;
	int idx = 0;		/* running row-major index, equals col + row * m */

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++) {
			printf(" %2x", 0xff & s[idx]);
			idx++;
		}
		printf("\n");
	}
	printf("\n");
}
104
105 int main(int argc, char *argv[])
106 {
107 int i, j, rtest, srcs;
108 void *buf;
109 u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
110 u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls;
111 u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1;
112 u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6;
113 u8 *dest_ptrs[6], *buffs[TEST_SOURCES];
114
115 int align, size;
116 unsigned char *efence_buffs[TEST_SOURCES];
117 unsigned int offset;
118 u8 *ubuffs[TEST_SOURCES];
119 u8 *udest_ptrs[6];
120 printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
121
122 // Allocate the arrays
123 for (i = 0; i < TEST_SOURCES; i++) {
124 if (posix_memalign(&buf, 64, TEST_LEN)) {
125 printf("alloc error: Fail");
126 return -1;
127 }
128 buffs[i] = buf;
129 }
130
131 if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) {
132 printf("alloc error: Fail");
133 return -1;
134 }
135 g_tbls = buf;
136
137 if (posix_memalign(&buf, 64, TEST_LEN)) {
138 printf("alloc error: Fail");
139 return -1;
140 }
141 dest1 = buf;
142
143 if (posix_memalign(&buf, 64, TEST_LEN)) {
144 printf("alloc error: Fail");
145 return -1;
146 }
147 dest2 = buf;
148
149 if (posix_memalign(&buf, 64, TEST_LEN)) {
150 printf("alloc error: Fail");
151 return -1;
152 }
153 dest3 = buf;
154
155 if (posix_memalign(&buf, 64, TEST_LEN)) {
156 printf("alloc error: Fail");
157 return -1;
158 }
159 dest4 = buf;
160
161 if (posix_memalign(&buf, 64, TEST_LEN)) {
162 printf("alloc error: Fail");
163 return -1;
164 }
165 dest5 = buf;
166
167 if (posix_memalign(&buf, 64, TEST_LEN)) {
168 printf("alloc error: Fail");
169 return -1;
170 }
171 dest6 = buf;
172
173 if (posix_memalign(&buf, 64, TEST_LEN)) {
174 printf("alloc error: Fail");
175 return -1;
176 }
177 dest_ref1 = buf;
178
179 if (posix_memalign(&buf, 64, TEST_LEN)) {
180 printf("alloc error: Fail");
181 return -1;
182 }
183 dest_ref2 = buf;
184
185 if (posix_memalign(&buf, 64, TEST_LEN)) {
186 printf("alloc error: Fail");
187 return -1;
188 }
189 dest_ref3 = buf;
190
191 if (posix_memalign(&buf, 64, TEST_LEN)) {
192 printf("alloc error: Fail");
193 return -1;
194 }
195 dest_ref4 = buf;
196
197 if (posix_memalign(&buf, 64, TEST_LEN)) {
198 printf("alloc error: Fail");
199 return -1;
200 }
201 dest_ref5 = buf;
202
203 if (posix_memalign(&buf, 64, TEST_LEN)) {
204 printf("alloc error: Fail");
205 return -1;
206 }
207 dest_ref6 = buf;
208
209 dest_ptrs[0] = dest1;
210 dest_ptrs[1] = dest2;
211 dest_ptrs[2] = dest3;
212 dest_ptrs[3] = dest4;
213 dest_ptrs[4] = dest5;
214 dest_ptrs[5] = dest6;
215
216 // Test of all zeros
217 for (i = 0; i < TEST_SOURCES; i++)
218 memset(buffs[i], 0, TEST_LEN);
219
220 memset(dest1, 0, TEST_LEN);
221 memset(dest2, 0, TEST_LEN);
222 memset(dest3, 0, TEST_LEN);
223 memset(dest4, 0, TEST_LEN);
224 memset(dest5, 0, TEST_LEN);
225 memset(dest6, 0, TEST_LEN);
226 memset(dest_ref1, 0, TEST_LEN);
227 memset(dest_ref2, 0, TEST_LEN);
228 memset(dest_ref3, 0, TEST_LEN);
229 memset(dest_ref4, 0, TEST_LEN);
230 memset(dest_ref5, 0, TEST_LEN);
231 memset(dest_ref6, 0, TEST_LEN);
232 memset(g1, 2, TEST_SOURCES);
233 memset(g2, 1, TEST_SOURCES);
234 memset(g3, 7, TEST_SOURCES);
235 memset(g4, 9, TEST_SOURCES);
236 memset(g5, 4, TEST_SOURCES);
237 memset(g6, 0xe6, TEST_SOURCES);
238
239 for (i = 0; i < TEST_SOURCES; i++) {
240 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
241 gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
242 gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
243 gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
244 gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]);
245 gf_vect_mul_init(g6[i], &g_tbls[160 * TEST_SOURCES + i * 32]);
246 }
247
248 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
249 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
250 dest_ref2);
251 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
252 dest_ref3);
253 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
254 dest_ref4);
255 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
256 dest_ref5);
257 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs,
258 dest_ref6);
259
260 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
261
262 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
263 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
264 dump_matrix(buffs, 5, TEST_SOURCES);
265 printf("dprod_base:");
266 dump(dest_ref1, 25);
267 printf("dprod_dut:");
268 dump(dest1, 25);
269 return -1;
270 }
271 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
272 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
273 dump_matrix(buffs, 5, TEST_SOURCES);
274 printf("dprod_base:");
275 dump(dest_ref2, 25);
276 printf("dprod_dut:");
277 dump(dest2, 25);
278 return -1;
279 }
280 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
281 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
282 dump_matrix(buffs, 5, TEST_SOURCES);
283 printf("dprod_base:");
284 dump(dest_ref3, 25);
285 printf("dprod_dut:");
286 dump(dest3, 25);
287 return -1;
288 }
289 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
290 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
291 dump_matrix(buffs, 5, TEST_SOURCES);
292 printf("dprod_base:");
293 dump(dest_ref4, 25);
294 printf("dprod_dut:");
295 dump(dest4, 25);
296 return -1;
297 }
298 if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
299 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n");
300 dump_matrix(buffs, 5, TEST_SOURCES);
301 printf("dprod_base:");
302 dump(dest_ref5, 25);
303 printf("dprod_dut:");
304 dump(dest5, 25);
305 return -1;
306 }
307 if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
308 printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test6\n");
309 dump_matrix(buffs, 5, TEST_SOURCES);
310 printf("dprod_base:");
311 dump(dest_ref6, 25);
312 printf("dprod_dut:");
313 dump(dest6, 25);
314 return -1;
315 }
316 putchar('.');
317
318 // Rand data test
319
320 for (rtest = 0; rtest < RANDOMS; rtest++) {
321 for (i = 0; i < TEST_SOURCES; i++)
322 for (j = 0; j < TEST_LEN; j++)
323 buffs[i][j] = rand();
324
325 for (i = 0; i < TEST_SOURCES; i++) {
326 g1[i] = rand();
327 g2[i] = rand();
328 g3[i] = rand();
329 g4[i] = rand();
330 g5[i] = rand();
331 g6[i] = rand();
332 }
333
334 for (i = 0; i < TEST_SOURCES; i++) {
335 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
336 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
337 gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
338 gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
339 gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
340 gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]);
341 }
342
343 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
344 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
345 buffs, dest_ref2);
346 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
347 buffs, dest_ref3);
348 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
349 buffs, dest_ref4);
350 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
351 buffs, dest_ref5);
352 gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
353 buffs, dest_ref6);
354
355 FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
356
357 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
358 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
359 dump_matrix(buffs, 5, TEST_SOURCES);
360 printf("dprod_base:");
361 dump(dest_ref1, 25);
362 printf("dprod_dut:");
363 dump(dest1, 25);
364 return -1;
365 }
366 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
367 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
368 dump_matrix(buffs, 5, TEST_SOURCES);
369 printf("dprod_base:");
370 dump(dest_ref2, 25);
371 printf("dprod_dut:");
372 dump(dest2, 25);
373 return -1;
374 }
375 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
376 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
377 dump_matrix(buffs, 5, TEST_SOURCES);
378 printf("dprod_base:");
379 dump(dest_ref3, 25);
380 printf("dprod_dut:");
381 dump(dest3, 25);
382 return -1;
383 }
384 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
385 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
386 dump_matrix(buffs, 5, TEST_SOURCES);
387 printf("dprod_base:");
388 dump(dest_ref4, 25);
389 printf("dprod_dut:");
390 dump(dest4, 25);
391 return -1;
392 }
393 if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
394 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
395 dump_matrix(buffs, 5, TEST_SOURCES);
396 printf("dprod_base:");
397 dump(dest_ref5, 25);
398 printf("dprod_dut:");
399 dump(dest5, 25);
400 return -1;
401 }
402 if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
403 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest);
404 dump_matrix(buffs, 5, TEST_SOURCES);
405 printf("dprod_base:");
406 dump(dest_ref6, 25);
407 printf("dprod_dut:");
408 dump(dest6, 25);
409 return -1;
410 }
411
412 putchar('.');
413 }
414
415 // Rand data test with varied parameters
416 for (rtest = 0; rtest < RANDOMS; rtest++) {
417 for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
418 for (i = 0; i < srcs; i++)
419 for (j = 0; j < TEST_LEN; j++)
420 buffs[i][j] = rand();
421
422 for (i = 0; i < srcs; i++) {
423 g1[i] = rand();
424 g2[i] = rand();
425 g3[i] = rand();
426 g4[i] = rand();
427 g5[i] = rand();
428 g6[i] = rand();
429 }
430
431 for (i = 0; i < srcs; i++) {
432 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
433 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
434 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
435 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
436 gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
437 gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
438 }
439
440 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
441 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
442 dest_ref2);
443 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
444 dest_ref3);
445 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
446 dest_ref4);
447 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs,
448 dest_ref5);
449 gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[160 * srcs], buffs,
450 dest_ref6);
451
452 FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
453
454 if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
455 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
456 " test1 srcs=%d\n", srcs);
457 dump_matrix(buffs, 5, TEST_SOURCES);
458 printf("dprod_base:");
459 dump(dest_ref1, 25);
460 printf("dprod_dut:");
461 dump(dest1, 25);
462 return -1;
463 }
464 if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
465 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
466 " test2 srcs=%d\n", srcs);
467 dump_matrix(buffs, 5, TEST_SOURCES);
468 printf("dprod_base:");
469 dump(dest_ref2, 25);
470 printf("dprod_dut:");
471 dump(dest2, 25);
472 return -1;
473 }
474 if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
475 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
476 " test3 srcs=%d\n", srcs);
477 dump_matrix(buffs, 5, TEST_SOURCES);
478 printf("dprod_base:");
479 dump(dest_ref3, 25);
480 printf("dprod_dut:");
481 dump(dest3, 25);
482 return -1;
483 }
484 if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
485 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
486 " test4 srcs=%d\n", srcs);
487 dump_matrix(buffs, 5, TEST_SOURCES);
488 printf("dprod_base:");
489 dump(dest_ref4, 25);
490 printf("dprod_dut:");
491 dump(dest4, 25);
492 return -1;
493 }
494 if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
495 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
496 " test5 srcs=%d\n", srcs);
497 dump_matrix(buffs, 5, TEST_SOURCES);
498 printf("dprod_base:");
499 dump(dest_ref5, 25);
500 printf("dprod_dut:");
501 dump(dest5, 25);
502 return -1;
503 }
504 if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
505 printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
506 " test6 srcs=%d\n", srcs);
507 dump_matrix(buffs, 5, TEST_SOURCES);
508 printf("dprod_base:");
509 dump(dest_ref6, 25);
510 printf("dprod_dut:");
511 dump(dest6, 25);
512 return -1;
513 }
514
515 putchar('.');
516 }
517 }
518
519 // Run tests at end of buffer for Electric Fence
520 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
521 for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
522 for (i = 0; i < TEST_SOURCES; i++)
523 for (j = 0; j < TEST_LEN; j++)
524 buffs[i][j] = rand();
525
526 for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
527 efence_buffs[i] = buffs[i] + TEST_LEN - size;
528
529 for (i = 0; i < TEST_SOURCES; i++) {
530 g1[i] = rand();
531 g2[i] = rand();
532 g3[i] = rand();
533 g4[i] = rand();
534 g5[i] = rand();
535 g6[i] = rand();
536 }
537
538 for (i = 0; i < TEST_SOURCES; i++) {
539 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
540 gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
541 gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
542 gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
543 gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
544 gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]);
545 }
546
547 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
548 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
549 efence_buffs, dest_ref2);
550 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
551 efence_buffs, dest_ref3);
552 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
553 efence_buffs, dest_ref4);
554 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
555 efence_buffs, dest_ref5);
556 gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
557 efence_buffs, dest_ref6);
558
559 FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
560
561 if (0 != memcmp(dest_ref1, dest1, size)) {
562 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
563 dump_matrix(efence_buffs, 5, TEST_SOURCES);
564 printf("dprod_base:");
565 dump(dest_ref1, align);
566 printf("dprod_dut:");
567 dump(dest1, align);
568 return -1;
569 }
570
571 if (0 != memcmp(dest_ref2, dest2, size)) {
572 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
573 dump_matrix(efence_buffs, 5, TEST_SOURCES);
574 printf("dprod_base:");
575 dump(dest_ref2, align);
576 printf("dprod_dut:");
577 dump(dest2, align);
578 return -1;
579 }
580
581 if (0 != memcmp(dest_ref3, dest3, size)) {
582 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
583 dump_matrix(efence_buffs, 5, TEST_SOURCES);
584 printf("dprod_base:");
585 dump(dest_ref3, align);
586 printf("dprod_dut:");
587 dump(dest3, align);
588 return -1;
589 }
590
591 if (0 != memcmp(dest_ref4, dest4, size)) {
592 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
593 dump_matrix(efence_buffs, 5, TEST_SOURCES);
594 printf("dprod_base:");
595 dump(dest_ref4, align);
596 printf("dprod_dut:");
597 dump(dest4, align);
598 return -1;
599 }
600
601 if (0 != memcmp(dest_ref5, dest5, size)) {
602 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
603 dump_matrix(efence_buffs, 5, TEST_SOURCES);
604 printf("dprod_base:");
605 dump(dest_ref5, align);
606 printf("dprod_dut:");
607 dump(dest5, align);
608 return -1;
609 }
610
611 if (0 != memcmp(dest_ref6, dest6, size)) {
612 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest);
613 dump_matrix(efence_buffs, 5, TEST_SOURCES);
614 printf("dprod_base:");
615 dump(dest_ref6, align);
616 printf("dprod_dut:");
617 dump(dest6, align);
618 return -1;
619 }
620
621 putchar('.');
622 }
623
624 // Test rand ptr alignment if available
625
626 for (rtest = 0; rtest < RANDOMS; rtest++) {
627 size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
628 srcs = rand() % TEST_SOURCES;
629 if (srcs == 0)
630 continue;
631
632 offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
633 // Add random offsets
634 for (i = 0; i < srcs; i++)
635 ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
636
637 udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
638 udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
639 udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
640 udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
641 udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset));
642 udest_ptrs[5] = dest6 + (rand() & (PTR_ALIGN_CHK_B - offset));
643
644 memset(dest1, 0, TEST_LEN); // zero pad to check write-over
645 memset(dest2, 0, TEST_LEN);
646 memset(dest3, 0, TEST_LEN);
647 memset(dest4, 0, TEST_LEN);
648 memset(dest5, 0, TEST_LEN);
649 memset(dest6, 0, TEST_LEN);
650
651 for (i = 0; i < srcs; i++)
652 for (j = 0; j < size; j++)
653 ubuffs[i][j] = rand();
654
655 for (i = 0; i < srcs; i++) {
656 g1[i] = rand();
657 g2[i] = rand();
658 g3[i] = rand();
659 g4[i] = rand();
660 g5[i] = rand();
661 g6[i] = rand();
662 }
663
664 for (i = 0; i < srcs; i++) {
665 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
666 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
667 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
668 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
669 gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
670 gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
671 }
672
673 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
674 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
675 gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
676 gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
677 gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5);
678 gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], ubuffs, dest_ref6);
679
680 FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
681
682 if (memcmp(dest_ref1, udest_ptrs[0], size)) {
683 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
684 srcs);
685 dump_matrix(ubuffs, 5, TEST_SOURCES);
686 printf("dprod_base:");
687 dump(dest_ref1, 25);
688 printf("dprod_dut:");
689 dump(udest_ptrs[0], 25);
690 return -1;
691 }
692 if (memcmp(dest_ref2, udest_ptrs[1], size)) {
693 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
694 srcs);
695 dump_matrix(ubuffs, 5, TEST_SOURCES);
696 printf("dprod_base:");
697 dump(dest_ref2, 25);
698 printf("dprod_dut:");
699 dump(udest_ptrs[1], 25);
700 return -1;
701 }
702 if (memcmp(dest_ref3, udest_ptrs[2], size)) {
703 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
704 srcs);
705 dump_matrix(ubuffs, 5, TEST_SOURCES);
706 printf("dprod_base:");
707 dump(dest_ref3, 25);
708 printf("dprod_dut:");
709 dump(udest_ptrs[2], 25);
710 return -1;
711 }
712 if (memcmp(dest_ref4, udest_ptrs[3], size)) {
713 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
714 srcs);
715 dump_matrix(ubuffs, 5, TEST_SOURCES);
716 printf("dprod_base:");
717 dump(dest_ref4, 25);
718 printf("dprod_dut:");
719 dump(udest_ptrs[3], 25);
720 return -1;
721 }
722 if (memcmp(dest_ref5, udest_ptrs[4], size)) {
723 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
724 srcs);
725 dump_matrix(ubuffs, 5, TEST_SOURCES);
726 printf("dprod_base:");
727 dump(dest_ref5, 25);
728 printf("dprod_dut:");
729 dump(udest_ptrs[4], 25);
730 return -1;
731 }
732 if (memcmp(dest_ref6, udest_ptrs[5], size)) {
733 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
734 srcs);
735 dump_matrix(ubuffs, 5, TEST_SOURCES);
736 printf("dprod_base:");
737 dump(dest_ref6, 25);
738 printf("dprod_dut:");
739 dump(udest_ptrs[5], 25);
740 return -1;
741 }
742 // Confirm that padding around dests is unchanged
743 memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
744 offset = udest_ptrs[0] - dest1;
745
746 if (memcmp(dest1, dest_ref1, offset)) {
747 printf("Fail rand ualign pad1 start\n");
748 return -1;
749 }
750 if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
751 printf("Fail rand ualign pad1 end\n");
752 return -1;
753 }
754
755 offset = udest_ptrs[1] - dest2;
756 if (memcmp(dest2, dest_ref1, offset)) {
757 printf("Fail rand ualign pad2 start\n");
758 return -1;
759 }
760 if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
761 printf("Fail rand ualign pad2 end\n");
762 return -1;
763 }
764
765 offset = udest_ptrs[2] - dest3;
766 if (memcmp(dest3, dest_ref1, offset)) {
767 printf("Fail rand ualign pad3 start\n");
768 return -1;
769 }
770 if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
771 printf("Fail rand ualign pad3 end\n");
772 return -1;
773 }
774
775 offset = udest_ptrs[3] - dest4;
776 if (memcmp(dest4, dest_ref1, offset)) {
777 printf("Fail rand ualign pad4 start\n");
778 return -1;
779 }
780 if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
781 printf("Fail rand ualign pad4 end\n");
782 return -1;
783 }
784
785 offset = udest_ptrs[4] - dest5;
786 if (memcmp(dest5, dest_ref1, offset)) {
787 printf("Fail rand ualign pad5 start\n");
788 return -1;
789 }
790 if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
791 printf("Fail rand ualign pad5 end\n");
792 return -1;
793 }
794
795 offset = udest_ptrs[5] - dest6;
796 if (memcmp(dest6, dest_ref1, offset)) {
797 printf("Fail rand ualign pad6 start\n");
798 return -1;
799 }
800 if (memcmp(dest6 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
801 printf("Fail rand ualign pad6 end\n");
802 return -1;
803 }
804
805 putchar('.');
806 }
807
808 // Test all size alignment
809 align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
810
811 for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
812 srcs = TEST_SOURCES;
813
814 for (i = 0; i < srcs; i++)
815 for (j = 0; j < size; j++)
816 buffs[i][j] = rand();
817
818 for (i = 0; i < srcs; i++) {
819 g1[i] = rand();
820 g2[i] = rand();
821 g3[i] = rand();
822 g4[i] = rand();
823 g5[i] = rand();
824 g6[i] = rand();
825 }
826
827 for (i = 0; i < srcs; i++) {
828 gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
829 gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
830 gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
831 gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
832 gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
833 gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
834 }
835
836 gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
837 gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
838 gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
839 gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
840 gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5);
841 gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], buffs, dest_ref6);
842
843 FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
844
845 if (memcmp(dest_ref1, dest_ptrs[0], size)) {
846 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
847 size);
848 dump_matrix(buffs, 5, TEST_SOURCES);
849 printf("dprod_base:");
850 dump(dest_ref1, 25);
851 printf("dprod_dut:");
852 dump(dest_ptrs[0], 25);
853 return -1;
854 }
855 if (memcmp(dest_ref2, dest_ptrs[1], size)) {
856 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
857 size);
858 dump_matrix(buffs, 5, TEST_SOURCES);
859 printf("dprod_base:");
860 dump(dest_ref2, 25);
861 printf("dprod_dut:");
862 dump(dest_ptrs[1], 25);
863 return -1;
864 }
865 if (memcmp(dest_ref3, dest_ptrs[2], size)) {
866 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
867 size);
868 dump_matrix(buffs, 5, TEST_SOURCES);
869 printf("dprod_base:");
870 dump(dest_ref3, 25);
871 printf("dprod_dut:");
872 dump(dest_ptrs[2], 25);
873 return -1;
874 }
875 if (memcmp(dest_ref4, dest_ptrs[3], size)) {
876 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
877 size);
878 dump_matrix(buffs, 5, TEST_SOURCES);
879 printf("dprod_base:");
880 dump(dest_ref4, 25);
881 printf("dprod_dut:");
882 dump(dest_ptrs[3], 25);
883 return -1;
884 }
885 if (memcmp(dest_ref5, dest_ptrs[4], size)) {
886 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
887 size);
888 dump_matrix(buffs, 5, TEST_SOURCES);
889 printf("dprod_base:");
890 dump(dest_ref5, 25);
891 printf("dprod_dut:");
892 dump(dest_ptrs[4], 25);
893 return -1;
894 }
895 if (memcmp(dest_ref6, dest_ptrs[5], size)) {
896 printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
897 size);
898 dump_matrix(buffs, 5, TEST_SOURCES);
899 printf("dprod_base:");
900 dump(dest_ref6, 25);
901 printf("dprod_dut:");
902 dump(dest_ptrs[5], 25);
903 return -1;
904 }
905 }
906
907 printf("Pass\n");
908 return 0;
909
910 }
110110 %define return rax
111111 %define return.w eax
112112
113 %define func(x) x:
113 %define func(x) x: endbranch
114114 %macro FUNC_SAVE 0
115115 push r12
116116 push r13
183183
184184
185185 align 16
186 global gf_6vect_mad_avx:ISAL_SYM_TYPE_FUNCTION
186 mk_global gf_6vect_mad_avx, function
187187 func(gf_6vect_mad_avx)
188188 FUNC_SAVE
189189 sub len, 16
106106 %define return rax
107107 %define return.w eax
108108
109 %define func(x) x:
109 %define func(x) x: endbranch
; FUNC_SAVE: zero-parameter prologue macro; its whole body is a push of r12.
110110 %macro FUNC_SAVE 0
111111 push r12
112112 %endmacro
176176 %define xd6 xd1
177177
178178 align 16
179 global gf_6vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION
179 mk_global gf_6vect_mad_avx2, function
180180 func(gf_6vect_mad_avx2)
181181 FUNC_SAVE
182182 sub len, 32
4545 %define tmp2 r10
4646 %define tmp3 r12 ;must be saved and restored
4747 %define return rax
48 %define func(x) x:
48 %define func(x) x: endbranch
; FUNC_SAVE: zero-parameter prologue macro; pushes r12, which this
; configuration aliases as tmp3 and marks as "must be saved and restored".
4949 %macro FUNC_SAVE 0
5050 push r12
5151 %endmacro
180180 %define xtmph6 zmm31
181181
182182 align 16
183 global gf_6vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION
183 mk_global gf_6vect_mad_avx512, function
184184 func(gf_6vect_mad_avx512)
185185 FUNC_SAVE
186186 sub len, 64
112112 %define return rax
113113 %define return.w eax
114114
115 %define func(x) x:
115 %define func(x) x: endbranch
116116 %macro FUNC_SAVE 0
117117 push r12
118118 push r13
184184
185185
186186 align 16
187 global gf_6vect_mad_sse:ISAL_SYM_TYPE_FUNCTION
187 mk_global gf_6vect_mad_sse, function
188188 func(gf_6vect_mad_sse)
189189 FUNC_SAVE
190190 sub len, 16
4747 %endmacro
4848 %define SSTR SLDR
4949 %define PS 8
50 %define func(x) x:
50 %define func(x) x: endbranch
5151 %define FUNC_SAVE
5252 %define FUNC_RESTORE
5353 %endif
105105
106106 %define PS 4
107107 %define LOG_PS 2
108 %define func(x) x:
108 %define func(x) x: endbranch
109109 %define arg(x) [ebp + PS*2 + PS*x]
110110
111111 %define trans ecx ;trans is for the variables in stack
193193 %define xp xmm2
194194
195195 align 16
196 global gf_vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
196 mk_global gf_vect_dot_prod_avx, function
197197 func(gf_vect_dot_prod_avx)
198198 FUNC_SAVE
199199 SLDR len, len_m
5050 %endmacro
5151 %define SSTR SLDR
5252 %define PS 8
53 %define func(x) x:
53 %define func(x) x: endbranch
5454 %define FUNC_SAVE
5555 %define FUNC_RESTORE
5656 %endif
110110
111111 %define PS 4
112112 %define LOG_PS 2
113 %define func(x) x:
113 %define func(x) x: endbranch
114114 %define arg(x) [ebp + PS*2 + PS*x]
115115
116116 %define trans ecx ;trans is for the variables in stack
201201 %define xp ymm2
202202
203203 align 16
204 global gf_vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
204 mk_global gf_vect_dot_prod_avx2, function
205205 func(gf_vect_dot_prod_avx2)
206206 FUNC_SAVE
207207 SLDR len, len_m
4848 %define PS 8
4949 %define LOG_PS 3
5050
51 %define func(x) x:
51 %define func(x) x: endbranch
5252 %define FUNC_SAVE
5353 %define FUNC_RESTORE
5454 %endif
7272 %define func(x) proc_frame x
7373 %macro FUNC_SAVE 0
7474 alloc_stack stack_size
75 save_reg r12, 9*16 + 0*8
76 save_reg r15, 9*16 + 3*8
75 save_reg r12, 0*8
76 save_reg r15, 1*8
7777 end_prolog
7878 mov arg4, arg(4)
7979 %endmacro
8080
8181 %macro FUNC_RESTORE 0
82 mov r12, [rsp + 9*16 + 0*8]
83 mov r15, [rsp + 9*16 + 3*8]
82 mov r12, [rsp + 0*8]
83 mov r15, [rsp + 1*8]
8484 add rsp, stack_size
8585 %endmacro
8686 %endif
127127 section .text
128128
129129 align 16
130 global gf_vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
130 mk_global gf_vect_dot_prod_avx512, function
131131 func(gf_vect_dot_prod_avx512)
132132 FUNC_SAVE
133133 xor pos, pos
4747 %endmacro
4848 %define SSTR SLDR
4949 %define PS 8
50 %define func(x) x:
50 %define func(x) x: endbranch
5151 %define FUNC_SAVE
5252 %define FUNC_RESTORE
5353 %endif
105105
106106 %define PS 4
107107 %define LOG_PS 2
108 %define func(x) x:
108 %define func(x) x: endbranch
109109 %define arg(x) [ebp + PS*2 + PS*x]
110110
111111 %define trans ecx ;trans is for the variables in stack
193193 %define xp xmm2
194194
195195 align 16
196 global gf_vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
196 mk_global gf_vect_dot_prod_sse, function
197197 func(gf_vect_dot_prod_sse)
198198 FUNC_SAVE
199199 SLDR len, len_m
8181 %define return rax
8282 %define return.w eax
8383
84 %define func(x) x:
84 %define func(x) x: endbranch
8585 %define FUNC_SAVE
8686 %define FUNC_RESTORE
8787 %endif
130130 %define xtmpd xmm5
131131
132132 align 16
133 global gf_vect_mad_avx:ISAL_SYM_TYPE_FUNCTION
133 mk_global gf_vect_mad_avx, function
134134 func(gf_vect_mad_avx)
135135 FUNC_SAVE
136136 sub len, 16
8787 %define return rax
8888 %define return.w eax
8989
90 %define func(x) x:
90 %define func(x) x: endbranch
9191 %define FUNC_SAVE
9292 %define FUNC_RESTORE
9393 %endif
138138 %define xtmpd ymm5
139139
140140 align 16
141 global gf_vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION
141 mk_global gf_vect_mad_avx2, function
142142 func(gf_vect_mad_avx2)
143143 FUNC_SAVE
144144 sub len, 32
4343 %define arg5 r9
4444 %define tmp r11
4545 %define return rax
46 %define func(x) x:
46 %define func(x) x: endbranch
4747 %define FUNC_SAVE
4848 %define FUNC_RESTORE
4949 %endif
126126 %define xmask0f zmm8
127127
128128 align 16
129 global gf_vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION
129 mk_global gf_vect_mad_avx512, function
130130 func(gf_vect_mad_avx512)
131131 FUNC_SAVE
132132 sub len, 64
8181 %define return rax
8282 %define return.w eax
8383
84 %define func(x) x:
84 %define func(x) x: endbranch
8585 %define FUNC_SAVE
8686 %define FUNC_RESTORE
8787 %endif
130130
131131
132132 align 16
133 global gf_vect_mad_sse:ISAL_SYM_TYPE_FUNCTION
133 mk_global gf_vect_mad_sse, function
134134 func(gf_vect_mad_sse)
135135 FUNC_SAVE
136136 sub len, 16
4141 %define arg5 r9
4242 %define tmp r11
4343 %define return rax
44 %define func(x) x:
44 %define func(x) x: endbranch
4545 %define FUNC_SAVE
4646 %define FUNC_RESTORE
4747
5555 %define func(x) proc_frame x
5656 %macro FUNC_SAVE 0
5757 alloc_stack stack_size
58 save_xmm128 xmm6, 0*16
59 save_xmm128 xmm7, 1*16
60 save_xmm128 xmm13, 2*16
61 save_xmm128 xmm14, 3*16
62 save_xmm128 xmm15, 4*16
58 vmovdqa [rsp + 0*16], xmm6
59 vmovdqa [rsp + 1*16], xmm7
60 vmovdqa [rsp + 2*16], xmm13
61 vmovdqa [rsp + 3*16], xmm14
62 vmovdqa [rsp + 4*16], xmm15
6363 end_prolog
6464 %endmacro
6565
110110 %define xtmp2c xmm7
111111
112112 align 16
113 global gf_vect_mul_avx:ISAL_SYM_TYPE_FUNCTION
113 mk_global gf_vect_mul_avx, function
114114 func(gf_vect_mul_avx)
115115 FUNC_SAVE
116116 mov pos, 0
4141 %define arg5 r9
4242 %define tmp r11
4343 %define return rax
44 %define func(x) x:
44 %define func(x) x: endbranch
4545 %define FUNC_SAVE
4646 %define FUNC_RESTORE
4747
111111
112112
113113 align 16
114 global gf_vect_mul_sse:ISAL_SYM_TYPE_FUNCTION
114 mk_global gf_vect_mul_sse, function
115115 func(gf_vect_mul_sse)
116116 FUNC_SAVE
117117 mov pos, 0
5454 %define b_d r8d
5555 %define end r13
5656
57 %define func(x) x:
57 %define func(x) x: endbranch
5858 %macro FUNC_SAVE 0
5959 push r12
6060 push r13
122122 %define yshuf0 ymm6
123123 %define yshuf1 ymm7
124124
125
126 global adler32_avx2_4:ISAL_SYM_TYPE_FUNCTION
125 [bits 64]
126 default rel
127 section .text
128
129 mk_global adler32_avx2_4, function
127130 func(adler32_avx2_4)
128131 FUNC_SAVE
129132
5151 %define b_d r8d
5252 %define end r13
5353
54 %define func(x) x:
54 %define func(x) x: endbranch
5555 %macro FUNC_SAVE 0
5656 push r12
5757 push r13
103103 %define xdata1 xmm3
104104 %define xsa xmm4
105105
106 global adler32_sse:ISAL_SYM_TYPE_FUNCTION
106 [bits 64]
107 default rel
108 section .text
109
110 mk_global adler32_sse, function
107111 func(adler32_sse)
108112 FUNC_SAVE
109113
171171
172172 %endmacro
173173
174 default rel
175 section .text
176
174177 global encode_deflate_icf_ %+ ARCH
175178 encode_deflate_icf_ %+ ARCH:
179 endbranch
176180 FUNC_SAVE
177181
178182 %ifnidn ptr, arg1
184184
185185 %endmacro
186186
187 default rel
188 section .text
189
187190 global encode_deflate_icf_ %+ ARCH
188191 encode_deflate_icf_ %+ ARCH:
192 endbranch
189193 FUNC_SAVE
190194
191195 %ifnidn ptr, arg1
6363 #include <stdlib.h>
6464 #include "igzip_lib.h"
6565
66 #include "huff_codes.h"
67 #include "huffman.h"
68
6669 /*These max code lengths are limited by how the data is stored in
6770 * hufftables.asm. The deflate standard max is 15.*/
6871
232235 fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE);
233236 }
234237
/**
 * @brief Returns the deflate symbol value for a look back distance.
 *
 * Distances are expected to be in the range 1..32768; this is asserted in
 * debug builds. A distance above 32768 yields ~0.
 */
static uint32_t convert_dist_to_dist_sym(uint32_t dist)
{
	uint32_t shift = 0;

	assert(dist <= 32768 && dist > 0);

	if (dist > 32768)
		return ~0;

	/* Distances up to 4 map directly to dist - 1; larger ones are scaled by
	 * the bit position reported by bsr() (presumably the number of extra
	 * bits for the symbol -- depends on bsr()'s indexing, confirm there). */
	if (dist > 4)
		shift = bsr(dist - 1) - 2;

	return (shift * 2) + ((dist - 1) >> shift);
}
248
/**
 * @brief Returns the deflate symbol value for a repeat length.
 *
 * Lengths are expected to be in the range 3..258 (asserted in debug builds).
 */
static uint32_t convert_length_to_len_sym(uint32_t length)
{
	/* Based on tables on page 11 in RFC 1951. Each row covers the lengths
	 * below 'limit' that were not claimed by an earlier row. */
	static const struct {
		uint32_t limit;	/* first length past this group */
		uint32_t base;	/* symbol offset for the group */
		uint32_t step;	/* lengths sharing one symbol */
	} groups[] = {
		{11, 257, 1},
		{19, 261, 2},
		{35, 265, 4},
		{67, 269, 8},
		{131, 273, 16},
		{258, 277, 32},
	};
	unsigned int i;

	assert(length > 2 && length < 259);

	for (i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
		if (length < groups[i].limit)
			return groups[i].base + (length - 3) / groups[i].step;
	}

	/* Only length 258 remains; it has its own symbol */
	return 285;
}
272
273 void isal_update_histogram_dict(uint8_t * start_stream, int dict_length, int length,
274 struct isal_huff_histogram *histogram)
275 {
276 uint32_t literal = 0, hash;
277 uint16_t seen, *last_seen = histogram->hash_table;
278 uint8_t *current, *end_stream, *next_hash, *end, *end_dict;
279 uint32_t match_length;
280 uint32_t dist;
281 uint64_t *lit_len_histogram = histogram->lit_len_histogram;
282 uint64_t *dist_histogram = histogram->dist_histogram;
283
284 if (length <= 0)
285 return;
286
287 end_stream = start_stream + dict_length + length;
288 end_dict = start_stream + dict_length;
289
290 memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */
291
292 for (current = start_stream; current < end_dict - 4; current++) {
293 literal = load_u32(current);
294 hash = compute_hash(literal) & LVL0_HASH_MASK;
295 last_seen[hash] = (current - start_stream) & 0xFFFF;
296 }
297
298 for (current = start_stream + dict_length; current < end_stream - 3; current++) {
299 literal = load_u32(current);
300 hash = compute_hash(literal) & LVL0_HASH_MASK;
301 seen = last_seen[hash];
302 last_seen[hash] = (current - start_stream) & 0xFFFF;
303 dist = (current - start_stream - seen) & 0xFFFF;
304 if (dist - 1 < D - 1) {
305 assert(start_stream <= current - dist);
306 match_length =
307 compare258(current - dist, current, end_stream - current);
308 if (match_length >= SHORTEST_MATCH) {
309 next_hash = current;
310 #ifdef ISAL_LIMIT_HASH_UPDATE
311 end = next_hash + 3;
312 #else
313 end = next_hash + match_length;
314 #endif
315 if (end > end_stream - 3)
316 end = end_stream - 3;
317 next_hash++;
318 for (; next_hash < end; next_hash++) {
319 literal = load_u32(next_hash);
320 hash = compute_hash(literal) & LVL0_HASH_MASK;
321 last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
322 }
323
324 dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
325 lit_len_histogram[convert_length_to_len_sym(match_length)] +=
326 1;
327 current += match_length - 1;
328 continue;
329 }
330 }
331 lit_len_histogram[literal & 0xFF] += 1;
332 }
333
334 for (; current < end_stream; current++)
335 lit_len_histogram[*current] += 1;
336
337 lit_len_histogram[256] += 1;
338 return;
339 }
340
235341 int main(int argc, char *argv[])
236342 {
237343 long int file_length;
344 int argi = 1;
238345 uint8_t *stream = NULL;
239346 struct isal_hufftables hufftables;
240347 struct isal_huff_histogram histogram;
241348 struct isal_zstream tmp_stream;
242 FILE *file;
349 FILE *file = NULL;
350 FILE *dict_file = NULL;
351 FILE *hist_file = NULL;
352 long int dict_file_length = 0;
353 long int hist_file_length = 0;
354 uint8_t *dict_stream = NULL;
243355
244356 if (argc == 1) {
245357 printf("Error, no input file.\n");
246358 return 1;
247359 }
248360
249 memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */
250
251 while (argc > 1) {
252 printf("Processing %s\n", argv[argc - 1]);
253 file = fopen(argv[argc - 1], "r");
361 if (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'd') {
362 dict_file = fopen(argv[2], "r");
363
364 fseek(dict_file, 0, SEEK_END);
365 dict_file_length = ftell(dict_file);
366 fseek(dict_file, 0, SEEK_SET);
367 dict_file_length -= ftell(dict_file);
368 dict_stream = malloc(dict_file_length);
369 if (dict_stream == NULL) {
370 printf("Failed to allocate memory to read in dictionary file\n");
371 fclose(dict_file);
372 return 1;
373 }
374 if (fread(dict_stream, 1, dict_file_length, dict_file) != dict_file_length) {
375 printf("Error occurred when reading dictionary file");
376 fclose(dict_file);
377 free(dict_stream);
378 return 1;
379 }
380 isal_update_histogram(dict_stream, dict_file_length, &histogram);
381
382 printf("Read %ld bytes of dictionary file %s\n", dict_file_length, argv[2]);
383 argi += 2;
384 fclose(dict_file);
385 free(dict_stream);
386 }
387
388 if ((argc > argi + 1) && argv[argi][0] == '-' && argv[argi][1] == 'h') {
389 hist_file = fopen(argv[argi + 1], "r+");
390 fseek(hist_file, 0, SEEK_END);
391 hist_file_length = ftell(hist_file);
392 fseek(hist_file, 0, SEEK_SET);
393 hist_file_length -= ftell(hist_file);
394 if (hist_file_length > sizeof(histogram)) {
395 printf("Histogram file too long\n");
396 return 1;
397 }
398 if (fread(&histogram, 1, hist_file_length, hist_file) != hist_file_length) {
399 printf("Error occurred when reading history file");
400 fclose(hist_file);
401 return 1;
402 }
403 fseek(hist_file, 0, SEEK_SET);
404
405 printf("Read %ld bytes of history file %s\n", hist_file_length,
406 argv[argi + 1]);
407 argi += 2;
408 } else
409 memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */
410
411 while (argi < argc) {
412 printf("Processing %s\n", argv[argi]);
413 file = fopen(argv[argi], "r");
254414 if (file == NULL) {
255415 printf("Error opening file\n");
256416 return 1;
259419 file_length = ftell(file);
260420 fseek(file, 0, SEEK_SET);
261421 file_length -= ftell(file);
262 stream = malloc(file_length);
422 stream = malloc(file_length + dict_file_length);
263423 if (stream == NULL) {
264424 printf("Failed to allocate memory to read in file\n");
265425 fclose(file);
266426 return 1;
267427 }
268 if (fread(stream, 1, file_length, file) != file_length) {
428 if (dict_file_length > 0)
429 memcpy(stream, dict_stream, dict_file_length);
430
431 if (fread(&stream[dict_file_length], 1, file_length, file) != file_length) {
269432 printf("Error occurred when reading file");
270433 fclose(file);
271434 free(stream);
274437
275438 /* Create a histogram of frequency of symbols found in stream to
276439 * generate the huffman tree.*/
277 isal_update_histogram(stream, file_length, &histogram);
440 if (0 == dict_file_length)
441 isal_update_histogram(stream, file_length, &histogram);
442 else
443 isal_update_histogram_dict(stream, dict_file_length, file_length,
444 &histogram);
278445
279446 fclose(file);
280447 free(stream);
281 argc--;
448 argi++;
282449 }
283450
284451 isal_create_hufftables(&hufftables, &histogram);
303470
304471 fclose(file);
305472
473 if (hist_file) {
474 int len = fwrite(&histogram, 1, sizeof(histogram), hist_file);
475 printf("wrote %d bytes of histogram file\n", len);
476 fclose(hist_file);
477 }
306478 return 0;
307479 }
3434 #include "igzip_lib.h"
3535
3636 #define STATIC_INFLATE_FILE "static_inflate.h"
37 #define DOUBLE_SYM_THRESH (4 * 1024)
38
39 extern struct isal_hufftables hufftables_default;
3740
3841 /**
3942 * @brief Prints a table of uint16_t elements to a file.
115118 struct inflate_state state;
116119 FILE *file;
117120 uint8_t static_deflate_hdr = 3;
118 uint8_t tmp_space[8];
121 uint8_t tmp_space[8], *in_buf;
122
123 if (NULL == (in_buf = malloc(DOUBLE_SYM_THRESH + 1))) {
124 printf("Can not allocote memory\n");
125 return 1;
126 }
119127
120128 isal_inflate_init(&state);
121129
122 state.next_in = &static_deflate_hdr;
123 state.avail_in = sizeof(static_deflate_hdr);
130 memcpy(in_buf, &static_deflate_hdr, sizeof(static_deflate_hdr));
131 state.next_in = in_buf;
132 state.avail_in = DOUBLE_SYM_THRESH + 1;
124133 state.next_out = tmp_space;
125134 state.avail_out = sizeof(tmp_space);
126135
132141 printf("Error creating file hufftables_c.c\n");
133142 return 1;
134143 }
144 // Add decode tables describing a type 2 static (fixed) header
135145
136146 fprintf(file, "#ifndef STATIC_HEADER_H\n" "#define STATIC_HEADER_H\n\n");
137147
156166 fprintf(file, "};\n\n");
157167
158168 fprintf(file, "#endif\n");
169
170 // Add other tables for known dynamic headers - level 0
171
172 isal_inflate_init(&state);
173
174 memcpy(in_buf, &hufftables_default.deflate_hdr,
175 sizeof(hufftables_default.deflate_hdr));
176 state.next_in = in_buf;
177 state.avail_in = DOUBLE_SYM_THRESH + 1;
178 state.next_out = tmp_space;
179 state.avail_out = sizeof(tmp_space);
180
181 isal_inflate(&state);
182
183 fprintf(file, "struct inflate_huff_code_large pregen_lit_huff_code = {\n");
184 fprint_uint32_table(file, state.lit_huff_code.short_code_lookup,
185 sizeof(state.lit_huff_code.short_code_lookup) / sizeof(uint32_t),
186 "\t.short_code_lookup = {", "\t},\n\n", "\t\t");
187 fprint_uint16_table(file, state.lit_huff_code.long_code_lookup,
188 sizeof(state.lit_huff_code.long_code_lookup) / sizeof(uint16_t),
189 "\t.long_code_lookup = {", "\t}\n", "\t\t");
190 fprintf(file, "};\n\n");
191
192 fprintf(file, "struct inflate_huff_code_small pregen_dist_huff_code = {\n");
193 fprint_uint16_table(file, state.dist_huff_code.short_code_lookup,
194 sizeof(state.dist_huff_code.short_code_lookup) / sizeof(uint16_t),
195 "\t.short_code_lookup = {", "\t},\n\n", "\t\t");
196 fprint_uint16_table(file, state.dist_huff_code.long_code_lookup,
197 sizeof(state.dist_huff_code.long_code_lookup) / sizeof(uint16_t),
198 "\t.long_code_lookup = {", "\t}\n", "\t\t");
199 fprintf(file, "};\n\n");
200
159201 fclose(file);
160
202 free(in_buf);
161203 return 0;
162204 }
12401240 stream->internal_state.has_hist = IGZIP_HIST;
12411241 }
12421242
1243 int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict,
1244 uint8_t * dict_data, uint32_t dict_len)
1245 {
1246 if ((dict == NULL)
1247 || (dict_len == 0)
1248 || (dict->level > ISAL_DEF_MAX_LEVEL))
1249 return ISAL_INVALID_STATE;
1250
1251 if (dict_len > IGZIP_HIST_SIZE) {
1252 dict_data = dict_data + dict_len - IGZIP_HIST_SIZE;
1253 dict_len = IGZIP_HIST_SIZE;
1254 }
1255
1256 dict->level = stream->level;
1257 dict->hist_size = dict_len;
1258 memcpy(dict->history, dict_data, dict_len);
1259 memset(dict->hashtable, -1, sizeof(dict->hashtable));
1260
1261 switch (stream->level) {
1262 case 3:
1263 dict->hash_size = IGZIP_LVL3_HASH_SIZE;
1264 isal_deflate_hash_lvl3(dict->hashtable, LVL3_HASH_MASK,
1265 0, dict_data, dict_len);
1266 break;
1267
1268 case 2:
1269 dict->hash_size = IGZIP_LVL2_HASH_SIZE;
1270 isal_deflate_hash_lvl2(dict->hashtable, LVL2_HASH_MASK,
1271 0, dict_data, dict_len);
1272 break;
1273 case 1:
1274 dict->hash_size = IGZIP_LVL1_HASH_SIZE;
1275 isal_deflate_hash_lvl1(dict->hashtable, LVL1_HASH_MASK,
1276 0, dict_data, dict_len);
1277 break;
1278 default:
1279 dict->hash_size = IGZIP_LVL0_HASH_SIZE;
1280 isal_deflate_hash_lvl0(dict->hashtable, LVL0_HASH_MASK,
1281 0, dict_data, dict_len);
1282 }
1283 return COMP_OK;
1284 }
1285
1286 int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict)
1287 {
1288 struct isal_zstate *state = &stream->internal_state;
1289 struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
1290 int ret;
1291
1292 if ((state->state != ZSTATE_NEW_HDR)
1293 || (state->b_bytes_processed != state->b_bytes_valid)
1294 || (dict->level != stream->level)
1295 || (dict->hist_size == 0)
1296 || (dict->hist_size > IGZIP_HIST_SIZE)
1297 || (dict->hash_size > IGZIP_LVL3_HASH_SIZE))
1298 return ISAL_INVALID_STATE;
1299
1300 ret = check_level_req(stream);
1301 if (ret)
1302 return ret;
1303
1304 memcpy(state->buffer, dict->history, dict->hist_size);
1305 state->b_bytes_processed = dict->hist_size;
1306 state->b_bytes_valid = dict->hist_size;
1307 state->has_hist = IGZIP_DICT_HASH_SET;
1308
1309 switch (stream->level) {
1310 case 3:
1311 memcpy(level_buf->lvl3.hash_table, dict->hashtable,
1312 sizeof(level_buf->lvl3.hash_table));
1313 break;
1314
1315 case 2:
1316 memcpy(level_buf->lvl2.hash_table, dict->hashtable,
1317 sizeof(level_buf->lvl2.hash_table));
1318 break;
1319 case 1:
1320 memcpy(level_buf->lvl1.hash_table, dict->hashtable,
1321 sizeof(level_buf->lvl1.hash_table));
1322 break;
1323 default:
1324 memcpy(stream->internal_state.head, dict->hashtable,
1325 sizeof(stream->internal_state.head));
1326 }
1327
1328 return COMP_OK;
1329 }
1330
12431331 int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
12441332 {
12451333 struct isal_zstate *state = &stream->internal_state;
14641552 set_dist_mask(stream);
14651553 set_hash_mask(stream);
14661554 isal_deflate_hash(stream, state->buffer, state->b_bytes_processed);
1555 } else if (state->has_hist == IGZIP_DICT_HASH_SET) {
1556 set_dist_mask(stream);
1557 set_hash_mask(stream);
14671558 }
14681559
14691560 in_size = stream->avail_in + buffered_size;
133133 %if ARCH == 04
134134 %define USE_HSWNI
135135 %endif
136
137 [bits 64]
138 default rel
139 section .text
140
136141 ; void isal_deflate_body ( isal_zstream *stream )
137142 ; arg 1: rcx: addr of stream
138143 global isal_deflate_body_ %+ ARCH
139144 isal_deflate_body_ %+ ARCH %+ :
145 endbranch
140146 %ifidn __OUTPUT_FORMAT__, elf64
141147 mov rcx, rdi
142148 %endif
458458 decode_next_dist %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_extra_bits, %%next_bits
459459 %endm
460460
461 [bits 64]
462 default rel
463 section .text
464
461465 global decode_huffman_code_block_stateless_ %+ ARCH
462466 decode_huffman_code_block_stateless_ %+ ARCH %+ :
467 endbranch
463468
464469 FUNC_SAVE
465470
9797 %endif
9898 %endm
9999
100 [bits 64]
101 default rel
102 section .text
103
100104 global isal_deflate_hash_crc_01
101105 isal_deflate_hash_crc_01:
106 endbranch
102107 FUNC_SAVE
103108
104109 neg f_i
9797 " -h help\n"
9898 " -X use compression level X with 0 <= X <= 1\n"
9999 " -b <size> input buffer size, 0 buffers all the input\n"
100 " -i <time> time in seconds to benchmark (at least 1)\n"
100 " -i <time> time in seconds to benchmark (at least 0)\n"
101101 " -o <file> output file for compresed data\n"
102102 " -d <file> dictionary file used by compression\n"
103103 " -w <size> log base 2 size of history window, between 8 and 15\n");
108108 void deflate_perf(struct isal_zstream *stream, uint8_t * inbuf, size_t infile_size,
109109 size_t inbuf_size, uint8_t * outbuf, size_t outbuf_size, int level,
110110 uint8_t * level_buf, int level_size, uint32_t hist_bits, uint8_t * dictbuf,
111 size_t dictfile_size, struct isal_hufftables *hufftables_custom)
111 size_t dictfile_size, struct isal_dict *dict_str,
112 struct isal_hufftables *hufftables_custom)
112113 {
113114 int avail_in;
114115 isal_deflate_init(stream);
115 if (dictbuf != NULL)
116 isal_deflate_set_dict(stream, dictbuf, dictfile_size);
117 stream->end_of_stream = 0;
118 stream->flush = NO_FLUSH;
119116 stream->level = level;
120117 stream->level_buf = level_buf;
121118 stream->level_buf_size = level_size;
119
120 if (COMP_OK != isal_deflate_reset_dict(stream, dict_str))
121 if (dictbuf != NULL)
122 isal_deflate_set_dict(stream, dictbuf, dictfile_size);
123
124 stream->end_of_stream = 0;
125 stream->flush = NO_FLUSH;
122126 stream->next_out = outbuf;
123127 stream->avail_out = outbuf_size;
124128 stream->next_in = inbuf;
174178 break;
175179 case 'i':
176180 time = atoi(optarg);
177 if (time < 1)
181 if (time < 0)
178182 usage();
179183 break;
180184 case 'b':
284288 exit(0);
285289 }
286290
291 struct isal_dict dict_str;
292 stream.level = level;
293 isal_deflate_process_dict(&stream, &dict_str, dictbuf, dictfile_size);
294
287295 struct perf start;
288 BENCHMARK(&start, time,
289 deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
290 level, level_buf, level_size, hist_bits, dictbuf,
291 dictfile_size, NULL));
296 if (time > 0) {
297 BENCHMARK(&start, time,
298 deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf,
299 outbuf_size, level, level_buf, level_size, hist_bits,
300 dictbuf, dictfile_size, &dict_str, NULL));
301 } else {
302 deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
303 level, level_buf, level_size, hist_bits, dictbuf,
304 dictfile_size, &dict_str, NULL);
305 }
292306 if (stream.avail_in != 0) {
293307 fprintf(stderr, "Could not compress all of inbuf\n");
294308 exit(0);
306320
307321 deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
308322 level, level_buf, level_size, hist_bits, dictbuf,
309 dictfile_size, &hufftables_custom);
323 dictfile_size, &dict_str, &hufftables_custom);
310324
311325 printf(" ratio_custom=%3.1f%%", 100.0 * stream.total_out / infile_size);
312326 }
8484 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8585 f_end_i_mem_offset equ 0 ; local variable (8 bytes)
8686 stack_size equ 8
87
88 [bits 64]
89 default rel
90 section .text
91
8792 ; void isal_deflate_finish ( isal_zstream *stream )
8893 ; arg 1: rcx: addr of stream
8994 global isal_deflate_finish_01
9095 isal_deflate_finish_01:
96 endbranch
9197 PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
9298 sub rsp, stack_size
9399
146146 %define stack_size 16
147147 %define local_storage_offset 0
148148
149 %define func(x) x:
149 %define func(x) x: endbranch
150150 %macro FUNC_SAVE 0
151151 push rbp
152152 push r12
168168 %define VECT_SIZE 8
169169 %define HASH_BYTES 2
170170
171 [bits 64]
172 default rel
173 section .text
174
171175 global gen_icf_map_lh1_04
172176 func(gen_icf_map_lh1_04)
177 endbranch
173178 FUNC_SAVE
174179
175180 mov file_start, [stream + _next_in]
142142 add rsp, stack_size
143143 %endm
144144 %else
145 %define func(x) x:
145 %define func(x) x: endbranch
146146 %macro FUNC_SAVE 0
147147 push rbp
148148 push r12
159159 %define VECT_SIZE 16
160160 %define HASH_BYTES 2
161161
162 [bits 64]
163 default rel
164 section .text
165
162166 global gen_icf_map_lh1_06
163167 func(gen_icf_map_lh1_06)
168 endbranch
164169 FUNC_SAVE
165170
166171 mov file_start, [stream + _next_in]
154154 %define USE_HSWNI
155155 %endif
156156
157 [bits 64]
158 default rel
159 section .text
160
157161 ; void isal_deflate_icf_body <hashsize> <arch> ( isal_zstream *stream )
158162 ; we make 6 different versions of this function
159163 ; arg 1: rcx: addr of stream
160164 global isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH
161165 isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
166 endbranch
162167 %ifidn __OUTPUT_FORMAT__, elf64
163168 mov rcx, rdi
164169 %endif
9393
9494 %xdefine METHOD hash_hist
9595
96 [bits 64]
97 default rel
98 section .text
99
96100 ; void isal_deflate_icf_finish ( isal_zstream *stream )
97101 ; arg 1: rcx: addr of stream
98102 global isal_deflate_icf_finish_ %+ METHOD %+ _01
99103 isal_deflate_icf_finish_ %+ METHOD %+ _01:
104 endbranch
100105 PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
101106 sub rsp, stack_size
102107
5252 #endif
5353
5454 extern int decode_huffman_code_block_stateless(struct inflate_state *, uint8_t * start_out);
55 extern struct isal_hufftables hufftables_default; /* For known header detection */
5556
5657 #define LARGE_SHORT_SYM_LEN 25
5758 #define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
931932 }
932933 }
933934
/* Checks whether the dynamic block header at the current input position is
 * identical to the header stored in hufftables_default. On a match the header
 * is consumed from the input and 1 is returned; otherwise the state is left
 * untouched and 0 is returned. Always returns 0 when built without
 * ISAL_STATIC_INFLATE_TABLE. */
static int header_matches_pregen(struct inflate_state *state)
{
#ifndef ISAL_STATIC_INFLATE_TABLE
	return 0;
#else
	const uint64_t bits_read_prior = 3;	// Have read bfinal(1) and btype(2)
	uint8_t *hdr = &(hufftables_default.deflate_hdr[0]);
	uint32_t header_len = hufftables_default.deflate_hdr_count;
	uint32_t last_bits = hufftables_default.deflate_hdr_extra_bits;
	uint64_t consumed_bits = state->read_in_length + bits_read_prior;
	uint64_t stash_mask;
	uint32_t bytes_read_in, remaining, tail_mask;
	uint8_t *in;

	/* Bits already pulled into read_in must agree with the stored header */
	stash_mask = (1ull << state->read_in_length) - 1;
	if (((load_u64(hdr) >> bits_read_prior) & stash_mask) !=
	    (state->read_in & stash_mask))
		return 0;

	/* The byte-wise compare below only works on byte aligned input */
	if (consumed_bits % 8)
		return 0;

	/* Compare the whole bytes of the header not yet read in */
	in = state->next_in;
	bytes_read_in = consumed_bits / 8;
	remaining = header_len - bytes_read_in;

	if (memcmp(in, &hdr[bytes_read_in], remaining))
		return 0;

	/* A header may end part way through a byte; compare those bits too */
	if (last_bits != 0) {
		tail_mask = (1 << last_bits) - 1;
		if ((in[remaining] & tail_mask) != (hdr[header_len] & tail_mask))
			return 0;
	}

	/* Match: step over the header bytes and drop the stashed bits */
	state->next_in += remaining;
	state->avail_in -= remaining;
	state->read_in_length = 0;
	state->read_in = 0;

	if (last_bits != 0)
		inflate_in_read_bits(state, last_bits);

	return 1;
#endif // ISAL_STATIC_INFLATE_TABLE
}
994
/* Installs the pregenerated literal/length and distance decode tables into
 * state and marks the block as coded. Does nothing when built without
 * ISAL_STATIC_INFLATE_TABLE. Always returns 0. */
static int setup_pregen_header(struct inflate_state *state)
{
#ifndef ISAL_STATIC_INFLATE_TABLE
	return 0;
#else
	memcpy(&state->lit_huff_code, &pregen_lit_huff_code, sizeof(pregen_lit_huff_code));
	memcpy(&state->dist_huff_code, &pregen_dist_huff_code, sizeof(pregen_dist_huff_code));
	state->block_state = ISAL_BLOCK_CODED;
	return 0;
#endif // ISAL_STATIC_INFLATE_TABLE
}
1004
9341005 /* Sets the inflate_huff_codes in state to be the huffcodes corresponding to the
9351006 * deflate static header */
9361007 static int inline setup_static_header(struct inflate_state *state)
11881259 0x10, 0x11, 0x12, 0x00, 0x08, 0x07, 0x09, 0x06,
11891260 0x0a, 0x05, 0x0b, 0x04, 0x0c, 0x03, 0x0d, 0x02, 0x0e, 0x01, 0x0f
11901261 };
1262
1263 /* If you are given a whole header and it matches the pregen header */
1264 if (state->avail_in > (hufftables_default.deflate_hdr_count + sizeof(uint64_t))
1265 && header_matches_pregen(state))
1266 return setup_pregen_header(state);
11911267
11921268 if (state->bfinal && state->avail_in <= SINGLE_SYM_THRESH) {
11931269 multisym = SINGLE_SYM_FLAG;
17481824 state->write_overflow_len = 0;
17491825 state->copy_overflow_length = 0;
17501826 state->copy_overflow_distance = 0;
1827 state->wrapper_flag = 0;
17511828 state->tmp_in_size = 0;
17521829 state->tmp_out_processed = 0;
17531830 state->tmp_out_valid = 0;
17851862 }
17861863
17871864 static inline uint32_t buffer_header_copy(struct inflate_state *state, uint32_t in_len,
1788 uint8_t * buf, uint32_t buf_len, uint32_t buf_error)
1865 uint8_t * buf, uint32_t buffer_len, uint32_t offset,
1866 uint32_t buf_error)
17891867 {
17901868 uint32_t len = in_len;
1869 uint32_t buf_len = buffer_len - offset;
1870
17911871 if (len > state->avail_in)
17921872 len = state->avail_in;
17931873
17941874 if (buf != NULL && buf_len < len) {
1795 memcpy(buf, state->next_in, buf_len);
1875 memcpy(&buf[offset], state->next_in, buf_len);
17961876 state->next_in += buf_len;
17971877 state->avail_in -= buf_len;
17981878 state->count = in_len - buf_len;
17991879 return buf_error;
18001880 } else {
18011881 if (buf != NULL)
1802 memcpy(buf, state->next_in, len);
1882 memcpy(&buf[offset], state->next_in, len);
18031883 state->next_in += len;
18041884 state->avail_in -= len;
18051885 state->count = in_len - len;
18121892 }
18131893
18141894 static inline uint32_t string_header_copy(struct inflate_state *state,
1815 char *str_buf, uint32_t str_len, uint32_t str_error)
1816 {
1817 uint32_t len, max_len = str_len;
1895 char *str_buf, uint32_t str_len,
1896 uint32_t offset, uint32_t str_error)
1897 {
1898 uint32_t len, max_len = str_len - offset;
18181899
18191900 if (max_len > state->avail_in || str_buf == NULL)
18201901 max_len = state->avail_in;
18221903 len = strnlen((char *)state->next_in, max_len);
18231904
18241905 if (str_buf != NULL)
1825 memcpy(str_buf, state->next_in, len);
1906 memcpy(&str_buf[offset], state->next_in, len);
18261907
18271908 state->next_in += len;
18281909 state->avail_in -= len;
18291910 state->count += len;
18301911
1831 if (str_buf != NULL && len == str_len)
1912 if (str_buf != NULL && len == (str_len - offset))
18321913 return str_error;
18331914 else if (state->avail_in <= 0)
18341915 return ISAL_END_INPUT;
20012082 case ISAL_GZIP_EXTRA:
20022083 offset = gz_hdr->extra_len - count;
20032084 ret =
2004 buffer_header_copy(state, count, gz_hdr->extra + offset,
2005 gz_hdr->extra_buf_len - offset,
2006 ISAL_EXTRA_OVERFLOW);
2085 buffer_header_copy(state, count, gz_hdr->extra,
2086 gz_hdr->extra_buf_len,
2087 offset, ISAL_EXTRA_OVERFLOW);
20072088
20082089 if (ret) {
20092090 state->block_state = ISAL_GZIP_EXTRA;
20162097 if (flags & NAME_FLAG) {
20172098 case ISAL_GZIP_NAME:
20182099 offset = state->count;
2019 ret = string_header_copy(state, gz_hdr->name + offset,
2020 gz_hdr->name_buf_len - offset,
2021 ISAL_NAME_OVERFLOW);
2100 ret = string_header_copy(state, gz_hdr->name,
2101 gz_hdr->name_buf_len,
2102 offset, ISAL_NAME_OVERFLOW);
20222103 if (ret) {
20232104 state->block_state = ISAL_GZIP_NAME;
20242105 break;
20282109 if (flags & COMMENT_FLAG) {
20292110 case ISAL_GZIP_COMMENT:
20302111 offset = state->count;
2031 ret = string_header_copy(state, gz_hdr->comment + offset,
2032 gz_hdr->comment_buf_len - offset,
2033 ISAL_COMMENT_OVERFLOW);
2112 ret = string_header_copy(state, gz_hdr->comment,
2113 gz_hdr->comment_buf_len,
2114 offset, ISAL_COMMENT_OVERFLOW);
20342115 if (ret) {
20352116 state->block_state = ISAL_GZIP_COMMENT;
20362117 break;
21462227
21472228 if (state->crc_flag == IGZIP_GZIP) {
21482229 struct isal_gzip_header gz_hdr;
2230 isal_gzip_header_init(&gz_hdr);
21492231 ret = isal_read_gzip_header(state, &gz_hdr);
21502232 if (ret)
21512233 return ret;
21522234 } else if (state->crc_flag == IGZIP_ZLIB) {
2153 struct isal_zlib_header z_hdr;
2235 struct isal_zlib_header z_hdr = { 0 };
21542236 ret = isal_read_zlib_header(state, &z_hdr);
21552237 if (ret)
21562238 return ret;
22182300
22192301 if (!state->wrapper_flag && state->crc_flag == IGZIP_GZIP) {
22202302 struct isal_gzip_header gz_hdr;
2303 isal_gzip_header_init(&gz_hdr);
22212304 ret = isal_read_gzip_header(state, &gz_hdr);
22222305 if (ret < 0)
22232306 return ret;
22242307 else if (ret > 0)
22252308 return ISAL_DECOMP_OK;
22262309 } else if (!state->wrapper_flag && state->crc_flag == IGZIP_ZLIB) {
2227 struct isal_zlib_header z_hdr;
2310 struct isal_zlib_header z_hdr = { 0 };
22282311 ret = isal_read_zlib_header(state, &z_hdr);
22292312 if (ret < 0)
22302313 return ret;
10961096 uint32_t reset_test_flag = 0;
10971097 uint8_t tmp_symbol;
10981098 int no_mod = 0;
1099 struct isal_dict dict_str;
10991100
11001101 log_print("Starting Compress Multi Pass\n");
11011102
11461147 if (reset_test_flag)
11471148 isal_deflate_reset(stream);
11481149
1149 if (dict != NULL)
1150 isal_deflate_set_dict(stream, dict, dict_len);
1150 if (dict != NULL) {
1151 if (rand() % 2 == 0)
1152 isal_deflate_set_dict(stream, dict, dict_len);
1153 else {
1154 isal_deflate_process_dict(stream, &dict_str, dict, dict_len);
1155 isal_deflate_reset_dict(stream, &dict_str);
1156 }
1157 }
11511158
11521159 while (1) {
11531160 loop_count++;
12871294 uint8_t *level_buf = NULL;
12881295 struct isal_hufftables *huff_tmp;
12891296 uint32_t reset_test_flag = 0;
1297 struct isal_dict dict_str;
12901298
12911299 log_print("Starting Compress Single Pass\n");
12921300
13341342 if (reset_test_flag)
13351343 isal_deflate_reset(&stream);
13361344
1337 if (dict != NULL)
1338 isal_deflate_set_dict(&stream, dict, dict_len);
1345 if (dict != NULL) {
1346 if (rand() % 2 == 0)
1347 isal_deflate_set_dict(&stream, dict, dict_len);
1348 else {
1349 isal_deflate_process_dict(&stream, &dict_str, dict, dict_len);
1350 isal_deflate_reset_dict(&stream, &dict_str);
1351 }
1352 }
13391353
13401354 ret =
13411355 isal_deflate_with_checks(&stream, data_size, *compressed_size, data, data_size,
121121 add rsp, stack_size
122122 %endm
123123 %else
124 %define func(x) x:
124 %define func(x) x: endbranch
125125 %macro FUNC_SAVE 0
126126 push r12
127127 push r13
134134 %endif
135135 %define VECT_SIZE 8
136136
137 [bits 64]
138 default rel
139 section .text
140
137141 global set_long_icf_fg_04
138142 func(set_long_icf_fg_04)
143 endbranch
139144 FUNC_SAVE
140145
141146 lea end_in, [next_in + arg3]
128128 add rsp, stack_size
129129 %endm
130130 %else
131 %define func(x) x:
131 %define func(x) x: endbranch
132132 %macro FUNC_SAVE 0
133133 push r12
134134 push r13
141141 %endif
142142 %define VECT_SIZE 16
143143
144 [bits 64]
145 default rel
146 section .text
147
144148 global set_long_icf_fg_06
145149 func(set_long_icf_fg_06)
150 endbranch
146151 FUNC_SAVE
147152
148153 lea end_in, [next_in + arg3]
248248 cmovle %%dist_coded, %%dist
249249 %endm
250250
251 [bits 64]
252 default rel
253 section .text
254
251255 ; void isal_update_histogram
252256 global isal_update_histogram_ %+ ARCH
253257 isal_update_histogram_ %+ ARCH %+ :
258 endbranch
254259 FUNC_SAVE
255260
256261 %ifnidn file_start, arg0
5353 %define i r11
5454 %define tmp2 r12
5555
56 [bits 64]
57 default rel
58 section .text
59
5660 global build_huff_tree
5761 build_huff_tree:
62 endbranch
5863 %ifidn __OUTPUT_FORMAT__, win64
5964 push rsi
6065 push rdi
103108 align 32
104109 global build_heap
105110 build_heap:
111 endbranch
106112 %ifidn __OUTPUT_FORMAT__, win64
107113 push rsi
108114 push rdi
4444 ;; uint16_t len_start[32];
4545 ;; };
4646
47 global rfc1951_lookup_table:ISAL_SYM_TYPE_DATA_INTERNAL
47 mk_global rfc1951_lookup_table, data, internal
4848 rfc1951_lookup_table:
4949 len_to_code:
5050 db 0x00, 0x00, 0x00
13431343 };
13441344
13451345 #endif
1346 struct inflate_huff_code_large pregen_lit_huff_code = {
1347 .short_code_lookup = {
1348 0x24000102, 0x88010265, 0x44000103, 0xa8010277,
1349 0x24000102, 0x98010268, 0x78010220, 0xb80102e0,
1350 0x24000102, 0x88010273, 0x44000104, 0xb8010235,
1351 0x24000102, 0x74000108, 0x64000109, 0xc80102fd,
1352 0x24000102, 0x8801026f, 0x44000103, 0xb8010206,
1353 0x24000102, 0x98010270, 0x54000105, 0xc8010259,
1354 0x24000102, 0x9801020a, 0x44000104, 0xb8010249,
1355 0x24000102, 0xa8010230, 0x88010200, 0xb40000ad,
1356 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
1357 0x24000102, 0x9801026d, 0xb8006520, 0xc8010223,
1358 0x24000102, 0x64000106, 0x44000104, 0xb801023f,
1359 0x24000102, 0xa8010204, 0x6400010a, 0xb4000087,
1360 0x24000102, 0x88010272, 0x44000103, 0xb8010210,
1361 0x24000102, 0x98010275, 0x54000105, 0xc80102a6,
1362 0x24000102, 0x98010263, 0x44000104, 0xb8010254,
1363 0x24000102, 0xa8010242, 0x88010261, 0xb40000d7,
1364 0x24000102, 0xc8006565, 0x44000103, 0xa80102ff,
1365 0x24000102, 0x9801026c, 0x98010320, 0xc8010211,
1366 0x24000102, 0xc8006573, 0x44000104, 0xb8010239,
1367 0x24000102, 0xa8010201, 0x64000109, 0xb4000017,
1368 0x24000102, 0xc800656f, 0x44000103, 0xb801020b,
1369 0x24000102, 0x98010274, 0x54000105, 0xc801027c,
1370 0x24000102, 0x9801022c, 0x44000104, 0xb801024f,
1371 0x24000102, 0xa8010232, 0xc8006500, 0xb40000c4,
1372 0x24000102, 0xc8006569, 0x44000103, 0xa4000111,
1373 0x24000102, 0x9801026e, 0x54000020, 0xc801023d,
1374 0x24000102, 0x64000106, 0x44000104, 0xb8010245,
1375 0x24000102, 0xa801022d, 0x6400010a, 0xb400009a,
1376 0x24000102, 0xc8006572, 0x44000103, 0xb8010228,
1377 0x24000102, 0x74000107, 0x54000105, 0xc80102e3,
1378 0x24000102, 0x98010264, 0x44000104, 0xb8010280,
1379 0x24000102, 0xa8010266, 0xc8006561, 0xb40000eb,
1380 0x24000102, 0xa8010365, 0x44000103, 0xa8010279,
1381 0x24000102, 0x74000068, 0x78010220, 0xb80102fe,
1382 0x24000102, 0xa8010373, 0x44000104, 0xb8010237,
1383 0x24000102, 0x74000108, 0x64000109, 0x36000008,
1384 0x24000102, 0xa801036f, 0x44000103, 0xb8010208,
1385 0x24000102, 0x74000070, 0x54000105, 0xc8010260,
1386 0x24000102, 0x7400000a, 0x44000104, 0xb801024d,
1387 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b7,
1388 0x24000102, 0xa8010369, 0x44000103, 0x9400010f,
1389 0x24000102, 0x7400006d, 0xc8006820, 0xc801022a,
1390 0x24000102, 0x64000106, 0x44000104, 0xb8010243,
1391 0x24000102, 0xa801020d, 0x6400010a, 0xb4000092,
1392 0x24000102, 0xa8010372, 0x44000103, 0xb8010222,
1393 0x24000102, 0x74000075, 0x54000105, 0xc80102c1,
1394 0x24000102, 0x74000063, 0x44000104, 0xb8010276,
1395 0x24000102, 0xa8010262, 0xa8010361, 0xb40000df,
1396 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
1397 0x24000102, 0x7400006c, 0xa8002020, 0xc8010218,
1398 0x24000102, 0x64000073, 0x44000104, 0xb801023b,
1399 0x24000102, 0xa8010203, 0x64000109, 0xb400007b,
1400 0x24000102, 0x6400006f, 0x44000103, 0xb801020e,
1401 0x24000102, 0x74000074, 0x54000105, 0xc801028f,
1402 0x24000102, 0x7400002c, 0x44000104, 0xb8010252,
1403 0x24000102, 0xa8010241, 0x64000000, 0xb40000cd,
1404 0x24000102, 0x64000069, 0x44000103, 0xb8010202,
1405 0x24000102, 0x7400006e, 0x54000020, 0xc8010255,
1406 0x24000102, 0x64000106, 0x44000104, 0xb8010247,
1407 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a3,
1408 0x24000102, 0x64000072, 0x44000103, 0xb8010233,
1409 0x24000102, 0x74000107, 0x54000105, 0xc80102f3,
1410 0x24000102, 0x74000064, 0x44000104, 0xb80102c2,
1411 0x24000102, 0xa8010267, 0x64000061, 0xb40000f6,
1412 0x24000102, 0x88010265, 0x44000103, 0x84000077,
1413 0x24000102, 0xb8010368, 0x78010220, 0xb80102f0,
1414 0x24000102, 0x88010273, 0x44000104, 0xb8010236,
1415 0x24000102, 0x74000108, 0x64000109, 0xc400011d,
1416 0x24000102, 0x8801026f, 0x44000103, 0xb8010207,
1417 0x24000102, 0xb8010370, 0x54000105, 0xc801025c,
1418 0x24000102, 0xb801030a, 0x44000104, 0xb801024c,
1419 0x24000102, 0x84000030, 0x88010200, 0xb40000b2,
1420 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
1421 0x24000102, 0xb801036d, 0xb8007320, 0xc8010225,
1422 0x24000102, 0x64000106, 0x44000104, 0xb8010240,
1423 0x24000102, 0x84000004, 0x6400010a, 0xb400008c,
1424 0x24000102, 0x88010272, 0x44000103, 0xb801021f,
1425 0x24000102, 0xb8010375, 0x54000105, 0xc80102b4,
1426 0x24000102, 0xb8010363, 0x44000104, 0xb801026b,
1427 0x24000102, 0x84000042, 0x88010261, 0xb40000db,
1428 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
1429 0x24000102, 0xb801036c, 0x98010420, 0xc8010213,
1430 0x24000102, 0x64000073, 0x44000104, 0xb801023a,
1431 0x24000102, 0x84000001, 0x64000109, 0xb400001d,
1432 0x24000102, 0x6400006f, 0x44000103, 0xb801020c,
1433 0x24000102, 0xb8010374, 0x54000105, 0xc801027f,
1434 0x24000102, 0xb801032c, 0x44000104, 0xb8010250,
1435 0x24000102, 0x84000032, 0x64000000, 0xb40000c9,
1436 0x24000102, 0x64000069, 0x44000103, 0xa4000112,
1437 0x24000102, 0xb801036e, 0x54000020, 0xc801024b,
1438 0x24000102, 0x64000106, 0x44000104, 0xb8010246,
1439 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e,
1440 0x24000102, 0x64000072, 0x44000103, 0xb8010229,
1441 0x24000102, 0x74000107, 0x54000105, 0xc80102e8,
1442 0x24000102, 0xb8010364, 0x44000104, 0xb80102c0,
1443 0x24000102, 0x84000066, 0x64000061, 0xb40000ef,
1444 0x24000102, 0xb8002065, 0x44000103, 0x84000079,
1445 0x24000102, 0x74000068, 0x78010220, 0xb4000115,
1446 0x24000102, 0xb8002073, 0x44000104, 0xb8010238,
1447 0x24000102, 0x74000108, 0x64000109, 0x36000018,
1448 0x24000102, 0xb800206f, 0x44000103, 0xb8010209,
1449 0x24000102, 0x74000070, 0x54000105, 0xc8010271,
1450 0x24000102, 0x7400000a, 0x44000104, 0xb801024e,
1451 0x24000102, 0x84000031, 0xb8002000, 0xb40000bb,
1452 0x24000102, 0xb8002069, 0x44000103, 0x94000110,
1453 0x24000102, 0x7400006d, 0xc8010820, 0xc801022f,
1454 0x24000102, 0x64000106, 0x44000104, 0xb8010244,
1455 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096,
1456 0x24000102, 0xb8002072, 0x44000103, 0xb8010227,
1457 0x24000102, 0x74000075, 0x54000105, 0xc80102cf,
1458 0x24000102, 0x74000063, 0x44000104, 0xb8010278,
1459 0x24000102, 0x84000062, 0xb8002061, 0xb40000e5,
1460 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
1461 0x24000102, 0x7400006c, 0xb8010920, 0xc801021e,
1462 0x24000102, 0x64000073, 0x44000104, 0xb801023e,
1463 0x24000102, 0x84000003, 0x64000109, 0xb4000083,
1464 0x24000102, 0x6400006f, 0x44000103, 0xb801020f,
1465 0x24000102, 0x74000074, 0x54000105, 0xc80102a0,
1466 0x24000102, 0x7400002c, 0x44000104, 0xb8010253,
1467 0x24000102, 0x84000041, 0x64000000, 0xb40000d3,
1468 0x24000102, 0x64000069, 0x44000103, 0xb8010205,
1469 0x24000102, 0x7400006e, 0x54000020, 0xc8010257,
1470 0x24000102, 0x64000106, 0x44000104, 0xb8010248,
1471 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9,
1472 0x24000102, 0x64000072, 0x44000103, 0xb8010234,
1473 0x24000102, 0x74000107, 0x54000105, 0xc80102f9,
1474 0x24000102, 0x74000064, 0x44000104, 0xb80102c3,
1475 0x24000102, 0x84000067, 0x64000061, 0x3e000120,
1476 0x24000102, 0x88010265, 0x44000103, 0xc8010377,
1477 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
1478 0x24000102, 0x88010273, 0x44000104, 0x94000035,
1479 0x24000102, 0x74000108, 0x64000109, 0xc4000119,
1480 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
1481 0x24000102, 0x98010270, 0x54000105, 0xc801025b,
1482 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
1483 0x24000102, 0xc8010330, 0x88010200, 0xb40000af,
1484 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
1485 0x24000102, 0x9801026d, 0xb8006f20, 0xc8010224,
1486 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
1487 0x24000102, 0xc8010304, 0x6400010a, 0xb4000089,
1488 0x24000102, 0x88010272, 0x44000103, 0x94000010,
1489 0x24000102, 0x98010275, 0x54000105, 0xc80102b0,
1490 0x24000102, 0x98010263, 0x44000104, 0x94000054,
1491 0x24000102, 0xc8010342, 0x88010261, 0xb40000d9,
1492 0x24000102, 0xc8007365, 0x44000103, 0xc80103ff,
1493 0x24000102, 0x9801026c, 0x98010320, 0xc8010212,
1494 0x24000102, 0xc8007373, 0x44000104, 0x94000039,
1495 0x24000102, 0xc8010301, 0x64000109, 0xb400001a,
1496 0x24000102, 0xc800736f, 0x44000103, 0x9400000b,
1497 0x24000102, 0x98010274, 0x54000105, 0xc801027e,
1498 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
1499 0x24000102, 0xc8010332, 0xc8007300, 0xb40000c6,
1500 0x24000102, 0xc8007369, 0x44000103, 0xa4000113,
1501 0x24000102, 0x9801026e, 0x54000020, 0xc801024a,
1502 0x24000102, 0x64000106, 0x44000104, 0x94000045,
1503 0x24000102, 0xc801032d, 0x6400010a, 0xb400009c,
1504 0x24000102, 0xc8007372, 0x44000103, 0x94000028,
1505 0x24000102, 0x74000107, 0x54000105, 0xc80102e7,
1506 0x24000102, 0x98010264, 0x44000104, 0x94000080,
1507 0x24000102, 0xc8010366, 0xc8007361, 0xb40000ed,
1508 0x24000102, 0xa8010465, 0x44000103, 0xc8010379,
1509 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
1510 0x24000102, 0xa8010473, 0x44000104, 0x94000037,
1511 0x24000102, 0x74000108, 0x64000109, 0x36000010,
1512 0x24000102, 0xa801046f, 0x44000103, 0x94000008,
1513 0x24000102, 0x74000070, 0x54000105, 0xc801026a,
1514 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
1515 0x24000102, 0xc8010331, 0xa8010400, 0xb40000b9,
1516 0x24000102, 0xa8010469, 0x44000103, 0x9400010f,
1517 0x24000102, 0x7400006d, 0xc8007020, 0xc801022b,
1518 0x24000102, 0x64000106, 0x44000104, 0x94000043,
1519 0x24000102, 0xc801030d, 0x6400010a, 0xb4000094,
1520 0x24000102, 0xa8010472, 0x44000103, 0x94000022,
1521 0x24000102, 0x74000075, 0x54000105, 0xc80102c7,
1522 0x24000102, 0x74000063, 0x44000104, 0x94000076,
1523 0x24000102, 0xc8010362, 0xa8010461, 0xb40000e2,
1524 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
1525 0x24000102, 0x7400006c, 0xa8010520, 0xc801021c,
1526 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
1527 0x24000102, 0xc8010303, 0x64000109, 0xb4000081,
1528 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
1529 0x24000102, 0x74000074, 0x54000105, 0xc8010290,
1530 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
1531 0x24000102, 0xc8010341, 0x64000000, 0xb40000d1,
1532 0x24000102, 0x64000069, 0x44000103, 0x94000002,
1533 0x24000102, 0x7400006e, 0x54000020, 0xc8010256,
1534 0x24000102, 0x64000106, 0x44000104, 0x94000047,
1535 0x24000102, 0xc801032e, 0x6400010a, 0xb40000a7,
1536 0x24000102, 0x64000072, 0x44000103, 0x94000033,
1537 0x24000102, 0x74000107, 0x54000105, 0xc80102f8,
1538 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
1539 0x24000102, 0xc8010367, 0x64000061, 0xb40000fa,
1540 0x24000102, 0x88010265, 0x44000103, 0x84000077,
1541 0x24000102, 0xc8002068, 0x78010220, 0x940000f0,
1542 0x24000102, 0x88010273, 0x44000104, 0x94000036,
1543 0x24000102, 0x74000108, 0x64000109, 0x36000000,
1544 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
1545 0x24000102, 0xc8002070, 0x54000105, 0xc801025f,
1546 0x24000102, 0xc800200a, 0x44000104, 0x9400004c,
1547 0x24000102, 0x84000030, 0x88010200, 0xb40000b5,
1548 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
1549 0x24000102, 0xc800206d, 0xc8000a20, 0xc8010226,
1550 0x24000102, 0x64000106, 0x44000104, 0x94000040,
1551 0x24000102, 0x84000004, 0x6400010a, 0xb400008e,
1552 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
1553 0x24000102, 0xc8002075, 0x54000105, 0xc80102bd,
1554 0x24000102, 0xc8002063, 0x44000104, 0x9400006b,
1555 0x24000102, 0x84000042, 0x88010261, 0xb40000dd,
1556 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
1557 0x24000102, 0xc800206c, 0x98010420, 0xc8010214,
1558 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
1559 0x24000102, 0x84000001, 0x64000109, 0xb400005d,
1560 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
1561 0x24000102, 0xc8002074, 0x54000105, 0xc801028b,
1562 0x24000102, 0xc800202c, 0x44000104, 0x94000050,
1563 0x24000102, 0x84000032, 0x64000000, 0xb40000cb,
1564 0x24000102, 0x64000069, 0x44000103, 0xa4000114,
1565 0x24000102, 0xc800206e, 0x54000020, 0xc8010251,
1566 0x24000102, 0x64000106, 0x44000104, 0x94000046,
1567 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1,
1568 0x24000102, 0x64000072, 0x44000103, 0x94000029,
1569 0x24000102, 0x74000107, 0x54000105, 0xc80102f1,
1570 0x24000102, 0xc8002064, 0x44000104, 0x940000c0,
1571 0x24000102, 0x84000066, 0x64000061, 0xb40000f4,
1572 0x24000102, 0xc8010965, 0x44000103, 0x84000079,
1573 0x24000102, 0x74000068, 0x78010220, 0xb4000116,
1574 0x24000102, 0xc8010973, 0x44000104, 0x94000038,
1575 0x24000102, 0x74000108, 0x64000109, 0xb4000015,
1576 0x24000102, 0xc801096f, 0x44000103, 0x94000009,
1577 0x24000102, 0x74000070, 0x54000105, 0xc801027a,
1578 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
1579 0x24000102, 0x84000031, 0xc8010900, 0xb40000be,
1580 0x24000102, 0xc8010969, 0x44000103, 0x94000110,
1581 0x24000102, 0x7400006d, 0x54000020, 0xc801023c,
1582 0x24000102, 0x64000106, 0x44000104, 0x94000044,
1583 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098,
1584 0x24000102, 0xc8010972, 0x44000103, 0x94000027,
1585 0x24000102, 0x74000075, 0x54000105, 0xc80102d0,
1586 0x24000102, 0x74000063, 0x44000104, 0x94000078,
1587 0x24000102, 0x84000062, 0xc8010961, 0xb40000e9,
1588 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
1589 0x24000102, 0x7400006c, 0xb8000020, 0xc8010221,
1590 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
1591 0x24000102, 0x84000003, 0x64000109, 0xb4000085,
1592 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
1593 0x24000102, 0x74000074, 0x54000105, 0xc80102a4,
1594 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
1595 0x24000102, 0x84000041, 0x64000000, 0xb40000d5,
1596 0x24000102, 0x64000069, 0x44000103, 0x94000005,
1597 0x24000102, 0x7400006e, 0x54000020, 0xc8010258,
1598 0x24000102, 0x64000106, 0x44000104, 0x94000048,
1599 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab,
1600 0x24000102, 0x64000072, 0x44000103, 0x94000034,
1601 0x24000102, 0x74000107, 0x54000105, 0xc80102fc,
1602 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
1603 0x24000102, 0x84000067, 0x64000061, 0x42000130,
1604 0x24000102, 0x88010265, 0x44000103, 0xa8010277,
1605 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
1606 0x24000102, 0x88010273, 0x44000104, 0x94000035,
1607 0x24000102, 0x74000108, 0x64000109, 0xa40000fd,
1608 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
1609 0x24000102, 0x98010270, 0x54000105, 0xa4000059,
1610 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
1611 0x24000102, 0xa8010230, 0x88010200, 0xb40000ae,
1612 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
1613 0x24000102, 0x9801026d, 0xb8006920, 0xa4000023,
1614 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
1615 0x24000102, 0xa8010204, 0x6400010a, 0xb4000088,
1616 0x24000102, 0x88010272, 0x44000103, 0x94000010,
1617 0x24000102, 0x98010275, 0x54000105, 0xa40000a6,
1618 0x24000102, 0x98010263, 0x44000104, 0x94000054,
1619 0x24000102, 0xa8010242, 0x88010261, 0xb40000d8,
1620 0x24000102, 0xc8006f65, 0x44000103, 0xa80102ff,
1621 0x24000102, 0x9801026c, 0x98010320, 0xa4000011,
1622 0x24000102, 0xc8006f73, 0x44000104, 0x94000039,
1623 0x24000102, 0xa8010201, 0x64000109, 0xb4000019,
1624 0x24000102, 0xc8006f6f, 0x44000103, 0x9400000b,
1625 0x24000102, 0x98010274, 0x54000105, 0xa400007c,
1626 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
1627 0x24000102, 0xa8010232, 0xc8006f00, 0xb40000c5,
1628 0x24000102, 0xc8006f69, 0x44000103, 0xa4000111,
1629 0x24000102, 0x9801026e, 0x54000020, 0xa400003d,
1630 0x24000102, 0x64000106, 0x44000104, 0x94000045,
1631 0x24000102, 0xa801022d, 0x6400010a, 0xb400009b,
1632 0x24000102, 0xc8006f72, 0x44000103, 0x94000028,
1633 0x24000102, 0x74000107, 0x54000105, 0xa40000e3,
1634 0x24000102, 0x98010264, 0x44000104, 0x94000080,
1635 0x24000102, 0xa8010266, 0xc8006f61, 0xb40000ec,
1636 0x24000102, 0xa8010365, 0x44000103, 0xa8010279,
1637 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
1638 0x24000102, 0xa8010373, 0x44000104, 0x94000037,
1639 0x24000102, 0x74000108, 0x64000109, 0x3600000a,
1640 0x24000102, 0xa801036f, 0x44000103, 0x94000008,
1641 0x24000102, 0x74000070, 0x54000105, 0xa4000060,
1642 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
1643 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b8,
1644 0x24000102, 0xa8010369, 0x44000103, 0x9400010f,
1645 0x24000102, 0x7400006d, 0xc8006d20, 0xa400002a,
1646 0x24000102, 0x64000106, 0x44000104, 0x94000043,
1647 0x24000102, 0xa801020d, 0x6400010a, 0xb4000093,
1648 0x24000102, 0xa8010372, 0x44000103, 0x94000022,
1649 0x24000102, 0x74000075, 0x54000105, 0xa40000c1,
1650 0x24000102, 0x74000063, 0x44000104, 0x94000076,
1651 0x24000102, 0xa8010262, 0xa8010361, 0xb40000e1,
1652 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
1653 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018,
1654 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
1655 0x24000102, 0xa8010203, 0x64000109, 0xb400007d,
1656 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
1657 0x24000102, 0x74000074, 0x54000105, 0xa400008f,
1658 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
1659 0x24000102, 0xa8010241, 0x64000000, 0xb40000ce,
1660 0x24000102, 0x64000069, 0x44000103, 0x94000002,
1661 0x24000102, 0x7400006e, 0x54000020, 0xa4000055,
1662 0x24000102, 0x64000106, 0x44000104, 0x94000047,
1663 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a5,
1664 0x24000102, 0x64000072, 0x44000103, 0x94000033,
1665 0x24000102, 0x74000107, 0x54000105, 0xa40000f3,
1666 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
1667 0x24000102, 0xa8010267, 0x64000061, 0xb40000f7,
1668 0x24000102, 0x88010265, 0x44000103, 0x84000077,
1669 0x24000102, 0xb8010468, 0x78010220, 0x940000f0,
1670 0x24000102, 0x88010273, 0x44000104, 0x94000036,
1671 0x24000102, 0x74000108, 0x64000109, 0xc400011e,
1672 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
1673 0x24000102, 0xb8010470, 0x54000105, 0xa400005c,
1674 0x24000102, 0xb801040a, 0x44000104, 0x9400004c,
1675 0x24000102, 0x84000030, 0x88010200, 0xb40000b3,
1676 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
1677 0x24000102, 0xb801046d, 0xb8010620, 0xa4000025,
1678 0x24000102, 0x64000106, 0x44000104, 0x94000040,
1679 0x24000102, 0x84000004, 0x6400010a, 0xb400008d,
1680 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
1681 0x24000102, 0xb8010475, 0x54000105, 0xa40000b4,
1682 0x24000102, 0xb8010463, 0x44000104, 0x9400006b,
1683 0x24000102, 0x84000042, 0x88010261, 0xb40000dc,
1684 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
1685 0x24000102, 0xb801046c, 0x98010420, 0xa4000013,
1686 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
1687 0x24000102, 0x84000001, 0x64000109, 0xb400005a,
1688 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
1689 0x24000102, 0xb8010474, 0x54000105, 0xa400007f,
1690 0x24000102, 0xb801042c, 0x44000104, 0x94000050,
1691 0x24000102, 0x84000032, 0x64000000, 0xb40000ca,
1692 0x24000102, 0x64000069, 0x44000103, 0xa4000112,
1693 0x24000102, 0xb801046e, 0x54000020, 0xa400004b,
1694 0x24000102, 0x64000106, 0x44000104, 0x94000046,
1695 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f,
1696 0x24000102, 0x64000072, 0x44000103, 0x94000029,
1697 0x24000102, 0x74000107, 0x54000105, 0xa40000e8,
1698 0x24000102, 0xb8010464, 0x44000104, 0x940000c0,
1699 0x24000102, 0x84000066, 0x64000061, 0xb40000f2,
1700 0x24000102, 0xb8010565, 0x44000103, 0x84000079,
1701 0x24000102, 0x74000068, 0x78010220, 0xb4000117,
1702 0x24000102, 0xb8010573, 0x44000104, 0x94000038,
1703 0x24000102, 0x74000108, 0x64000109, 0x3600001a,
1704 0x24000102, 0xb801056f, 0x44000103, 0x94000009,
1705 0x24000102, 0x74000070, 0x54000105, 0xa4000071,
1706 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
1707 0x24000102, 0x84000031, 0xb8010500, 0xb40000bc,
1708 0x24000102, 0xb8010569, 0x44000103, 0x94000110,
1709 0x24000102, 0x7400006d, 0x54000020, 0xa400002f,
1710 0x24000102, 0x64000106, 0x44000104, 0x94000044,
1711 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097,
1712 0x24000102, 0xb8010572, 0x44000103, 0x94000027,
1713 0x24000102, 0x74000075, 0x54000105, 0xa40000cf,
1714 0x24000102, 0x74000063, 0x44000104, 0x94000078,
1715 0x24000102, 0x84000062, 0xb8010561, 0xb40000e6,
1716 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
1717 0x24000102, 0x7400006c, 0xb8010a20, 0xa400001e,
1718 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
1719 0x24000102, 0x84000003, 0x64000109, 0xb4000084,
1720 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
1721 0x24000102, 0x74000074, 0x54000105, 0xa40000a0,
1722 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
1723 0x24000102, 0x84000041, 0x64000000, 0xb40000d4,
1724 0x24000102, 0x64000069, 0x44000103, 0x94000005,
1725 0x24000102, 0x7400006e, 0x54000020, 0xa4000057,
1726 0x24000102, 0x64000106, 0x44000104, 0x94000048,
1727 0x24000102, 0x8400002e, 0x6400010a, 0xb40000aa,
1728 0x24000102, 0x64000072, 0x44000103, 0x94000034,
1729 0x24000102, 0x74000107, 0x54000105, 0xa40000f9,
1730 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
1731 0x24000102, 0x84000067, 0x64000061, 0xb4000200,
1732 0x24000102, 0x88010265, 0x44000103, 0x84000077,
1733 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
1734 0x24000102, 0x88010273, 0x44000104, 0x94000035,
1735 0x24000102, 0x74000108, 0x64000109, 0xc400011a,
1736 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
1737 0x24000102, 0x98010270, 0x54000105, 0xa400005b,
1738 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
1739 0x24000102, 0x84000030, 0x88010200, 0xb40000b1,
1740 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
1741 0x24000102, 0x9801026d, 0xb8007220, 0xa4000024,
1742 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
1743 0x24000102, 0x84000004, 0x6400010a, 0xb400008a,
1744 0x24000102, 0x88010272, 0x44000103, 0x94000010,
1745 0x24000102, 0x98010275, 0x54000105, 0xa40000b0,
1746 0x24000102, 0x98010263, 0x44000104, 0x94000054,
1747 0x24000102, 0x84000042, 0x88010261, 0xb40000da,
1748 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
1749 0x24000102, 0x9801026c, 0x98010320, 0xa4000012,
1750 0x24000102, 0x64000073, 0x44000104, 0x94000039,
1751 0x24000102, 0x84000001, 0x64000109, 0xb400001b,
1752 0x24000102, 0x6400006f, 0x44000103, 0x9400000b,
1753 0x24000102, 0x98010274, 0x54000105, 0xa400007e,
1754 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
1755 0x24000102, 0x84000032, 0x64000000, 0xb40000c8,
1756 0x24000102, 0x64000069, 0x44000103, 0xa4000113,
1757 0x24000102, 0x9801026e, 0x54000020, 0xa400004a,
1758 0x24000102, 0x64000106, 0x44000104, 0x94000045,
1759 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d,
1760 0x24000102, 0x64000072, 0x44000103, 0x94000028,
1761 0x24000102, 0x74000107, 0x54000105, 0xa40000e7,
1762 0x24000102, 0x98010264, 0x44000104, 0x94000080,
1763 0x24000102, 0x84000066, 0x64000061, 0xb40000ee,
1764 0x24000102, 0xa8010465, 0x44000103, 0x84000079,
1765 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
1766 0x24000102, 0xa8010473, 0x44000104, 0x94000037,
1767 0x24000102, 0x74000108, 0x64000109, 0x36000012,
1768 0x24000102, 0xa801046f, 0x44000103, 0x94000008,
1769 0x24000102, 0x74000070, 0x54000105, 0xa400006a,
1770 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
1771 0x24000102, 0x84000031, 0xa8010400, 0xb40000ba,
1772 0x24000102, 0xa8010469, 0x44000103, 0x9400010f,
1773 0x24000102, 0x7400006d, 0xc8007520, 0xa400002b,
1774 0x24000102, 0x64000106, 0x44000104, 0x94000043,
1775 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095,
1776 0x24000102, 0xa8010472, 0x44000103, 0x94000022,
1777 0x24000102, 0x74000075, 0x54000105, 0xa40000c7,
1778 0x24000102, 0x74000063, 0x44000104, 0x94000076,
1779 0x24000102, 0x84000062, 0xa8010461, 0xb40000e4,
1780 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
1781 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c,
1782 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
1783 0x24000102, 0x84000003, 0x64000109, 0xb4000082,
1784 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
1785 0x24000102, 0x74000074, 0x54000105, 0xa4000090,
1786 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
1787 0x24000102, 0x84000041, 0x64000000, 0xb40000d2,
1788 0x24000102, 0x64000069, 0x44000103, 0x94000002,
1789 0x24000102, 0x7400006e, 0x54000020, 0xa4000056,
1790 0x24000102, 0x64000106, 0x44000104, 0x94000047,
1791 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a8,
1792 0x24000102, 0x64000072, 0x44000103, 0x94000033,
1793 0x24000102, 0x74000107, 0x54000105, 0xa40000f8,
1794 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
1795 0x24000102, 0x84000067, 0x64000061, 0xb40000fb,
1796 0x24000102, 0x88010265, 0x44000103, 0x84000077,
1797 0x24000102, 0x74000068, 0x78010220, 0x940000f0,
1798 0x24000102, 0x88010273, 0x44000104, 0x94000036,
1799 0x24000102, 0x74000108, 0x64000109, 0x36000002,
1800 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
1801 0x24000102, 0x74000070, 0x54000105, 0xa400005f,
1802 0x24000102, 0x7400000a, 0x44000104, 0x9400004c,
1803 0x24000102, 0x84000030, 0x88010200, 0xb40000b6,
1804 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
1805 0x24000102, 0x7400006d, 0xc8006320, 0xa4000026,
1806 0x24000102, 0x64000106, 0x44000104, 0x94000040,
1807 0x24000102, 0x84000004, 0x6400010a, 0xb4000091,
1808 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
1809 0x24000102, 0x74000075, 0x54000105, 0xa40000bd,
1810 0x24000102, 0x74000063, 0x44000104, 0x9400006b,
1811 0x24000102, 0x84000042, 0x88010261, 0xb40000de,
1812 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
1813 0x24000102, 0x7400006c, 0x98010420, 0xa4000014,
1814 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
1815 0x24000102, 0x84000001, 0x64000109, 0xb400005e,
1816 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
1817 0x24000102, 0x74000074, 0x54000105, 0xa400008b,
1818 0x24000102, 0x7400002c, 0x44000104, 0x94000050,
1819 0x24000102, 0x84000032, 0x64000000, 0xb40000cc,
1820 0x24000102, 0x64000069, 0x44000103, 0xa4000114,
1821 0x24000102, 0x7400006e, 0x54000020, 0xa4000051,
1822 0x24000102, 0x64000106, 0x44000104, 0x94000046,
1823 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a2,
1824 0x24000102, 0x64000072, 0x44000103, 0x94000029,
1825 0x24000102, 0x74000107, 0x54000105, 0xa40000f1,
1826 0x24000102, 0x74000064, 0x44000104, 0x940000c0,
1827 0x24000102, 0x84000066, 0x64000061, 0xb40000f5,
1828 0x24000102, 0xc8000065, 0x44000103, 0x84000079,
1829 0x24000102, 0x74000068, 0x78010220, 0xb4000118,
1830 0x24000102, 0xc8000073, 0x44000104, 0x94000038,
1831 0x24000102, 0x74000108, 0x64000109, 0xb4000016,
1832 0x24000102, 0xc800006f, 0x44000103, 0x94000009,
1833 0x24000102, 0x74000070, 0x54000105, 0xa400007a,
1834 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
1835 0x24000102, 0x84000031, 0xc8000000, 0xb40000bf,
1836 0x24000102, 0xc8000069, 0x44000103, 0x94000110,
1837 0x24000102, 0x7400006d, 0x54000020, 0xa400003c,
1838 0x24000102, 0x64000106, 0x44000104, 0x94000044,
1839 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099,
1840 0x24000102, 0xc8000072, 0x44000103, 0x94000027,
1841 0x24000102, 0x74000075, 0x54000105, 0xa40000d0,
1842 0x24000102, 0x74000063, 0x44000104, 0x94000078,
1843 0x24000102, 0x84000062, 0xc8000061, 0xb40000ea,
1844 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
1845 0x24000102, 0x7400006c, 0xb8006120, 0xa4000021,
1846 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
1847 0x24000102, 0x84000003, 0x64000109, 0xb4000086,
1848 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
1849 0x24000102, 0x74000074, 0x54000105, 0xa40000a4,
1850 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
1851 0x24000102, 0x84000041, 0x64000000, 0xb40000d6,
1852 0x24000102, 0x64000069, 0x44000103, 0x94000005,
1853 0x24000102, 0x7400006e, 0x54000020, 0xa4000058,
1854 0x24000102, 0x64000106, 0x44000104, 0x94000048,
1855 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ac,
1856 0x24000102, 0x64000072, 0x44000103, 0x94000034,
1857 0x24000102, 0x74000107, 0x54000105, 0xa40000fc,
1858 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
1859 0x24000102, 0x84000067, 0x64000061, 0x46000140,
1860 0x24000102, 0x88010265, 0x44000103, 0xa8010277,
1861 0x24000102, 0x98010268, 0x78010220, 0xb80102e0,
1862 0x24000102, 0x88010273, 0x44000104, 0xb8010235,
1863 0x24000102, 0x74000108, 0x64000109, 0xa40000fd,
1864 0x24000102, 0x8801026f, 0x44000103, 0xb8010206,
1865 0x24000102, 0x98010270, 0x54000105, 0xa4000059,
1866 0x24000102, 0x9801020a, 0x44000104, 0xb8010249,
1867 0x24000102, 0xa8010230, 0x88010200, 0xb40000ad,
1868 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
1869 0x24000102, 0x9801026d, 0xb8006520, 0xa4000023,
1870 0x24000102, 0x64000106, 0x44000104, 0xb801023f,
1871 0x24000102, 0xa8010204, 0x6400010a, 0xb4000087,
1872 0x24000102, 0x88010272, 0x44000103, 0xb8010210,
1873 0x24000102, 0x98010275, 0x54000105, 0xa40000a6,
1874 0x24000102, 0x98010263, 0x44000104, 0xb8010254,
1875 0x24000102, 0xa8010242, 0x88010261, 0xb40000d7,
1876 0x24000102, 0xc8006965, 0x44000103, 0xa80102ff,
1877 0x24000102, 0x9801026c, 0x98010320, 0xa4000011,
1878 0x24000102, 0xc8006973, 0x44000104, 0xb8010239,
1879 0x24000102, 0xa8010201, 0x64000109, 0xb4000017,
1880 0x24000102, 0xc800696f, 0x44000103, 0xb801020b,
1881 0x24000102, 0x98010274, 0x54000105, 0xa400007c,
1882 0x24000102, 0x9801022c, 0x44000104, 0xb801024f,
1883 0x24000102, 0xa8010232, 0xc8006900, 0xb40000c4,
1884 0x24000102, 0xc8006969, 0x44000103, 0xa4000111,
1885 0x24000102, 0x9801026e, 0x54000020, 0xa400003d,
1886 0x24000102, 0x64000106, 0x44000104, 0xb8010245,
1887 0x24000102, 0xa801022d, 0x6400010a, 0xb400009a,
1888 0x24000102, 0xc8006972, 0x44000103, 0xb8010228,
1889 0x24000102, 0x74000107, 0x54000105, 0xa40000e3,
1890 0x24000102, 0x98010264, 0x44000104, 0xb8010280,
1891 0x24000102, 0xa8010266, 0xc8006961, 0xb40000eb,
1892 0x24000102, 0xa8010365, 0x44000103, 0xa8010279,
1893 0x24000102, 0x74000068, 0x78010220, 0xb80102fe,
1894 0x24000102, 0xa8010373, 0x44000104, 0xb8010237,
1895 0x24000102, 0x74000108, 0x64000109, 0x3600000c,
1896 0x24000102, 0xa801036f, 0x44000103, 0xb8010208,
1897 0x24000102, 0x74000070, 0x54000105, 0xa4000060,
1898 0x24000102, 0x7400000a, 0x44000104, 0xb801024d,
1899 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b7,
1900 0x24000102, 0xa8010369, 0x44000103, 0x9400010f,
1901 0x24000102, 0x7400006d, 0xc8006c20, 0xa400002a,
1902 0x24000102, 0x64000106, 0x44000104, 0xb8010243,
1903 0x24000102, 0xa801020d, 0x6400010a, 0xb4000092,
1904 0x24000102, 0xa8010372, 0x44000103, 0xb8010222,
1905 0x24000102, 0x74000075, 0x54000105, 0xa40000c1,
1906 0x24000102, 0x74000063, 0x44000104, 0xb8010276,
1907 0x24000102, 0xa8010262, 0xa8010361, 0xb40000df,
1908 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
1909 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018,
1910 0x24000102, 0x64000073, 0x44000104, 0xb801023b,
1911 0x24000102, 0xa8010203, 0x64000109, 0xb400007b,
1912 0x24000102, 0x6400006f, 0x44000103, 0xb801020e,
1913 0x24000102, 0x74000074, 0x54000105, 0xa400008f,
1914 0x24000102, 0x7400002c, 0x44000104, 0xb8010252,
1915 0x24000102, 0xa8010241, 0x64000000, 0xb40000cd,
1916 0x24000102, 0x64000069, 0x44000103, 0xb8010202,
1917 0x24000102, 0x7400006e, 0x54000020, 0xa4000055,
1918 0x24000102, 0x64000106, 0x44000104, 0xb8010247,
1919 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a3,
1920 0x24000102, 0x64000072, 0x44000103, 0xb8010233,
1921 0x24000102, 0x74000107, 0x54000105, 0xa40000f3,
1922 0x24000102, 0x74000064, 0x44000104, 0xb80102c2,
1923 0x24000102, 0xa8010267, 0x64000061, 0xb40000f6,
1924 0x24000102, 0x88010265, 0x44000103, 0x84000077,
1925 0x24000102, 0xb8010368, 0x78010220, 0xb80102f0,
1926 0x24000102, 0x88010273, 0x44000104, 0xb8010236,
1927 0x24000102, 0x74000108, 0x64000109, 0xc400011f,
1928 0x24000102, 0x8801026f, 0x44000103, 0xb8010207,
1929 0x24000102, 0xb8010370, 0x54000105, 0xa400005c,
1930 0x24000102, 0xb801030a, 0x44000104, 0xb801024c,
1931 0x24000102, 0x84000030, 0x88010200, 0xb40000b2,
1932 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
1933 0x24000102, 0xb801036d, 0xb8007320, 0xa4000025,
1934 0x24000102, 0x64000106, 0x44000104, 0xb8010240,
1935 0x24000102, 0x84000004, 0x6400010a, 0xb400008c,
1936 0x24000102, 0x88010272, 0x44000103, 0xb801021f,
1937 0x24000102, 0xb8010375, 0x54000105, 0xa40000b4,
1938 0x24000102, 0xb8010363, 0x44000104, 0xb801026b,
1939 0x24000102, 0x84000042, 0x88010261, 0xb40000db,
1940 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
1941 0x24000102, 0xb801036c, 0x98010420, 0xa4000013,
1942 0x24000102, 0x64000073, 0x44000104, 0xb801023a,
1943 0x24000102, 0x84000001, 0x64000109, 0xb400001d,
1944 0x24000102, 0x6400006f, 0x44000103, 0xb801020c,
1945 0x24000102, 0xb8010374, 0x54000105, 0xa400007f,
1946 0x24000102, 0xb801032c, 0x44000104, 0xb8010250,
1947 0x24000102, 0x84000032, 0x64000000, 0xb40000c9,
1948 0x24000102, 0x64000069, 0x44000103, 0xa4000112,
1949 0x24000102, 0xb801036e, 0x54000020, 0xa400004b,
1950 0x24000102, 0x64000106, 0x44000104, 0xb8010246,
1951 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e,
1952 0x24000102, 0x64000072, 0x44000103, 0xb8010229,
1953 0x24000102, 0x74000107, 0x54000105, 0xa40000e8,
1954 0x24000102, 0xb8010364, 0x44000104, 0xb80102c0,
1955 0x24000102, 0x84000066, 0x64000061, 0xb40000ef,
1956 0x24000102, 0xb8002065, 0x44000103, 0x84000079,
1957 0x24000102, 0x74000068, 0x78010220, 0xb4000115,
1958 0x24000102, 0xb8002073, 0x44000104, 0xb8010238,
1959 0x24000102, 0x74000108, 0x64000109, 0x3600001c,
1960 0x24000102, 0xb800206f, 0x44000103, 0xb8010209,
1961 0x24000102, 0x74000070, 0x54000105, 0xa4000071,
1962 0x24000102, 0x7400000a, 0x44000104, 0xb801024e,
1963 0x24000102, 0x84000031, 0xb8002000, 0xb40000bb,
1964 0x24000102, 0xb8002069, 0x44000103, 0x94000110,
1965 0x24000102, 0x7400006d, 0x54000020, 0xa400002f,
1966 0x24000102, 0x64000106, 0x44000104, 0xb8010244,
1967 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096,
1968 0x24000102, 0xb8002072, 0x44000103, 0xb8010227,
1969 0x24000102, 0x74000075, 0x54000105, 0xa40000cf,
1970 0x24000102, 0x74000063, 0x44000104, 0xb8010278,
1971 0x24000102, 0x84000062, 0xb8002061, 0xb40000e5,
1972 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
1973 0x24000102, 0x7400006c, 0xb8010920, 0xa400001e,
1974 0x24000102, 0x64000073, 0x44000104, 0xb801023e,
1975 0x24000102, 0x84000003, 0x64000109, 0xb4000083,
1976 0x24000102, 0x6400006f, 0x44000103, 0xb801020f,
1977 0x24000102, 0x74000074, 0x54000105, 0xa40000a0,
1978 0x24000102, 0x7400002c, 0x44000104, 0xb8010253,
1979 0x24000102, 0x84000041, 0x64000000, 0xb40000d3,
1980 0x24000102, 0x64000069, 0x44000103, 0xb8010205,
1981 0x24000102, 0x7400006e, 0x54000020, 0xa4000057,
1982 0x24000102, 0x64000106, 0x44000104, 0xb8010248,
1983 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9,
1984 0x24000102, 0x64000072, 0x44000103, 0xb8010234,
1985 0x24000102, 0x74000107, 0x54000105, 0xa40000f9,
1986 0x24000102, 0x74000064, 0x44000104, 0xb80102c3,
1987 0x24000102, 0x84000067, 0x64000061, 0x3e000128,
1988 0x24000102, 0x88010265, 0x44000103, 0xc8010477,
1989 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
1990 0x24000102, 0x88010273, 0x44000104, 0x94000035,
1991 0x24000102, 0x74000108, 0x64000109, 0xc400011b,
1992 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
1993 0x24000102, 0x98010270, 0x54000105, 0xa400005b,
1994 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
1995 0x24000102, 0xc8010430, 0x88010200, 0xb40000af,
1996 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
1997 0x24000102, 0x9801026d, 0xb8006f20, 0xa4000024,
1998 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
1999 0x24000102, 0xc8010404, 0x6400010a, 0xb4000089,
2000 0x24000102, 0x88010272, 0x44000103, 0x94000010,
2001 0x24000102, 0x98010275, 0x54000105, 0xa40000b0,
2002 0x24000102, 0x98010263, 0x44000104, 0x94000054,
2003 0x24000102, 0xc8010442, 0x88010261, 0xb40000d9,
2004 0x24000102, 0xc8010665, 0x44000103, 0xc80104ff,
2005 0x24000102, 0x9801026c, 0x98010320, 0xa4000012,
2006 0x24000102, 0xc8010673, 0x44000104, 0x94000039,
2007 0x24000102, 0xc8010401, 0x64000109, 0xb400001a,
2008 0x24000102, 0xc801066f, 0x44000103, 0x9400000b,
2009 0x24000102, 0x98010274, 0x54000105, 0xa400007e,
2010 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
2011 0x24000102, 0xc8010432, 0xc8010600, 0xb40000c6,
2012 0x24000102, 0xc8010669, 0x44000103, 0xa4000113,
2013 0x24000102, 0x9801026e, 0x54000020, 0xa400004a,
2014 0x24000102, 0x64000106, 0x44000104, 0x94000045,
2015 0x24000102, 0xc801042d, 0x6400010a, 0xb400009c,
2016 0x24000102, 0xc8010672, 0x44000103, 0x94000028,
2017 0x24000102, 0x74000107, 0x54000105, 0xa40000e7,
2018 0x24000102, 0x98010264, 0x44000104, 0x94000080,
2019 0x24000102, 0xc8010466, 0xc8010661, 0xb40000ed,
2020 0x24000102, 0xa8010465, 0x44000103, 0xc8010479,
2021 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
2022 0x24000102, 0xa8010473, 0x44000104, 0x94000037,
2023 0x24000102, 0x74000108, 0x64000109, 0x36000014,
2024 0x24000102, 0xa801046f, 0x44000103, 0x94000008,
2025 0x24000102, 0x74000070, 0x54000105, 0xa400006a,
2026 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
2027 0x24000102, 0xc8010431, 0xa8010400, 0xb40000b9,
2028 0x24000102, 0xa8010469, 0x44000103, 0x9400010f,
2029 0x24000102, 0x7400006d, 0xc8007420, 0xa400002b,
2030 0x24000102, 0x64000106, 0x44000104, 0x94000043,
2031 0x24000102, 0xc801040d, 0x6400010a, 0xb4000094,
2032 0x24000102, 0xa8010472, 0x44000103, 0x94000022,
2033 0x24000102, 0x74000075, 0x54000105, 0xa40000c7,
2034 0x24000102, 0x74000063, 0x44000104, 0x94000076,
2035 0x24000102, 0xc8010462, 0xa8010461, 0xb40000e2,
2036 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
2037 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c,
2038 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
2039 0x24000102, 0xc8010403, 0x64000109, 0xb4000081,
2040 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
2041 0x24000102, 0x74000074, 0x54000105, 0xa4000090,
2042 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
2043 0x24000102, 0xc8010441, 0x64000000, 0xb40000d1,
2044 0x24000102, 0x64000069, 0x44000103, 0x94000002,
2045 0x24000102, 0x7400006e, 0x54000020, 0xa4000056,
2046 0x24000102, 0x64000106, 0x44000104, 0x94000047,
2047 0x24000102, 0xc801042e, 0x6400010a, 0xb40000a7,
2048 0x24000102, 0x64000072, 0x44000103, 0x94000033,
2049 0x24000102, 0x74000107, 0x54000105, 0xa40000f8,
2050 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
2051 0x24000102, 0xc8010467, 0x64000061, 0xb40000fa,
2052 0x24000102, 0x88010265, 0x44000103, 0x84000077,
2053 0x24000102, 0xc8010568, 0x78010220, 0x940000f0,
2054 0x24000102, 0x88010273, 0x44000104, 0x94000036,
2055 0x24000102, 0x74000108, 0x64000109, 0x36000004,
2056 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
2057 0x24000102, 0xc8010570, 0x54000105, 0xa400005f,
2058 0x24000102, 0xc801050a, 0x44000104, 0x9400004c,
2059 0x24000102, 0x84000030, 0x88010200, 0xb40000b5,
2060 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
2061 0x24000102, 0xc801056d, 0xc8002c20, 0xa4000026,
2062 0x24000102, 0x64000106, 0x44000104, 0x94000040,
2063 0x24000102, 0x84000004, 0x6400010a, 0xb400008e,
2064 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
2065 0x24000102, 0xc8010575, 0x54000105, 0xa40000bd,
2066 0x24000102, 0xc8010563, 0x44000104, 0x9400006b,
2067 0x24000102, 0x84000042, 0x88010261, 0xb40000dd,
2068 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
2069 0x24000102, 0xc801056c, 0x98010420, 0xa4000014,
2070 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
2071 0x24000102, 0x84000001, 0x64000109, 0xb400005d,
2072 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
2073 0x24000102, 0xc8010574, 0x54000105, 0xa400008b,
2074 0x24000102, 0xc801052c, 0x44000104, 0x94000050,
2075 0x24000102, 0x84000032, 0x64000000, 0xb40000cb,
2076 0x24000102, 0x64000069, 0x44000103, 0xa4000114,
2077 0x24000102, 0xc801056e, 0x54000020, 0xa4000051,
2078 0x24000102, 0x64000106, 0x44000104, 0x94000046,
2079 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1,
2080 0x24000102, 0x64000072, 0x44000103, 0x94000029,
2081 0x24000102, 0x74000107, 0x54000105, 0xa40000f1,
2082 0x24000102, 0xc8010564, 0x44000104, 0x940000c0,
2083 0x24000102, 0x84000066, 0x64000061, 0xb40000f4,
2084 0x24000102, 0xc8010a65, 0x44000103, 0x84000079,
2085 0x24000102, 0x74000068, 0x78010220, 0xb4000116,
2086 0x24000102, 0xc8010a73, 0x44000104, 0x94000038,
2087 0x24000102, 0x74000108, 0x64000109, 0xb4000015,
2088 0x24000102, 0xc8010a6f, 0x44000103, 0x94000009,
2089 0x24000102, 0x74000070, 0x54000105, 0xa400007a,
2090 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
2091 0x24000102, 0x84000031, 0xc8010a00, 0xb40000be,
2092 0x24000102, 0xc8010a69, 0x44000103, 0x94000110,
2093 0x24000102, 0x7400006d, 0x54000020, 0xa400003c,
2094 0x24000102, 0x64000106, 0x44000104, 0x94000044,
2095 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098,
2096 0x24000102, 0xc8010a72, 0x44000103, 0x94000027,
2097 0x24000102, 0x74000075, 0x54000105, 0xa40000d0,
2098 0x24000102, 0x74000063, 0x44000104, 0x94000078,
2099 0x24000102, 0x84000062, 0xc8010a61, 0xb40000e9,
2100 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
2101 0x24000102, 0x7400006c, 0xb8000020, 0xa4000021,
2102 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
2103 0x24000102, 0x84000003, 0x64000109, 0xb4000085,
2104 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
2105 0x24000102, 0x74000074, 0x54000105, 0xa40000a4,
2106 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
2107 0x24000102, 0x84000041, 0x64000000, 0xb40000d5,
2108 0x24000102, 0x64000069, 0x44000103, 0x94000005,
2109 0x24000102, 0x7400006e, 0x54000020, 0xa4000058,
2110 0x24000102, 0x64000106, 0x44000104, 0x94000048,
2111 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab,
2112 0x24000102, 0x64000072, 0x44000103, 0x94000034,
2113 0x24000102, 0x74000107, 0x54000105, 0xa40000fc,
2114 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
2115 0x24000102, 0x84000067, 0x64000061, 0x46000160,
2116 0x24000102, 0x88010265, 0x44000103, 0xa8010277,
2117 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
2118 0x24000102, 0x88010273, 0x44000104, 0x94000035,
2119 0x24000102, 0x74000108, 0x64000109, 0xa40000fd,
2120 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
2121 0x24000102, 0x98010270, 0x54000105, 0xa4000059,
2122 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
2123 0x24000102, 0xa8010230, 0x88010200, 0xb40000ae,
2124 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
2125 0x24000102, 0x9801026d, 0xb8006920, 0xa4000023,
2126 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
2127 0x24000102, 0xa8010204, 0x6400010a, 0xb4000088,
2128 0x24000102, 0x88010272, 0x44000103, 0x94000010,
2129 0x24000102, 0x98010275, 0x54000105, 0xa40000a6,
2130 0x24000102, 0x98010263, 0x44000104, 0x94000054,
2131 0x24000102, 0xa8010242, 0x88010261, 0xb40000d8,
2132 0x24000102, 0xc8007265, 0x44000103, 0xa80102ff,
2133 0x24000102, 0x9801026c, 0x98010320, 0xa4000011,
2134 0x24000102, 0xc8007273, 0x44000104, 0x94000039,
2135 0x24000102, 0xa8010201, 0x64000109, 0xb4000019,
2136 0x24000102, 0xc800726f, 0x44000103, 0x9400000b,
2137 0x24000102, 0x98010274, 0x54000105, 0xa400007c,
2138 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
2139 0x24000102, 0xa8010232, 0xc8007200, 0xb40000c5,
2140 0x24000102, 0xc8007269, 0x44000103, 0xa4000111,
2141 0x24000102, 0x9801026e, 0x54000020, 0xa400003d,
2142 0x24000102, 0x64000106, 0x44000104, 0x94000045,
2143 0x24000102, 0xa801022d, 0x6400010a, 0xb400009b,
2144 0x24000102, 0xc8007272, 0x44000103, 0x94000028,
2145 0x24000102, 0x74000107, 0x54000105, 0xa40000e3,
2146 0x24000102, 0x98010264, 0x44000104, 0x94000080,
2147 0x24000102, 0xa8010266, 0xc8007261, 0xb40000ec,
2148 0x24000102, 0xa8010365, 0x44000103, 0xa8010279,
2149 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
2150 0x24000102, 0xa8010373, 0x44000104, 0x94000037,
2151 0x24000102, 0x74000108, 0x64000109, 0x3600000e,
2152 0x24000102, 0xa801036f, 0x44000103, 0x94000008,
2153 0x24000102, 0x74000070, 0x54000105, 0xa4000060,
2154 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
2155 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b8,
2156 0x24000102, 0xa8010369, 0x44000103, 0x9400010f,
2157 0x24000102, 0x7400006d, 0xc8006e20, 0xa400002a,
2158 0x24000102, 0x64000106, 0x44000104, 0x94000043,
2159 0x24000102, 0xa801020d, 0x6400010a, 0xb4000093,
2160 0x24000102, 0xa8010372, 0x44000103, 0x94000022,
2161 0x24000102, 0x74000075, 0x54000105, 0xa40000c1,
2162 0x24000102, 0x74000063, 0x44000104, 0x94000076,
2163 0x24000102, 0xa8010262, 0xa8010361, 0xb40000e1,
2164 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
2165 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018,
2166 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
2167 0x24000102, 0xa8010203, 0x64000109, 0xb400007d,
2168 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
2169 0x24000102, 0x74000074, 0x54000105, 0xa400008f,
2170 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
2171 0x24000102, 0xa8010241, 0x64000000, 0xb40000ce,
2172 0x24000102, 0x64000069, 0x44000103, 0x94000002,
2173 0x24000102, 0x7400006e, 0x54000020, 0xa4000055,
2174 0x24000102, 0x64000106, 0x44000104, 0x94000047,
2175 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a5,
2176 0x24000102, 0x64000072, 0x44000103, 0x94000033,
2177 0x24000102, 0x74000107, 0x54000105, 0xa40000f3,
2178 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
2179 0x24000102, 0xa8010267, 0x64000061, 0xb40000f7,
2180 0x24000102, 0x88010265, 0x44000103, 0x84000077,
2181 0x24000102, 0xb8010468, 0x78010220, 0x940000f0,
2182 0x24000102, 0x88010273, 0x44000104, 0x94000036,
2183 0x24000102, 0x74000108, 0x64000109, 0xc4000120,
2184 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
2185 0x24000102, 0xb8010470, 0x54000105, 0xa400005c,
2186 0x24000102, 0xb801040a, 0x44000104, 0x9400004c,
2187 0x24000102, 0x84000030, 0x88010200, 0xb40000b3,
2188 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
2189 0x24000102, 0xb801046d, 0xb8010620, 0xa4000025,
2190 0x24000102, 0x64000106, 0x44000104, 0x94000040,
2191 0x24000102, 0x84000004, 0x6400010a, 0xb400008d,
2192 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
2193 0x24000102, 0xb8010475, 0x54000105, 0xa40000b4,
2194 0x24000102, 0xb8010463, 0x44000104, 0x9400006b,
2195 0x24000102, 0x84000042, 0x88010261, 0xb40000dc,
2196 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
2197 0x24000102, 0xb801046c, 0x98010420, 0xa4000013,
2198 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
2199 0x24000102, 0x84000001, 0x64000109, 0xb400005a,
2200 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
2201 0x24000102, 0xb8010474, 0x54000105, 0xa400007f,
2202 0x24000102, 0xb801042c, 0x44000104, 0x94000050,
2203 0x24000102, 0x84000032, 0x64000000, 0xb40000ca,
2204 0x24000102, 0x64000069, 0x44000103, 0xa4000112,
2205 0x24000102, 0xb801046e, 0x54000020, 0xa400004b,
2206 0x24000102, 0x64000106, 0x44000104, 0x94000046,
2207 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f,
2208 0x24000102, 0x64000072, 0x44000103, 0x94000029,
2209 0x24000102, 0x74000107, 0x54000105, 0xa40000e8,
2210 0x24000102, 0xb8010464, 0x44000104, 0x940000c0,
2211 0x24000102, 0x84000066, 0x64000061, 0xb40000f2,
2212 0x24000102, 0xb8010565, 0x44000103, 0x84000079,
2213 0x24000102, 0x74000068, 0x78010220, 0xb4000117,
2214 0x24000102, 0xb8010573, 0x44000104, 0x94000038,
2215 0x24000102, 0x74000108, 0x64000109, 0x3600001e,
2216 0x24000102, 0xb801056f, 0x44000103, 0x94000009,
2217 0x24000102, 0x74000070, 0x54000105, 0xa4000071,
2218 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
2219 0x24000102, 0x84000031, 0xb8010500, 0xb40000bc,
2220 0x24000102, 0xb8010569, 0x44000103, 0x94000110,
2221 0x24000102, 0x7400006d, 0x54000020, 0xa400002f,
2222 0x24000102, 0x64000106, 0x44000104, 0x94000044,
2223 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097,
2224 0x24000102, 0xb8010572, 0x44000103, 0x94000027,
2225 0x24000102, 0x74000075, 0x54000105, 0xa40000cf,
2226 0x24000102, 0x74000063, 0x44000104, 0x94000078,
2227 0x24000102, 0x84000062, 0xb8010561, 0xb40000e6,
2228 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
2229 0x24000102, 0x7400006c, 0xb8010a20, 0xa400001e,
2230 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
2231 0x24000102, 0x84000003, 0x64000109, 0xb4000084,
2232 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
2233 0x24000102, 0x74000074, 0x54000105, 0xa40000a0,
2234 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
2235 0x24000102, 0x84000041, 0x64000000, 0xb40000d4,
2236 0x24000102, 0x64000069, 0x44000103, 0x94000005,
2237 0x24000102, 0x7400006e, 0x54000020, 0xa4000057,
2238 0x24000102, 0x64000106, 0x44000104, 0x94000048,
2239 0x24000102, 0x8400002e, 0x6400010a, 0xb40000aa,
2240 0x24000102, 0x64000072, 0x44000103, 0x94000034,
2241 0x24000102, 0x74000107, 0x54000105, 0xa40000f9,
2242 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
2243 0x24000102, 0x84000067, 0x64000061, 0xb4000200,
2244 0x24000102, 0x88010265, 0x44000103, 0x84000077,
2245 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
2246 0x24000102, 0x88010273, 0x44000104, 0x94000035,
2247 0x24000102, 0x74000108, 0x64000109, 0xc400011c,
2248 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
2249 0x24000102, 0x98010270, 0x54000105, 0xa400005b,
2250 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
2251 0x24000102, 0x84000030, 0x88010200, 0xb40000b1,
2252 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
2253 0x24000102, 0x9801026d, 0xb8007220, 0xa4000024,
2254 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
2255 0x24000102, 0x84000004, 0x6400010a, 0xb400008a,
2256 0x24000102, 0x88010272, 0x44000103, 0x94000010,
2257 0x24000102, 0x98010275, 0x54000105, 0xa40000b0,
2258 0x24000102, 0x98010263, 0x44000104, 0x94000054,
2259 0x24000102, 0x84000042, 0x88010261, 0xb40000da,
2260 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
2261 0x24000102, 0x9801026c, 0x98010320, 0xa4000012,
2262 0x24000102, 0x64000073, 0x44000104, 0x94000039,
2263 0x24000102, 0x84000001, 0x64000109, 0xb400001b,
2264 0x24000102, 0x6400006f, 0x44000103, 0x9400000b,
2265 0x24000102, 0x98010274, 0x54000105, 0xa400007e,
2266 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
2267 0x24000102, 0x84000032, 0x64000000, 0xb40000c8,
2268 0x24000102, 0x64000069, 0x44000103, 0xa4000113,
2269 0x24000102, 0x9801026e, 0x54000020, 0xa400004a,
2270 0x24000102, 0x64000106, 0x44000104, 0x94000045,
2271 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d,
2272 0x24000102, 0x64000072, 0x44000103, 0x94000028,
2273 0x24000102, 0x74000107, 0x54000105, 0xa40000e7,
2274 0x24000102, 0x98010264, 0x44000104, 0x94000080,
2275 0x24000102, 0x84000066, 0x64000061, 0xb40000ee,
2276 0x24000102, 0xa8010465, 0x44000103, 0x84000079,
2277 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
2278 0x24000102, 0xa8010473, 0x44000104, 0x94000037,
2279 0x24000102, 0x74000108, 0x64000109, 0x36000016,
2280 0x24000102, 0xa801046f, 0x44000103, 0x94000008,
2281 0x24000102, 0x74000070, 0x54000105, 0xa400006a,
2282 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
2283 0x24000102, 0x84000031, 0xa8010400, 0xb40000ba,
2284 0x24000102, 0xa8010469, 0x44000103, 0x9400010f,
2285 0x24000102, 0x7400006d, 0xc8010720, 0xa400002b,
2286 0x24000102, 0x64000106, 0x44000104, 0x94000043,
2287 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095,
2288 0x24000102, 0xa8010472, 0x44000103, 0x94000022,
2289 0x24000102, 0x74000075, 0x54000105, 0xa40000c7,
2290 0x24000102, 0x74000063, 0x44000104, 0x94000076,
2291 0x24000102, 0x84000062, 0xa8010461, 0xb40000e4,
2292 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
2293 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c,
2294 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
2295 0x24000102, 0x84000003, 0x64000109, 0xb4000082,
2296 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
2297 0x24000102, 0x74000074, 0x54000105, 0xa4000090,
2298 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
2299 0x24000102, 0x84000041, 0x64000000, 0xb40000d2,
2300 0x24000102, 0x64000069, 0x44000103, 0x94000002,
2301 0x24000102, 0x7400006e, 0x54000020, 0xa4000056,
2302 0x24000102, 0x64000106, 0x44000104, 0x94000047,
2303 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a8,
2304 0x24000102, 0x64000072, 0x44000103, 0x94000033,
2305 0x24000102, 0x74000107, 0x54000105, 0xa40000f8,
2306 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
2307 0x24000102, 0x84000067, 0x64000061, 0xb40000fb,
2308 0x24000102, 0x88010265, 0x44000103, 0x84000077,
2309 0x24000102, 0x74000068, 0x78010220, 0x940000f0,
2310 0x24000102, 0x88010273, 0x44000104, 0x94000036,
2311 0x24000102, 0x74000108, 0x64000109, 0x36000006,
2312 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
2313 0x24000102, 0x74000070, 0x54000105, 0xa400005f,
2314 0x24000102, 0x7400000a, 0x44000104, 0x9400004c,
2315 0x24000102, 0x84000030, 0x88010200, 0xb40000b6,
2316 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
2317 0x24000102, 0x7400006d, 0xc8006420, 0xa4000026,
2318 0x24000102, 0x64000106, 0x44000104, 0x94000040,
2319 0x24000102, 0x84000004, 0x6400010a, 0xb4000091,
2320 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
2321 0x24000102, 0x74000075, 0x54000105, 0xa40000bd,
2322 0x24000102, 0x74000063, 0x44000104, 0x9400006b,
2323 0x24000102, 0x84000042, 0x88010261, 0xb40000de,
2324 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
2325 0x24000102, 0x7400006c, 0x98010420, 0xa4000014,
2326 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
2327 0x24000102, 0x84000001, 0x64000109, 0xb400005e,
2328 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
2329 0x24000102, 0x74000074, 0x54000105, 0xa400008b,
2330 0x24000102, 0x7400002c, 0x44000104, 0x94000050,
2331 0x24000102, 0x84000032, 0x64000000, 0xb40000cc,
2332 0x24000102, 0x64000069, 0x44000103, 0xa4000114,
2333 0x24000102, 0x7400006e, 0x54000020, 0xa4000051,
2334 0x24000102, 0x64000106, 0x44000104, 0x94000046,
2335 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a2,
2336 0x24000102, 0x64000072, 0x44000103, 0x94000029,
2337 0x24000102, 0x74000107, 0x54000105, 0xa40000f1,
2338 0x24000102, 0x74000064, 0x44000104, 0x940000c0,
2339 0x24000102, 0x84000066, 0x64000061, 0xb40000f5,
2340 0x24000102, 0xc8006165, 0x44000103, 0x84000079,
2341 0x24000102, 0x74000068, 0x78010220, 0xb4000118,
2342 0x24000102, 0xc8006173, 0x44000104, 0x94000038,
2343 0x24000102, 0x74000108, 0x64000109, 0xb4000016,
2344 0x24000102, 0xc800616f, 0x44000103, 0x94000009,
2345 0x24000102, 0x74000070, 0x54000105, 0xa400007a,
2346 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
2347 0x24000102, 0x84000031, 0xc8006100, 0xb40000bf,
2348 0x24000102, 0xc8006169, 0x44000103, 0x94000110,
2349 0x24000102, 0x7400006d, 0x54000020, 0xa400003c,
2350 0x24000102, 0x64000106, 0x44000104, 0x94000044,
2351 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099,
2352 0x24000102, 0xc8006172, 0x44000103, 0x94000027,
2353 0x24000102, 0x74000075, 0x54000105, 0xa40000d0,
2354 0x24000102, 0x74000063, 0x44000104, 0x94000078,
2355 0x24000102, 0x84000062, 0xc8006161, 0xb40000ea,
2356 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
2357 0x24000102, 0x7400006c, 0xb8006120, 0xa4000021,
2358 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
2359 0x24000102, 0x84000003, 0x64000109, 0xb4000086,
2360 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
2361 0x24000102, 0x74000074, 0x54000105, 0xa40000a4,
2362 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
2363 0x24000102, 0x84000041, 0x64000000, 0xb40000d6,
2364 0x24000102, 0x64000069, 0x44000103, 0x94000005,
2365 0x24000102, 0x7400006e, 0x54000020, 0xa4000058,
2366 0x24000102, 0x64000106, 0x44000104, 0x94000048,
2367 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ac,
2368 0x24000102, 0x64000072, 0x44000103, 0x94000034,
2369 0x24000102, 0x74000107, 0x54000105, 0xa40000fc,
2370 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
2371 0x24000102, 0x84000067, 0x64000061, 0x52000020 },
2372
2373 .long_code_lookup = {
2374 0x3521, 0x3525, 0x3522, 0x3526, 0x3523, 0x3527, 0x3524, 0x3528,
2375 0x3529, 0x352d, 0x352a, 0x352e, 0x352b, 0x352f, 0x352c, 0x3530,
2376 0x3531, 0x3535, 0x3532, 0x3536, 0x3533, 0x3537, 0x3534, 0x3538,
2377 0x3539, 0x353d, 0x353a, 0x353e, 0x353b, 0x353f, 0x353c, 0x3540,
2378 0x49a1, 0x3d00, 0x49a2, 0x51c1, 0x49a3, 0x3d01, 0x49a4, 0x51e1,
2379 0x49a5, 0x3d00, 0x49a6, 0x51c2, 0x49a7, 0x3d01, 0x49a8, 0x51e2,
2380 0x49a9, 0x3d00, 0x49aa, 0x51c3, 0x49ab, 0x3d01, 0x49ac, 0x51e3,
2381 0x49ad, 0x3d00, 0x49ae, 0x51c4, 0x49af, 0x3d01, 0x49b0, 0x51e4,
2382 0x49b1, 0x3d00, 0x49b2, 0x51c5, 0x49b3, 0x3d01, 0x49b4, 0x51e5,
2383 0x49b5, 0x3d00, 0x49b6, 0x51c6, 0x49b7, 0x3d01, 0x49b8, 0x51e6,
2384 0x49b9, 0x3d00, 0x49ba, 0x51c7, 0x49bb, 0x3d01, 0x49bc, 0x51e7,
2385 0x49bd, 0x3d00, 0x49be, 0x51c8, 0x49bf, 0x3d01, 0x49c0, 0x51e8,
2386 0x49a1, 0x3d00, 0x49a2, 0x51c9, 0x49a3, 0x3d01, 0x49a4, 0x51e9,
2387 0x49a5, 0x3d00, 0x49a6, 0x51ca, 0x49a7, 0x3d01, 0x49a8, 0x51ea,
2388 0x49a9, 0x3d00, 0x49aa, 0x51cb, 0x49ab, 0x3d01, 0x49ac, 0x51eb,
2389 0x49ad, 0x3d00, 0x49ae, 0x51cc, 0x49af, 0x3d01, 0x49b0, 0x51ec,
2390 0x49b1, 0x3d00, 0x49b2, 0x51cd, 0x49b3, 0x3d01, 0x49b4, 0x51ed,
2391 0x49b5, 0x3d00, 0x49b6, 0x51ce, 0x49b7, 0x3d01, 0x49b8, 0x51ee,
2392 0x49b9, 0x3d00, 0x49ba, 0x51cf, 0x49bb, 0x3d01, 0x49bc, 0x51ef,
2393 0x49bd, 0x3d00, 0x49be, 0x51d0, 0x49bf, 0x3d01, 0x49c0, 0x51f0,
2394 0x49a1, 0x3d00, 0x49a2, 0x51d1, 0x49a3, 0x3d01, 0x49a4, 0x51f1,
2395 0x49a5, 0x3d00, 0x49a6, 0x51d2, 0x49a7, 0x3d01, 0x49a8, 0x51f2,
2396 0x49a9, 0x3d00, 0x49aa, 0x51d3, 0x49ab, 0x3d01, 0x49ac, 0x51f3,
2397 0x49ad, 0x3d00, 0x49ae, 0x51d4, 0x49af, 0x3d01, 0x49b0, 0x51f4,
2398 0x49b1, 0x3d00, 0x49b2, 0x51d5, 0x49b3, 0x3d01, 0x49b4, 0x51f5,
2399 0x49b5, 0x3d00, 0x49b6, 0x51d6, 0x49b7, 0x3d01, 0x49b8, 0x51f6,
2400 0x49b9, 0x3d00, 0x49ba, 0x51d7, 0x49bb, 0x3d01, 0x49bc, 0x51f7,
2401 0x49bd, 0x3d00, 0x49be, 0x51d8, 0x49bf, 0x3d01, 0x49c0, 0x51f8,
2402 0x49a1, 0x3d00, 0x49a2, 0x51d9, 0x49a3, 0x3d01, 0x49a4, 0x51f9,
2403 0x49a5, 0x3d00, 0x49a6, 0x51da, 0x49a7, 0x3d01, 0x49a8, 0x51fa,
2404 0x49a9, 0x3d00, 0x49aa, 0x51db, 0x49ab, 0x3d01, 0x49ac, 0x51fb,
2405 0x49ad, 0x3d00, 0x49ae, 0x51dc, 0x49af, 0x3d01, 0x49b0, 0x51fc,
2406 0x49b1, 0x3d00, 0x49b2, 0x51dd, 0x49b3, 0x3d01, 0x49b4, 0x51fd,
2407 0x49b5, 0x3d00, 0x49b6, 0x51de, 0x49b7, 0x3d01, 0x49b8, 0x51fe,
2408 0x49b9, 0x3d00, 0x49ba, 0x51df, 0x49bb, 0x3d01, 0x49bc, 0x51ff,
2409 0x49bd, 0x3d00, 0x49be, 0x51e0, 0x49bf, 0x3d01, 0x49c0, 0x5200,
2410 0x3d41, 0x3d43, 0x3d45, 0x3d47, 0x3d49, 0x3d4b, 0x3d4d, 0x3d4f,
2411 0x3d42, 0x3d44, 0x3d46, 0x3d48, 0x3d4a, 0x3d4c, 0x3d4e, 0x3d50,
2412 0x4151, 0x4152, 0x4153, 0x4154, 0x4155, 0x4156, 0x4157, 0x4158,
2413 0x4159, 0x415a, 0x415b, 0x415c, 0x415d, 0x415e, 0x415f, 0x4160,
2414 0x4561, 0x4571, 0x4562, 0x4572, 0x4563, 0x4573, 0x4564, 0x4574,
2415 0x4565, 0x4575, 0x4566, 0x4576, 0x4567, 0x4577, 0x4568, 0x4578,
2416 0x4569, 0x4579, 0x456a, 0x457a, 0x456b, 0x457b, 0x456c, 0x457c,
2417 0x456d, 0x457d, 0x456e, 0x457e, 0x456f, 0x457f, 0x4570, 0x4580,
2418 0x4581, 0x4582, 0x4583, 0x4584, 0x4585, 0x4586, 0x4587, 0x4588,
2419 0x4589, 0x458a, 0x458b, 0x458c, 0x458d, 0x458e, 0x458f, 0x4590,
2420 0x4591, 0x4592, 0x4593, 0x4594, 0x4595, 0x4596, 0x4597, 0x4598,
2421 0x4599, 0x459a, 0x459b, 0x459c, 0x459d, 0x459e, 0x459f, 0x45a0,
2422 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2423 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2424 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2425 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2426 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2427 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2428 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2429 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2430 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2431 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2432 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2433 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2434 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2435 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2436 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2437 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2438 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2439 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2440 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2441 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2442 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2443 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2444 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2445 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2446 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2447 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2448 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2449 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2450 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2451 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2452 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2453 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2454 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2455 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2456 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2457 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2458 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2459 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2460 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2461 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2462 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2463 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2464 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2465 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2466 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2467 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2468 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2469 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2470 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2471 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2472 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2473 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2474 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2475 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2476 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2477 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2478 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2479 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2480 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2481 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2482 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2483 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2484 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2485 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2486 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2487 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2488 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2489 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2490 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2491 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2492 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2493 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2494 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2495 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2496 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2497 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2498 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2499 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2500 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2501 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2502 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2503 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2504 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2505 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2506 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2507 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2508 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2509 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2510 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2511 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2512 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2513 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2514 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2515 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2516 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2517 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2518 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2519 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2520 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2521 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2522 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2523 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2524 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2525 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2526 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2527 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2528 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2529 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2530 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2531 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }
2532 };
2533
2534 struct inflate_huff_code_small pregen_dist_huff_code = {
2535 .short_code_lookup = {
2536 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2537 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2538 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2539 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
2540 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2541 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2542 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2543 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000,
2544 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2545 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2546 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2547 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
2548 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2549 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2550 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2551 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024,
2552 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2553 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2554 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2555 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
2556 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2557 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2558 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2559 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002,
2560 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2561 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2562 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2563 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
2564 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2565 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2566 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2567 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4801,
2568 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2569 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2570 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2571 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
2572 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2573 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2574 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2575 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000,
2576 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2577 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2578 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2579 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
2580 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2581 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2582 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2583 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024,
2584 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2585 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2586 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2587 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
2588 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2589 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2590 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2591 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002,
2592 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2593 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2594 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2595 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
2596 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2597 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2598 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2599 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4803,
2600 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2601 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2602 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2603 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
2604 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2605 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2606 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2607 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000,
2608 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2609 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2610 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2611 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
2612 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2613 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2614 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2615 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024,
2616 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2617 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2618 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2619 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
2620 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2621 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2622 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2623 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002,
2624 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2625 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2626 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2627 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
2628 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2629 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2630 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2631 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4801,
2632 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2633 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2634 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2635 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
2636 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2637 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2638 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2639 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000,
2640 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2641 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2642 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2643 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
2644 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2645 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2646 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2647 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024,
2648 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2649 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2650 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2651 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
2652 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2653 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2654 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2655 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002,
2656 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2657 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
2658 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
2659 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
2660 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
2661 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
2662 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
2663 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4803 },
2664
2665 .long_code_lookup = {
2666 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2667 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2668 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2669 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2670 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2671 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2672 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2673 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2674 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2675 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }
2676 };
2677
00 /**********************************************************************
1 Copyright(c) 2019 Arm Corporation All rights reserved.
1 Copyright(c) 2020 Arm Corporation All rights reserved.
22
33 Redistribution and use in source and binary forms, with or without
44 modification, are permitted provided that the following conditions
216216 _func_entry; \
217217 })
218218
219 /**
220 * Micro-architecture definitions
221 * Reference: https://developer.arm.com/docs/ddi0595/f/aarch64-system-registers/midr_el1
222 */
223
224 #define CPU_IMPLEMENTER_RESERVE 0x00
225 #define CPU_IMPLEMENTER_ARM 0x41
226
227
228 #define CPU_PART_CORTEX_A57 0xD07
229 #define CPU_PART_CORTEX_A72 0xD08
230 #define CPU_PART_NEOVERSE_N1 0xD0C
231
232 #define MICRO_ARCH_ID(imp,part) \
233 (((CPU_IMPLEMENTER_##imp&0xff)<<24)|((CPU_PART_##part&0xfff)<<4))
234
235 #ifndef HWCAP_CPUID
236 #define HWCAP_CPUID (1<<11)
237 #endif
238
239 /**
240 * @brief get_micro_arch_id
241 *
242 * Read the micro-architecture ID register if possible. This function
243 * provides microarchitecture information and makes microarchitecture
244 * optimization possible.
245 *
246 * Reading system registers (MRS) is forbidden in userspace. If executed, it
247 * will raise an illegal instruction error. The kernel provides a solution for
248 * this issue. The solution depends on the HWCAP_CPUID flag. Reference (1)
249 * describes how to use it. It provides an "illegal instruction" handler
250 * in kernel space; the handler will execute MRS and return the correct
251 * value to userspace.
252 *
253 * To avoid too many kernel traps, this function MUST only be called in the
254 * dispatcher, and the HWCAP requirements must be met. That will make sure there
255 * are no illegal instruction errors. HWCAP_CPUID should be available to get the
256 * best performance.
257 *
258 * NOTICE:
259 * - HWCAP_CPUID should be available. Otherwise it returns the reserved value.
260 * - It MUST be called inside the dispatcher.
261 * - It MUST meet the HWCAP requirements.
262 *
263 * Example:
264 * DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
265 * {
266 * unsigned long auxval = getauxval(AT_HWCAP);
267 * // This HWCAP check is mandatory.
268 * if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
269 * switch (get_micro_arch_id()) {
270 * case MICRO_ARCH_ID(ARM, CORTEX_A57):
271 * return PROVIDER_INFO(crc32_pmull_crc_for_a57);
272 * case MICRO_ARCH_ID(ARM, CORTEX_A72):
273 * return PROVIDER_INFO(crc32_pmull_crc_for_a72);
274 * case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
275 * return PROVIDER_INFO(crc32_pmull_crc_for_n1);
276 * default:
277 * return PROVIDER_INFO(crc32_pmull_crc_for_others);
278 * }
279 * }
280 * return PROVIDER_BASIC(crc32_iscsi);
281 * }
282 * KNOWN ISSUE:
283 * On a heterogeneous system (big.LITTLE), it will work but the performance
284 * might not be the best one as expected.
285 *
286 * If this function is called on the big core, it will return the function
287 * optimized for the big core.
288 *
289 * If execution is then scheduled to the little core, it will still work (1),
290 * but the function won't be optimized for the little core, thus the performance
291 * won't be as expected.
292 *
293 * References:
294 * - [CPU Feature detection](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/arm64/cpu-feature-registers.rst?h=v5.5)
295 *
296 */
297 static inline uint32_t get_micro_arch_id(void)
298 {
299 uint32_t id=CPU_IMPLEMENTER_RESERVE;
300 if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
301 /** Here will trap into kernel space */
302 asm("mrs %0, MIDR_EL1 " : "=r" (id));
303 }
304 return id&0xff00fff0;
305 }
306
307
308
219309 #endif /* __ASSEMBLY__ */
220310 #endif
313313 #define IGZIP_NO_HIST 0
314314 #define IGZIP_HIST 1
315315 #define IGZIP_DICT_HIST 2
316 #define IGZIP_DICT_HASH_SET 3
316317
317318 /** @brief Holds Bit Buffer information*/
318319 struct BitBuf2 {
684685 */
685686 int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t dict_len);
686687
688 /** @brief Structure for holding processed dictionary information */
689
690 struct isal_dict {
691 uint32_t params;
692 uint32_t level;
693 uint32_t hist_size;
694 uint32_t hash_size;
695 uint8_t history[ISAL_DEF_HIST_SIZE];
696 uint16_t hashtable[IGZIP_LVL3_HASH_SIZE];
697 };
698
699 /**
700 * @brief Process dictionary to reuse later
701 *
702 * Processes a dictionary so that the generated output can be reused to reset a
703 * new deflate stream more quickly than isal_deflate_set_dict() alone. This
704 * function is paired with isal_deflate_reset_dict() when using the same
705 * dictionary on multiple deflate objects. The stream.level must be set prior to
706 * calling this function to process the dictionary correctly. If the dictionary
707 * is longer than IGZIP_HIST_SIZE, only the last IGZIP_HIST_SIZE bytes will be
708 * used.
709 *
710 * @param stream Structure holding state information on the compression streams.
711 * @param dict_str: Structure to hold processed dictionary info to reuse later.
712 * @param dict: Array containing dictionary to use.
713 * @param dict_len: Length of dict.
714 * @returns COMP_OK,
715 * ISAL_INVALID_STATE (dictionary could not be processed)
716 */
717 int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict_str,
718 uint8_t *dict, uint32_t dict_len);
719
720 /**
721 * @brief Reset compression dictionary to use
722 *
723 * Similar to isal_deflate_set_dict() but on pre-processed dictionary
724 * data. Pairing with isal_deflate_process_dict() can reduce the processing time
725 * on subsequent compression with dictionary especially on small files.
726 *
727 * Like isal_deflate_set_dict(), this function is to be called after
728 * isal_deflate_init, or after completing a SYNC_FLUSH or FULL_FLUSH and before
729 * the next call to isal_deflate. Changing compression level between dictionary
730 * process and reset will cause return of ISAL_INVALID_STATE.
731 *
732 * @param stream Structure holding state information on the compression streams.
733 * @param dict_str: Structure with pre-processed dictionary info.
734 * @returns COMP_OK,
735 * ISAL_INVALID_STATE or other (dictionary could not be reset)
736 */
737 int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict_str);
738
739
687740 /**
688741 * @brief Fast data (deflate) compression for storage applications.
689742 *
6868 mbin_def_ptr %1_mbinit
6969
7070 section .text
71 global %1:ISAL_SYM_TYPE_FUNCTION
71 mk_global %1, function
7272 %1_mbinit:
73 endbranch
7374 ;;; only called the first time to setup hardware match
7475 call %1_dispatch_init
7576 ;;; falls thru to execute the hw optimized code
7677 %1:
78 endbranch
7779 jmp mbin_ptr_sz [%1_dispatched]
7880 %endmacro
7981
2828
2929 %ifndef _REG_SIZES_ASM_
3030 %define _REG_SIZES_ASM_
31
32 %ifdef __NASM_VER__
33 %ifidn __OUTPUT_FORMAT__, win64
34 %error nasm not supported in windows
35 %else
36 %define endproc_frame
37 %endif
38 %endif
3931
4032 %ifndef AS_FEATURE_LEVEL
4133 %define AS_FEATURE_LEVEL 4
207199 section .text
208200 %endif
209201 %ifidn __OUTPUT_FORMAT__,elf64
202 %define __x86_64__
210203 section .note.GNU-stack noalloc noexec nowrite progbits
211204 section .text
205 %endif
206 %ifidn __OUTPUT_FORMAT__,win64
207 %define __x86_64__
208 %endif
209 %ifidn __OUTPUT_FORMAT__,macho64
210 %define __x86_64__
211 %endif
212
213 %ifdef __x86_64__
214 %define endbranch db 0xf3, 0x0f, 0x1e, 0xfa
215 %else
216 %define endbranch db 0xf3, 0x0f, 0x1e, 0xfb
212217 %endif
213218
214219 %ifdef REL_TEXT
219224 %define WRT_OPT
220225 %endif
221226
227 %macro mk_global 1-3
228 %ifdef __NASM_VER__
229 %ifidn __OUTPUT_FORMAT__, macho64
230 global %1
231 %elifidn __OUTPUT_FORMAT__, win64
232 global %1
233 %else
234 global %1:%2 %3
235 %endif
236 %else
237 global %1:%2 %3
238 %endif
239 %endmacro
240
241
242 ; Fixes for nasm lack of MS proc helpers
243 %ifdef __NASM_VER__
244 %ifidn __OUTPUT_FORMAT__, win64
245 %macro alloc_stack 1
246 sub rsp, %1
247 %endmacro
248
249 %macro proc_frame 1
250 %1:
251 %endmacro
252
253 %macro save_xmm128 2
254 movdqa [rsp + %2], %1
255 %endmacro
256
257 %macro save_reg 2
258 mov [rsp + %2], %1
259 %endmacro
260
261 %macro rex_push_reg 1
262 push %1
263 %endmacro
264
265 %macro push_reg 1
266 push %1
267 %endmacro
268
269 %define end_prolog
270 %endif
271
272 %define endproc_frame
273 %endif
274
222275 %ifidn __OUTPUT_FORMAT__, macho64
223276 %define elf64 macho64
224277 mac_equ equ 1
225 %ifdef __NASM_VER__
226 %define ISAL_SYM_TYPE_FUNCTION
227 %define ISAL_SYM_TYPE_DATA_INTERNAL
228 %else
229 %define ISAL_SYM_TYPE_FUNCTION function
230 %define ISAL_SYM_TYPE_DATA_INTERNAL data internal
231 %endif
232 %else
233 %define ISAL_SYM_TYPE_FUNCTION function
234 %define ISAL_SYM_TYPE_DATA_INTERNAL data internal
235278 %endif
236279
237280 %macro slversion 4
00 LIBRARY isa-l
1 VERSION 2.29
1 VERSION 2.30
22 EXPORTS
33
44 ec_encode_data_sse @1
112112 isal_zero_detect @109
113113 isal_gzip_header_init @110
114114 isal_adler32 @111
115 isal_deflate_process_dict @112
116 isal_deflate_reset_dict @113
3838 # trace - get simulator trace
3939 # clean - remove object files
4040
41 version ?= 2.29.0
41 version ?= 2.30.0
4242 host_cpu ?= $(shell uname -m | sed -e 's/amd/x86_/')
4343 arch ?= $(shell uname | grep -v -e Linux -e BSD )
4444
3939 %define tmpb r11b
4040 %define tmp3 arg4
4141 %define return rax
42 %define func(x) x:
42 %define func(x) x: endbranch
4343 %define FUNC_SAVE
4444 %define FUNC_RESTORE
4545 %endif
7272 section .text
7373
7474 align 16
75 global mem_zero_detect_avx:ISAL_SYM_TYPE_FUNCTION
75 mk_global mem_zero_detect_avx, function
7676 func(mem_zero_detect_avx)
7777 FUNC_SAVE
7878 mov pos, 0
3939 %define tmpb r11b
4040 %define tmp3 arg4
4141 %define return rax
42 %define func(x) x:
42 %define func(x) x: endbranch
4343 %define FUNC_SAVE
4444 %define FUNC_RESTORE
4545 %endif
7272 section .text
7373
7474 align 16
75 global mem_zero_detect_sse:ISAL_SYM_TYPE_FUNCTION
75 mk_global mem_zero_detect_sse, function
7676 func(mem_zero_detect_sse)
7777 FUNC_SAVE
7878 mov pos, 0
00 .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.11.
1 .TH IGZIP "1" "February 2020" "igzip command line interface 2.29.0" "User Commands"
1 .TH IGZIP "1" "November 2020" "igzip command line interface 2.30.0" "User Commands"
22 .SH NAME
33 igzip \- compress or decompress files similar to gzip
44 .SH SYNOPSIS
816816 int suffix_index = 0;
817817 uint32_t file_time;
818818
819 // Allocate mem and setup to hold gzip header info
819820 if (infile_name_len == stdin_file_name_len &&
820821 infile_name != NULL &&
821822 memcmp(infile_name, stdin_file_name, infile_name_len) == 0) {
883884 state.next_in = inbuf;
884885 state.avail_in = fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
885886
887 // Actually read and save the header info
886888 ret = isal_read_gzip_header(&state, &gz_hdr);
887889 if (ret != ISAL_DECOMP_OK) {
888890 log_print(ERROR, "igzip: Error invalid gzip header found for file %s\n",
914916 goto decompress_file_cleanup;
915917 }
916918
919 // Start reading in compressed data and decompress
917920 do {
918921 if (state.avail_in == 0) {
919922 state.next_in = inbuf;
935938 if (out != NULL)
936939 fwrite_safe(outbuf, 1, state.next_out - outbuf, out, outfile_name);
937940
938 } while (!feof(in) || state.avail_out == 0);
941 } while (state.block_state != ISAL_BLOCK_FINISH // while not done
942 && (!feof(in) || state.avail_out == 0) // and work to do
943 );
944
945 // Add the following to look for and decode additional concatenated files
946 if (!feof(in) && state.avail_in == 0) {
947 state.next_in = inbuf;
948 state.avail_in = fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
949 }
950
951 while (state.avail_in > 0 && state.next_in[0] == 31) {
952 // Look for magic numbers for gzip header. Follows the gzread() decision
953 // whether to treat as trailing junk
954 if (state.avail_in > 1 && state.next_in[1] != 139)
955 break;
956
957 isal_inflate_reset(&state);
958 state.crc_flag = ISAL_GZIP; // Let isal_inflate() process extra headers
959 do {
960 if (state.avail_in == 0 && !feof(in)) {
961 state.next_in = inbuf;
962 state.avail_in =
963 fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
964 }
965
966 state.next_out = outbuf;
967 state.avail_out = outbuf_size;
968
969 ret = isal_inflate(&state);
970 if (ret != ISAL_DECOMP_OK) {
971 log_print(ERROR,
972 "igzip: Error while decompressing extra concatenated"
973 "gzip files on %s\n", infile_name);
974 goto decompress_file_cleanup;
975 }
976
977 if (out != NULL)
978 fwrite_safe(outbuf, 1, state.next_out - outbuf, out,
979 outfile_name);
980
981 } while (state.block_state != ISAL_BLOCK_FINISH
982 && (!feof(in) || state.avail_out == 0));
983
984 if (!feof(in) && state.avail_in == 0) {
985 state.next_in = inbuf;
986 state.avail_in =
987 fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
988 }
989 }
939990
940991 if (state.block_state != ISAL_BLOCK_FINISH)
941992 log_print(ERROR, "igzip: Error %s does not contain a complete gzip file\n",
7575 cat $TEST_FILE | $IGZIP | $IGZIP -d | $DIFF $TEST_FILE - || ret=1
7676 cat $TEST_FILE | $IGZIP - | $IGZIP -d - | $DIFF $TEST_FILE - || ret=1
7777 pass_check $ret "Piping compression and decompression"
78
79 # Test multiple concatenated gzip files
80 ret=0
81 (for i in `seq 3`; do $IGZIP -c $TEST_FILE ; done) | $IGZIP -t || ret=1
82 pass_check $ret "Multiple gzip concatenated files"
83
84 if command -V md5sum >/dev/null 2>&1; then
85 sum1=$((for i in `seq 15`; do $IGZIP -c $TEST_FILE; done) | $IGZIP -cd | md5sum)
86 sum2=$((for i in `seq 15`; do cat $TEST_FILE; done) | md5sum)
87 [[ "$sum1" == "$sum2" ]] && ret=0 || ret=1
88 pass_check $ret "Multiple large gzip concat test"
89 clear_dir
90 else
91 echo "Skip: Multiple large gzip concat test"
92 fi
93
7894
8095 #Test outfile options
8096 $IGZIP $TEST_FILE -o $file2$ds && $IGZIP $file2$ds -d -o $file1 && \
4545 %define tmp r11
4646 %define tmp3 arg4
4747 %define return rax
48 %define func(x) x:
48 %define func(x) x: endbranch
4949 %define FUNC_SAVE
5050 %define FUNC_RESTORE
5151 %endif
121121 section .text
122122
123123 align 16
124 global pq_check_sse:ISAL_SYM_TYPE_FUNCTION
124 mk_global pq_check_sse, function
125125 func(pq_check_sse)
126126 FUNC_SAVE
127127 sub vec, 3 ;Keep as offset to last source
4545 %define tmp r11
4646 %define return rax
4747 %define PS 8
48 %define func(x) x:
48 %define func(x) x: endbranch
4949 %define FUNC_SAVE
5050 %define FUNC_RESTORE
5151
7878 %define arg1 ecx
7979 %define return eax
8080 %define PS 4
81 %define func(x) x:
81 %define func(x) x: endbranch
8282 %define arg(x) [ebp+8+PS*x]
8383 %define arg2 edi ; must sav/restore
8484 %define arg3 esi
140140 section .text
141141
142142 align 16
143 global pq_check_sse:ISAL_SYM_TYPE_FUNCTION
143 mk_global pq_check_sse, function
144144 func(pq_check_sse)
145145 FUNC_SAVE
146146 sub vec, 3 ;Keep as offset to last source
4545 %define tmp r11
4646 %define tmp3 arg4
4747 %define return rax
48 %define func(x) x:
48 %define func(x) x: endbranch
4949 %define FUNC_SAVE
5050 %define FUNC_RESTORE
5151 %endif
6262 %define func(x) proc_frame x
6363 %macro FUNC_SAVE 0
6464 alloc_stack stack_size
65 save_xmm128 xmm6, 0*16
66 save_xmm128 xmm7, 1*16
67 save_xmm128 xmm8, 2*16
68 save_xmm128 xmm9, 3*16
69 save_xmm128 xmm10, 4*16
70 save_xmm128 xmm11, 5*16
71 save_xmm128 xmm14, 6*16
72 save_xmm128 xmm15, 7*16
65 vmovdqa [rsp + 0*16], xmm6
66 vmovdqa [rsp + 1*16], xmm7
67 vmovdqa [rsp + 2*16], xmm8
68 vmovdqa [rsp + 3*16], xmm9
69 vmovdqa [rsp + 4*16], xmm10
70 vmovdqa [rsp + 5*16], xmm11
71 vmovdqa [rsp + 6*16], xmm14
72 vmovdqa [rsp + 7*16], xmm15
7373 end_prolog
7474 %endmacro
7575
7676 %macro FUNC_RESTORE 0
77 movdqa xmm6, [rsp + 0*16]
78 movdqa xmm7, [rsp + 1*16]
79 movdqa xmm8, [rsp + 2*16]
80 movdqa xmm9, [rsp + 3*16]
81 movdqa xmm10, [rsp + 4*16]
82 movdqa xmm11, [rsp + 5*16]
83 movdqa xmm14, [rsp + 6*16]
84 movdqa xmm15, [rsp + 7*16]
77 vmovdqa xmm6, [rsp + 0*16]
78 vmovdqa xmm7, [rsp + 1*16]
79 vmovdqa xmm8, [rsp + 2*16]
80 vmovdqa xmm9, [rsp + 3*16]
81 vmovdqa xmm10, [rsp + 4*16]
82 vmovdqa xmm11, [rsp + 5*16]
83 vmovdqa xmm14, [rsp + 6*16]
84 vmovdqa xmm15, [rsp + 7*16]
8585 add rsp, stack_size
8686 %endmacro
8787 %endif
124124 section .text
125125
126126 align 16
127 global pq_gen_avx:ISAL_SYM_TYPE_FUNCTION
127 mk_global pq_gen_avx, function
128128 func(pq_gen_avx)
129129 FUNC_SAVE
130130 sub vec, 3 ;Keep as offset to last source
4545 %define tmp r11
4646 %define tmp3 arg4
4747 %define return rax
48 %define func(x) x:
48 %define func(x) x: endbranch
4949 %define FUNC_SAVE
5050 %define FUNC_RESTORE
5151 %endif
125125 section .text
126126
127127 align 16
128 global pq_gen_avx2:ISAL_SYM_TYPE_FUNCTION
128 mk_global pq_gen_avx2, function
129129 func(pq_gen_avx2)
130130 FUNC_SAVE
131131 sub vec, 3 ;Keep as offset to last source
4848 %define tmp r11
4949 %define tmp3 arg4
5050 %define return rax
51 %define func(x) x:
51 %define func(x) x: endbranch
5252 %define FUNC_SAVE
5353 %define FUNC_RESTORE
5454 %endif
122122 section .text
123123
124124 align 16
125 global pq_gen_avx512:ISAL_SYM_TYPE_FUNCTION
125 mk_global pq_gen_avx512, function
126126 func(pq_gen_avx512)
127127 FUNC_SAVE
128128 sub vec, 3 ;Keep as offset to last source
4545 %define tmp r11
4646 %define tmp3 arg4
4747 %define return rax
48 %define func(x) x:
48 %define func(x) x: endbranch
4949 %define FUNC_SAVE
5050 %define FUNC_RESTORE
5151 %endif
121121 section .text
122122
123123 align 16
124 global pq_gen_sse:ISAL_SYM_TYPE_FUNCTION
124 mk_global pq_gen_sse, function
125125 func(pq_gen_sse)
126126 FUNC_SAVE
127127 sub vec, 3 ;Keep as offset to last source
4545 %define tmp r11
4646 %define return rax
4747 %define PS 8
48 %define func(x) x:
48 %define func(x) x: endbranch
4949 %define FUNC_SAVE
5050 %define FUNC_RESTORE
5151
7777 %define arg1 ecx
7878 %define return eax
7979 %define PS 4
80 %define func(x) x:
80 %define func(x) x: endbranch
8181 %define arg(x) [ebp+8+PS*x]
8282 %define arg2 edi ; must sav/restore
8383 %define arg3 esi
139139 section .text
140140
141141 align 16
142 global pq_gen_sse:ISAL_SYM_TYPE_FUNCTION
142 mk_global pq_gen_sse, function
143143 func(pq_gen_sse)
144144 FUNC_SAVE
145145 sub vec, 3 ;Keep as offset to last source
7171 ;;;;
7272 ; pq_check multibinary function
7373 ;;;;
74 global pq_check:ISAL_SYM_TYPE_FUNCTION
74 mk_global pq_check, function
7575 pq_check_mbinit:
76 endbranch
7677 call pq_check_dispatch_init
7778 pq_check:
79 endbranch
7880 jmp qword [pq_check_dispatched]
7981
8082 pq_check_dispatch_init:
103105 ;;;;
104106 ; xor_check multibinary function
105107 ;;;;
106 global xor_check:ISAL_SYM_TYPE_FUNCTION
108 mk_global xor_check, function
107109 xor_check_mbinit:
110 endbranch
108111 call xor_check_dispatch_init
109112 xor_check:
113 endbranch
110114 jmp qword [xor_check_dispatched]
111115
112116 xor_check_dispatch_init:
4848 %define tmp3 arg4
4949 %define return rax
5050 %define PS 8
51 %define func(x) x:
51 %define func(x) x: endbranch
5252 %define FUNC_SAVE
5353 %define FUNC_RESTORE
5454
8787 %define tmp3 edx
8888 %define return eax
8989 %define PS 4
90 %define func(x) x:
90 %define func(x) x: endbranch
9191 %define arg(x) [ebp+8+PS*x]
9292 %define arg2 edi ; must sav/restore
9393 %define arg3 esi
136136 section .text
137137
138138 align 16
139 global xor_check_sse:ISAL_SYM_TYPE_FUNCTION
139 mk_global xor_check_sse, function
140140 func(xor_check_sse)
141141 FUNC_SAVE
142142 %ifidn PS,8 ;64-bit code
4444 %define arg5 r9
4545 %define tmp r11
4646 %define tmp3 arg4
47 %define func(x) x:
47 %define func(x) x: endbranch
4848 %define return rax
4949 %define FUNC_SAVE
5050 %define FUNC_RESTORE
9999 section .text
100100
101101 align 16
102 global xor_gen_avx:ISAL_SYM_TYPE_FUNCTION
102 mk_global xor_gen_avx, function
103103 func(xor_gen_avx)
104104
105105 FUNC_SAVE
4646 %define arg5 r9
4747 %define tmp r11
4848 %define tmp3 arg4
49 %define func(x) x:
49 %define func(x) x: endbranch
5050 %define return rax
5151 %define FUNC_SAVE
5252 %define FUNC_RESTORE
102102 section .text
103103
104104 align 16
105 global xor_gen_avx512:ISAL_SYM_TYPE_FUNCTION
105 mk_global xor_gen_avx512, function
106106 func(xor_gen_avx512)
107107 FUNC_SAVE
108108 sub vec, 2 ;Keep as offset to last source
4848 %define tmp3 arg4
4949 %define return rax
5050 %define PS 8
51 %define func(x) x:
51 %define func(x) x: endbranch
5252 %define FUNC_SAVE
5353 %define FUNC_RESTORE
5454
8787 %define tmp3 edx
8888 %define return eax
8989 %define PS 4
90 %define func(x) x:
90 %define func(x) x: endbranch
9191 %define arg(x) [ebp+8+PS*x]
9292 %define arg2 edi ; must sav/restore
9393 %define arg3 esi
136136 section .text
137137
138138 align 16
139 global xor_gen_sse:ISAL_SYM_TYPE_FUNCTION
139 mk_global xor_gen_sse, function
140140 func(xor_gen_sse)
141141 FUNC_SAVE
142142 %ifidn PS,8 ;64-bit code
0 # Regenerate nmake file from makefiles or check its consistency
1
# test_nmake_file: consistency check for the checked-in Makefile.nmake.
# Requires tst.nmake as a prerequisite, then compares it with Makefile.nmake
# using a unified diff. When diff reports a difference the recipe prints a
# warning, removes tst.nmake and fails (trailing `false`); otherwise it
# prints a success message and removes tst.nmake.
2 test_nmake_file: tst.nmake
3 @diff -u Makefile.nmake tst.nmake || (echo Potential nmake consistency issue; $(RM) tst.nmake; false;)
4 @echo No nmake consistency issues
5 @$(RM) tst.nmake
6
# FORCE has no prerequisites and no recipe; naming it as a prerequisite of the
# rule below makes that rule run every time one of its targets is requested.
7 FORCE:
# Generator rule shared by Makefile.nmake and tst.nmake: the recipe writes a
# complete nmake makefile into $@. The first redirection truncates the file
# (`>`), every later one appends (`>>`). Inside the recipe `$$` is make's
# escape for a literal `$`, so text such as $$(objs) or $$@ ends up in the
# generated file as $(objs) / $@, while single-`$` references such as
# $(INCLUDE) or $(units) are expanded by this makefile before being written.
8 Makefile.nmake tst.nmake: FORCE
9 @echo Regenerating $@
# Header: the LICENSE file with '#' prefixed to every line, framed by two
# banner lines, followed by a note on how to regenerate the file.
10 @echo '########################################################################' > $@
11 @cat LICENSE | sed -e 's/^/#/ ' >> $@
12 @echo '########################################################################' >> $@
13 @echo '' >> $@
14 @echo '# This file can be auto-regenerated with $$make -f Makefile.unx Makefile.nmake' >> $@
15 @echo '' >> $@
# objs list: each entry of $(objs) with its .o suffix replaced by .obj and
# '/' replaced by a backslash, emitted one per backslash-continued line.
16 @echo -n 'objs =' >> $@
17 @$(foreach o, $(subst /,\\,$(objs:.o=.obj)), printf " %s\n\t%s" \\ $(o) >> $@; )
18 @echo '' >> $@
19 @echo '' >> $@
# Include path, feature flags, compiler/assembler/linker flag macros and the
# tool names, written out as fixed text.
20 @echo 'INCLUDES = $(INCLUDE)' >> $@
21 @echo '# Modern asm feature level, consider upgrading nasm/yasm before decreasing feature_level' >> $@
22 @echo 'FEAT_FLAGS = -DHAVE_AS_KNOWS_AVX512 -DAS_FEATURE_LEVEL=10' >> $@
23 @echo 'CFLAGS_REL = -O2 -DNDEBUG /Z7 /MD /Gy' >> $@
24 @echo 'CFLAGS_DBG = -Od -DDEBUG /Z7 /MDd' >> $@
25 @echo 'LINKFLAGS = -nologo -incremental:no -debug' >> $@
26 @echo 'CFLAGS = $$(CFLAGS_REL) -nologo -D_USE_MATH_DEFINES $$(FEAT_FLAGS) $$(INCLUDES) $$(D)' >> $@
27 @echo 'AFLAGS = -f win64 $$(FEAT_FLAGS) $$(INCLUDES) $$(D)' >> $@
28 @echo 'CC = cl' >> $@
29 @echo '# or CC = icl -Qstd=c99' >> $@
30 @echo 'AS = nasm' >> $@
31 @echo '' >> $@
# Top-level library targets (lib, static, dll), the bin directory target, and
# the rules that build isa-l_static.lib and isa-l.dll from $(objs).
32 @echo 'lib: bin static dll' >> $@
33 @echo 'static: bin isa-l_static.lib' >> $@
34 @echo 'dll: bin isa-l.dll' >> $@
35 @echo '' >> $@
36 @echo 'bin: ; -mkdir $$@' >> $@
37 @echo '' >> $@
38 @echo 'isa-l_static.lib: $$(objs)' >> $@
39 @echo ' lib -out:$$@ @<<' >> $@
40 @echo '$$?' >> $@
41 @echo '<<' >> $@
42 @echo '' >> $@
43 @echo 'isa-l.dll: $$(objs)' >> $@
44 @echo ' link -out:$$@ -dll -def:isa-l.def $$(LINKFLAGS) @<<' >> $@
45 @echo '$$?' >> $@
46 @echo '<<' >> $@
47 @echo '' >> $@
# For every entry of $(units), emit a pair of rules, {unit}.c.obj and
# {unit}.asm.obj, that invoke $(CC) and $(AS) respectively.
48 @$(foreach b, $(units), \
49 printf "{%s}.c.obj:\n\t\$$(CC) \$$(CFLAGS) /c -Fo\$$@ \$$?\n{%s}.asm.obj:\n\t\$$(AS) \$$(AFLAGS) -o \$$@ \$$?\n\n" $(b) $(b) >> $@; )
50 @echo '' >> $@
# The examples section is part of the recipe only when $(examples) is
# non-empty; names are reduced to their file part and given an .exe suffix.
51 ifneq (,$(examples))
52 @echo "# Examples" >> $@
53 @echo -n 'ex =' >> $@
54 @$(foreach ex, $(notdir $(examples)), printf " %s\n\t%s.exe" \\ $(ex) >> $@; )
55 @echo '' >> $@
56 @echo '' >> $@
57 @echo 'ex: lib $$(ex)' >> $@
58 @echo '' >> $@
59 @echo '$$(ex): $$(@B).obj' >> $@
60 endif
61 @echo '' >> $@
# Generic .obj -> .exe link rule used by the program lists below.
62 @echo '.obj.exe:' >> $@
63 @echo ' link /out:$$@ $$(LINKFLAGS) isa-l.lib $$?' >> $@
64 @echo '' >> $@
# checks / tests / perfs / progs: each list is built from the file part of the
# corresponding variable ($(check_tests), $(unit_tests), $(perf_tests),
# $(bin_PROGRAMS)) with .exe appended, followed by its targets.
65 @echo '# Check tests' >> $@
66 @echo -n 'checks =' >> $@
67 @$(foreach check, $(notdir $(check_tests)), printf " %s\n\t%s.exe" \\ $(check) >> $@; )
68 @echo '' >> $@
69 @echo '' >> $@
70 @echo 'checks: lib $$(checks)' >> $@
71 @echo '$$(checks): $$(@B).obj' >> $@
72 @echo 'check: $$(checks)' >> $@
73 @echo ' !$$?' >> $@
74 @echo '' >> $@
75 @echo '# Unit tests' >> $@
76 @echo -n 'tests =' >> $@
77 @$(foreach test, $(notdir $(unit_tests)), printf " %s\n\t%s.exe" \\ $(test) >> $@; )
78 @echo '' >> $@
79 @echo '' >> $@
80 @echo 'tests: lib $$(tests)' >> $@
81 @echo '$$(tests): $$(@B).obj' >> $@
82 @echo '' >> $@
83 @echo '# Performance tests' >> $@
84 @echo -n 'perfs =' >> $@
85 @$(foreach perf, $(notdir $(perf_tests)), printf " %s\n\t%s.exe" \\ $(perf) >> $@; )
86 @echo '' >> $@
87 @echo '' >> $@
88 @echo 'perfs: lib $$(perfs)' >> $@
89 @echo '$$(perfs): $$(@B).obj' >> $@
90 @echo '' >> $@
91 @echo -n 'progs =' >> $@
92 @$(foreach prog, $(notdir $(bin_PROGRAMS)), printf " %s\n\t%s.exe" \\ $(prog) >> $@; )
93 @echo '' >> $@
94 @echo '' >> $@
95 @echo 'progs: lib $$(progs)' >> $@
# One explicit link rule per program; its prerequisites come from
# programs_<name>_SOURCES with .c replaced by .obj and '/' by a backslash.
96 @$(foreach p, $(notdir $(bin_PROGRAMS)), \
97 printf "%s.exe: %s\n\tlink /out:\$$@ \$$(LINKFLAGS) isa-l.lib \$$?\n" $(p) $(subst /,\\,$(programs_$(p)_SOURCES:.c=.obj)) >> $@; )
98 @echo '' >> $@
# clean target: each line deletes one kind of build product if it exists.
99 @echo 'clean:' >> $@
100 @echo ' -if exist *.obj del *.obj' >> $@
101 @echo ' -if exist bin\*.obj del bin\*.obj' >> $@
102 @echo ' -if exist isa-l_static.lib del isa-l_static.lib' >> $@
103 @echo ' -if exist *.exe del *.exe' >> $@
104 @echo ' -if exist *.pdb del *.pdb' >> $@
105 @echo ' -if exist isa-l.lib del isa-l.lib' >> $@
106 @echo ' -if exist isa-l.dll del isa-l.dll' >> $@
107 @echo ' -if exist isa-l.exp del isa-l.exp' >> $@
108 @echo '' >> $@
# When "igzip" occurs in $(units), emit an empty zlib.lib target. Then feed
# every unit's Makefile.am through sed to derive dependency lines: keep only
# lines containing ": ", append .exe to each word and strip it again from the
# words after the colon, rewrite .o as .obj, drop CFLAGS_* additions and lines
# with a '%' between two colons, remove "LDLIBS +=", replace -lz by zlib.lib
# and trim a trailing space.
109 $(if $(findstring igzip,$(units)),@echo 'zlib.lib:' >> $@ )
110 @cat $(foreach unit,$(units), $(unit)/Makefile.am) | sed \
111 -e '/: /!d' \
112 -e 's/\([^ :]*\)[ ]*/\1.exe /g' \
113 -e :c -e 's/:\(.*\).exe/:\1/;tc' \
114 -e 's/\.o[ $$]/.obj /g' \
115 -e 's/\.o\.exe[ ]:/.obj:/g' \
116 -e '/CFLAGS_.*+=/d' \
117 -e '/:.*\%.*:/d' \
118 -e 's/ :/:/' \
119 -e 's/LDLIBS *+=//' \
120 -e 's/-lz/zlib.lib/' \
121 -e 's/ $$//' \
122 >> $@
#!/bin/sh
#
# nasm wrapper for automake-driven builds.
#
# Forwards only the command-line options nasm is known to accept, assembles
# the source, then relinks the object with $CET_LD (-r -z ibt -z shstk) and
# moves the relinked file over the original object.
#
# Usage:       <script> [options...] -o <object> <source>
# Environment: CET_LD  linker command used for the relink step (required)
# Exit status: non-zero when the arguments are unusable or when nasm, the
#              relink, or the final rename fails.

# Filter out unnecessary options added by automake

# Start from empty lists so that same-named variables inherited from the
# environment cannot leak into the nasm command line.
options=
args=
object=

# Loop on the argument count; it states the intent directly instead of
# relying on the joined "$*" string being empty.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -o )
            # Supported options with arg; the output path is also kept
            # for the relink step below.
            options="$options $1 $2"
            shift
            object="$1"
            shift
            ;;
        -f | -D )
            # Supported options with arg
            options="$options $1 $2"
            shift
            shift
            ;;
        -I | -i )
            # Include directory given as a separate word: forwarded with a
            # trailing '/' (presumably because nasm joins directory and
            # file name verbatim - confirm against the nasm version in use).
            options="$options $1 $2/"
            shift
            shift
            ;;
        --prefix* )
            # Supported options without arg
            options="$options $1"
            shift
            ;;
        -I* | -i* )
            # Include directory attached to the option: same trailing '/'.
            options="$options $1/"
            shift
            ;;
        -D* ) # For defines we need to remove spaces
            # $options is expanded with word splitting, so a define that
            # contains a space cannot be forwarded intact; it is dropped.
            case "$1" in
                *' '* ) ;;
                *) options="$options $1" ;;
            esac
            shift
            ;;
        #-blah )
        # Unsupported options with args - none known
        -* )
            # Unsupported options with no args
            shift
            ;;
        * )
            args="$args $1"
            shift
            ;;
    esac
done

# Fail early with a clear message instead of the confusing secondary errors
# an empty $object or $CET_LD would otherwise cause in the relink step.
if [ -z "$object" ]; then
    echo "$0: no output object given (-o <file>)" >&2
    exit 1
fi
if [ -z "${CET_LD:-}" ]; then
    echo "$0: CET_LD is not set; it must name the linker for the relink" >&2
    exit 1
fi

# Word splitting of $options and $args is intentional: both are
# space-separated lists assembled above.
# shellcheck disable=SC2086
nasm $options $args || exit $?

# Relink into a temporary file so the original object is only replaced once
# the linker has succeeded. $CET_LD stays unquoted so it may carry flags.
# shellcheck disable=SC2086
if ! $CET_LD -r -z ibt -z shstk -o "$object.tmp" "$object"; then
    rm -f -- "$object.tmp"
    exit 1
fi
mv -- "$object.tmp" "$object"
157157 # Test custom hufftables
158158 test_start "generate_custom_hufftables"
159159 ./generate_custom_hufftables $in_file
160 $MAKE -f Makefile.unx clean
160161 $MAKE -f Makefile.unx -j $cpus D="NO_STATIC_INFLATE_H" checks
161162 ./igzip_rand_test $in_file
162 ./generate_static_inflate
163 diff -q static_inflate.h igzip/static_inflate.h
164 rm -rf static_inflate.h
165163 rm -rf hufftables_c.c
166164 test_end "generate_custom_hufftables" $?
167165
168166 msg+=$'Custom hufftable build: Pass\n'
169167
170168 $MAKE -f Makefile.unx clean
169
170 test_start "nmake_file_consistency"
171 $MAKE -f Makefile.unx host_cpu="x86_64" test_nmake_file
172 test_end "nmake_file_consistency" $?
173 msg+=$'Nmake file consistency: Pass\n'
171174
172175 # noarch build
173176 test_start "noarch_build"
#!/bin/sh
#
# yasm wrapper for automake-driven builds.
#
# Forwards only the command-line options yasm is known to accept, assembles
# the source, then relinks the object with $CET_LD (-r -z ibt -z shstk) and
# moves the relinked file over the original object.
#
# Usage:       <script> [options...] -o <object> <source>
# Environment: CET_LD  linker command used for the relink step (required)
# Exit status: non-zero when the arguments are unusable or when yasm, the
#              relink, or the final rename fails.

# Filter out unnecessary options added by automake

# Start from empty lists so that same-named variables inherited from the
# environment cannot leak into the yasm command line.
options=
args=
object=

# Loop on the argument count; it states the intent directly instead of
# relying on the joined "$*" string being empty.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -o )
            # Supported options with arg; the output path is also kept
            # for the relink step below.
            options="$options $1 $2"
            shift
            object="$1"
            shift
            ;;
        -f | -I | -i | -D )
            # Supported options with arg
            options="$options $1 $2"
            shift
            shift
            ;;
        -I* | -i* | --prefix* )
            # Supported options without arg
            options="$options $1"
            shift
            ;;
        -D* ) # For defines we need to remove spaces
            # $options is expanded with word splitting, so a define that
            # contains a space cannot be forwarded intact; it is dropped.
            case "$1" in
                *' '* ) ;;
                *) options="$options $1" ;;
            esac
            shift
            ;;
        #-blah )
        # Unsupported options with args - none known
        -* )
            # Unsupported options with no args
            shift
            ;;
        * )
            args="$args $1"
            shift
            ;;
    esac
done

# Fail early with a clear message instead of the confusing secondary errors
# an empty $object or $CET_LD would otherwise cause in the relink step.
if [ -z "$object" ]; then
    echo "$0: no output object given (-o <file>)" >&2
    exit 1
fi
if [ -z "${CET_LD:-}" ]; then
    echo "$0: CET_LD is not set; it must name the linker for the relink" >&2
    exit 1
fi

# Word splitting of $options and $args is intentional: both are
# space-separated lists assembled above.
# shellcheck disable=SC2086
yasm $options $args || exit $?

# Relink into a temporary file so the original object is only replaced once
# the linker has succeeded. $CET_LD stays unquoted so it may carry flags.
# shellcheck disable=SC2086
if ! $CET_LD -r -z ibt -z shstk -o "$object.tmp" "$object"; then
    rm -f -- "$object.tmp"
    exit 1
fi
mv -- "$object.tmp" "$object"